1484 files changed, 63160 insertions, 32327 deletions
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
index 9940e924a47e..5689fc9a976a 100644
--- a/Documentation/arm64/sve.txt
+++ b/Documentation/arm64/sve.txt
@@ -56,6 +56,18 @@ model features for SVE is included in Appendix A.
   is to connect to a target process first and then attempt a
   ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
 
+* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
+  between userspace and the kernel, the register value is encoded in memory in
+  an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
+  byte offset i from the start of the memory representation.  This affects for
+  example the signal frame (struct sve_context) and ptrace interface
+  (struct user_sve_header) and associated data.
+
+  Beware that on big-endian systems this results in a different byte order than
+  for the FPSIMD V-registers, which are stored as single host-endian 128-bit
+  values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
+  byte offset i.  (struct fpsimd_context, struct user_fpsimd_state).
+
 
 2.  Vector length terminology
 -----------------------------
@@ -124,6 +136,10 @@ the SVE instruction set architecture.
   size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
   the members.
 
+* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
+  layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
+  start of the register's representation in memory.
+
 * If the SVE context is too big to fit in sigcontext.__reserved[], then extra
   space is allocated on the stack, an extra_context record is written in
   __reserved[] referencing this space.  sve_context is then written in the
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
index 3b2612e342f1..7977f6fb8b20 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.txt
@@ -13,11 +13,9 @@ you can do so by typing:
 
 # mount none /sys -t sysfs
 
-As of the Linux 2.6.10 kernel, it is now possible to change the
-IO scheduler for a given block device on the fly (thus making it possible,
-for instance, to set the CFQ scheduler for the system default, but
-set a specific device to use the deadline or noop schedulers - which
-can improve that device's throughput).
+It is possible to change the IO scheduler for a given block device on
+the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
+which can improve that device's throughput.
 
 To set a specific scheduler, simply do this:
 
@@ -30,8 +28,8 @@ The list of defined schedulers can be found by simply doing
 a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
 will be displayed, with the currently selected scheduler in brackets:
 
-# cat /sys/block/hda/queue/scheduler
-noop deadline [cfq]
-# echo deadline > /sys/block/hda/queue/scheduler
-# cat /sys/block/hda/queue/scheduler
-noop [deadline] cfq
+# cat /sys/block/sda/queue/scheduler
+[mq-deadline] kyber bfq none
+# echo none >/sys/block/sda/queue/scheduler
+# cat /sys/block/sda/queue/scheduler
+[none] mq-deadline kyber bfq
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt
index 673dc34d3f78..d1a1b7bdd03a 100644
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ b/Documentation/cgroup-v1/blkio-controller.txt
@@ -8,61 +8,13 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
 Plan is to use the same cgroup based management interface for blkio controller
 and based on user options switch IO policies in the background.
 
-Currently two IO control policies are implemented. First one is proportional
-weight time based division of disk policy. It is implemented in CFQ. Hence
-this policy takes effect only on leaf nodes when CFQ is being used. The second
-one is throttling policy which can be used to specify upper IO rate limits
-on devices. This policy is implemented in generic block layer and can be
-used on leaf nodes as well as higher level logical devices like device mapper.
+One IO control policy is throttling policy which can be used to
+specify upper IO rate limits on devices. This policy is implemented in
+generic block layer and can be used on leaf nodes as well as higher
+level logical devices like device mapper.
 
 HOWTO
 =====
-Proportional Weight division of bandwidth
------------------------------------------
-You can do a very simple testing of running two dd threads in two different
-cgroups. Here is what you can do.
-
-- Enable Block IO controller
-	CONFIG_BLK_CGROUP=y
-
-- Enable group scheduling in CFQ
-	CONFIG_CFQ_GROUP_IOSCHED=y
-
-- Compile and boot into kernel and mount IO controller (blkio); see
-  cgroups.txt, Why are cgroups needed?.
-
-	mount -t tmpfs cgroup_root /sys/fs/cgroup
-	mkdir /sys/fs/cgroup/blkio
-	mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Create two cgroups
-	mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
-
-- Set weights of group test1 and test2
-	echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
-	echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
-
-- Create two same size files (say 512MB each) on same disk (file1, file2) and
-  launch two dd threads in different cgroup to read those files.
-
-	sync
-	echo 3 > /proc/sys/vm/drop_caches
-
-	dd if=/mnt/sdb/zerofile1 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test1/tasks
-	cat /sys/fs/cgroup/blkio/test1/tasks
-
-	dd if=/mnt/sdb/zerofile2 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test2/tasks
-	cat /sys/fs/cgroup/blkio/test2/tasks
-
-- At macro level, first dd should finish first. To get more precise data, keep
-  on looking at (with the help of script), at blkio.disk_time and
-  blkio.disk_sectors files of both test1 and test2 groups. This will tell how
-  much disk time (in milliseconds), each group got and how many sectors each
-  group dispatched to the disk. We provide fairness in terms of disk time, so
-  ideally io.disk_time of cgroups should be in proportion to the weight.
-
 Throttling/Upper Limit policy
 -----------------------------
 - Enable Block IO controller
@@ -94,7 +46,7 @@ Throttling/Upper Limit policy
 Hierarchical Cgroups
 ====================
 
-Both CFQ and throttling implement hierarchy support; however,
+Throttling implements hierarchy support; however,
 throttling's hierarchy support is enabled iff "sane_behavior" is
 enabled from cgroup side, which currently is a development option and
 not publicly available.
@@ -107,9 +59,8 @@ If somebody created a hierarchy like as follows.
 			|
 		     test3
 
-CFQ by default and throttling with "sane_behavior" will handle the
-hierarchy correctly.  For details on CFQ hierarchy support, refer to
-Documentation/block/cfq-iosched.txt.  For throttling, all limits apply
+Throttling with "sane_behavior" will handle the
+hierarchy correctly. For throttling, all limits apply
 to the whole subtree while all statistics are local to the IOs
 directly generated by tasks in that cgroup.
 
@@ -130,10 +81,6 @@ CONFIG_DEBUG_BLK_CGROUP
 	- Debug help. Right now some additional stats file show up in cgroup
 	  if this option is enabled.
 
-CONFIG_CFQ_GROUP_IOSCHED
-	- Enables group scheduling in CFQ. Currently only 1 level of group
-	  creation is allowed.
-
 CONFIG_BLK_DEV_THROTTLING
 	- Enable block device throttling support in block layer.
 
@@ -344,32 +291,3 @@ Common files among various policies
 - blkio.reset_stats
 	- Writing an int to this file will result in resetting all the stats
 	  for that cgroup.
-
-CFQ sysfs tunable
-=================
-/sys/block/<disk>/queue/iosched/slice_idle
-------------------------------------------
-On a faster hardware CFQ can be slow, especially with sequential workload.
-This happens because CFQ idles on a single queue and single queue might not
-drive deeper request queue depths to keep the storage busy. In such scenarios
-one can try setting slice_idle=0 and that would switch CFQ to IOPS
-(IO operations per second) mode on NCQ supporting hardware.
-
-That means CFQ will not idle between cfq queues of a cfq group and hence be
-able to driver higher queue depth and achieve better throughput. That also
-means that cfq provides fairness among groups in terms of IOPS and not in
-terms of disk time.
-
-/sys/block/<disk>/queue/iosched/group_idle
-------------------------------------------
-If one disables idling on individual cfq queues and cfq service trees by
-setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
-on the group in an attempt to provide fairness among groups.
-
-By default group_idle is same as slice_idle and does not do anything if
-slice_idle is enabled.
-
-One can experience an overall throughput drop if you have created multiple
-groups and put applications in that group which are not driving enough
-IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
-on individual groups and throughput should improve.
diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt
index 106245c3aecc..1260e5369b9b 100644
--- a/Documentation/cgroup-v1/hugetlb.txt
+++ b/Documentation/cgroup-v1/hugetlb.txt
@@ -32,14 +32,18 @@ Brief summary of control files
  hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
 
-For a system supporting two hugepage size (16M and 16G) the control
+For a system supporting three hugepage sizes (64k, 32M and 1G), the control
 files include:
 
-hugetlb.16GB.limit_in_bytes
-hugetlb.16GB.max_usage_in_bytes
-hugetlb.16GB.usage_in_bytes
-hugetlb.16GB.failcnt
-hugetlb.16MB.limit_in_bytes
-hugetlb.16MB.max_usage_in_bytes
-hugetlb.16MB.usage_in_bytes
-hugetlb.16MB.failcnt
+hugetlb.1GB.limit_in_bytes
+hugetlb.1GB.max_usage_in_bytes
+hugetlb.1GB.usage_in_bytes
+hugetlb.1GB.failcnt
+hugetlb.64KB.limit_in_bytes
+hugetlb.64KB.max_usage_in_bytes
+hugetlb.64KB.usage_in_bytes
+hugetlb.64KB.failcnt
+hugetlb.32MB.limit_in_bytes
+hugetlb.32MB.max_usage_in_bytes
+hugetlb.32MB.usage_in_bytes
+hugetlb.32MB.failcnt
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
new file mode 100644
index 000000000000..47950fced28d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
@@ -0,0 +1,100 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dsi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 MIPI-DSI Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#address-cells": true
+  "#size-cells": true
+
+  compatible:
+    const: allwinner,sun6i-a31-mipi-dsi
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+
+  resets:
+    maxItems: 1
+
+  phys:
+    maxItems: 1
+
+  phy-names:
+    const: dphy
+
+  port:
+    type: object
+    description:
+      A port node with endpoint definitions as defined in
+      Documentation/devicetree/bindings/media/video-interfaces.txt. That
+      port should be the input endpoint, usually coming from the
+      associated TCON.
+
+patternProperties:
+  "^panel@[0-9]+$": true
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - phys
+  - phy-names
+  - resets
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    dsi0: dsi@1ca0000 {
+        compatible = "allwinner,sun6i-a31-mipi-dsi";
+        reg = <0x01ca0000 0x1000>;
+        interrupts = <0 89 4>;
+        clocks = <&ccu 23>, <&ccu 96>;
+        clock-names = "bus", "mod";
+        resets = <&ccu 4>;
+        phys = <&dphy0>;
+        phy-names = "dphy";
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        panel@0 {
+                compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
+                reg = <0>;
+                power-gpios = <&pio 1 7 0>; /* PB07 */
+                reset-gpios = <&r_pio 0 5 1>; /* PL05 */
+                backlight = <&pwm_bl>;
+        };
+
+        port {
+            dsi0_in_tcon0: endpoint {
+                remote-endpoint = <&tcon0_out_dsi0>;
+            };
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/display/arm,komeda.txt b/Documentation/devicetree/bindings/display/arm,komeda.txt
index 02b226532ebd..8513695ee47f 100644
--- a/Documentation/devicetree/bindings/display/arm,komeda.txt
+++ b/Documentation/devicetree/bindings/display/arm,komeda.txt
@@ -7,10 +7,13 @@ Required properties:
 - clocks: A list of phandle + clock-specifier pairs, one for each entry
     in 'clock-names'
 - clock-names: A list of clock names. It should contain:
-      - "mclk": for the main processor clock
-      - "pclk": for the APB interface clock
+      - "aclk": for the main processor clock
 - #address-cells: Must be 1
 - #size-cells: Must be 0
+- iommus: configure the stream id to IOMMU, Must be configured if want to
+    enable iommu in display. for how to configure this node please reference
+        devicetree/bindings/iommu/arm,smmu-v3.txt,
+        devicetree/bindings/iommu/iommu.txt
 
 Required properties for sub-node: pipeline@nq
 Each device contains one or two pipeline sub-nodes (at least one), each
@@ -20,7 +23,6 @@ pipeline node should provide properties:
     in 'clock-names'
 - clock-names: should contain:
       - "pxclk": pixel clock
-      - "aclk": AXI interface clock
 
 - port: each pipeline connect to an encoder input port. The connection is
     modeled using the OF graph bindings specified in
@@ -42,12 +44,15 @@ Example:
 		compatible = "arm,mali-d71";
 		reg = <0xc00000 0x20000>;
 		interrupts = <0 168 4>;
-		clocks = <&dpu_mclk>, <&dpu_aclk>;
-		clock-names = "mclk", "pclk";
+		clocks = <&dpu_aclk>;
+		clock-names = "aclk";
+		iommus = <&smmu 0>, <&smmu 1>, <&smmu 2>, <&smmu 3>,
+			<&smmu 4>, <&smmu 5>, <&smmu 6>, <&smmu 7>,
+			<&smmu 8>, <&smmu 9>;
 
 		dp0_pipe0: pipeline@0 {
-			clocks = <&fpgaosc2>, <&dpu_aclk>;
-			clock-names = "pxclk", "aclk";
+			clocks = <&fpgaosc2>;
+			clock-names = "pxclk";
 			reg = <0>;
 
 			port {
@@ -58,8 +63,8 @@ Example:
 		};
 
 		dp0_pipe1: pipeline@1 {
-			clocks = <&fpgaosc2>, <&dpu_aclk>;
-			clock-names = "pxclk", "aclk";
+			clocks = <&fpgaosc2>;
+			clock-names = "pxclk";
 			reg = <1>;
 
 			port {
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
index 900a884ad9f5..c6a196d0b075 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt
@@ -9,6 +9,7 @@ Required properties:
 - compatible : Shall contain one of
   - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
   - "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
+  - "renesas,r8a774a1-lvds" for R8A774A1 (RZ/G2M) compatible LVDS encoders
   - "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
   - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
   - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
@@ -45,14 +46,24 @@ OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
 
 Each port shall have a single endpoint.
 
+Optional properties:
+
+- renesas,companion : phandle to the companion LVDS encoder. This property is
+  mandatory for the first LVDS encoder on D3 and E3 SoCs, and shall point to
+  the second encoder to be used as a companion in dual-link mode. It shall not
+  be set for any other LVDS encoder.
+
 
 Example:
 
 	lvds0: lvds@feb90000 {
-		compatible = "renesas,r8a7790-lvds";
-		reg = <0 0xfeb90000 0 0x1c>;
-		clocks = <&cpg CPG_MOD 726>;
-		resets = <&cpg 726>;
+		compatible = "renesas,r8a77990-lvds";
+		reg = <0 0xfeb90000 0 0x20>;
+		clocks = <&cpg CPG_MOD 727>;
+		power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
+		resets = <&cpg 727>;
+
+		renesas,companion = <&lvds1>;
 
 		ports {
 			#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/display/bridge/sii902x.txt b/Documentation/devicetree/bindings/display/bridge/sii902x.txt
index 72d2dc6c3e6b..2df44b7d3821 100644
--- a/Documentation/devicetree/bindings/display/bridge/sii902x.txt
+++ b/Documentation/devicetree/bindings/display/bridge/sii902x.txt
@@ -5,10 +5,44 @@ Required properties:
 	- reg: i2c address of the bridge
 
 Optional properties:
-	- interrupts: describe the interrupt line used to inform the host 
+	- interrupts: describe the interrupt line used to inform the host
 	  about hotplug events.
 	- reset-gpios: OF device-tree gpio specification for RST_N pin.
 
+	HDMI audio properties:
+	- #sound-dai-cells: <0> or <1>. <0> if only i2s or spdif pin
+	   is wired, <1> if the both are wired. HDMI audio is
+	   configured only if this property is found.
+	- sil,i2s-data-lanes: Array of up to 4 integers with values of 0-3
+	   Each integer indicates which i2s pin is connected to which
+	   audio fifo. The first integer selects i2s audio pin for the
+	   first audio fifo#0 (HDMI channels 1&2), second for fifo#1
+	   (HDMI channels 3&4), and so on. There is 4 fifos and 4 i2s
+	   pins (SD0 - SD3). Any i2s pin can be connected to any fifo,
+	   but there can be no gaps. E.g. an i2s pin must be mapped to
+	   fifo#0 and fifo#1 before mapping a channel to fifo#2. Default
+	   value is <0>, describing SD0 pin beiging routed to hdmi audio
+	   fifo #0.
+	- clocks: phandle and clock specifier for each clock listed in
+           the clock-names property
+	- clock-names: "mclk"
+	   Describes SII902x MCLK input. MCLK is used to produce
+	   HDMI audio CTS values. This property is required if
+	   "#sound-dai-cells"-property is present. This property follows
+	   Documentation/devicetree/bindings/clock/clock-bindings.txt
+	   consumer binding.
+
+	If HDMI audio is configured the sii902x device becomes an I2S
+	and/or spdif audio codec component (e.g a digital audio sink),
+	that can be used in configuring a full audio devices with
+	simple-card or audio-graph-card binding. See their binding
+	documents on how to describe the way the sii902x device is
+	connected to the rest of the audio system:
+	Documentation/devicetree/bindings/sound/simple-card.txt
+	Documentation/devicetree/bindings/sound/audio-graph-card.txt
+	Note: In case of the audio-graph-card binding the used port
+	index should be 3.
+
 Optional subnodes:
 	- video input: this subnode can contain a video input port node
 	  to connect the bridge to a display controller output (See this
@@ -21,6 +55,12 @@ Example:
 		compatible = "sil,sii9022";
 		reg = <0x39>;
 		reset-gpios = <&pioA 1 0>;
+
+		#sound-dai-cells = <0>;
+		sil,i2s-data-lanes = < 0 1 2 >;
+		clocks = <&mclk>;
+		clock-names = "mclk";
+
 		ports {
 			#address-cells = <1>;
 			#size-cells = <0>;
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
index 37f0c04d5a28..d17d1e5820d7 100644
--- a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
+++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
@@ -28,6 +28,12 @@ Optional video port nodes:
 - port@1: Second LVDS input port
 - port@3: Second digital CMOS/TTL parallel output
 
+The device can operate in single-link mode or dual-link mode. In single-link
+mode, all pixels are received on port@0, and port@1 shall not contain any
+endpoint. In dual-link mode, even-numbered pixels are received on port@0 and
+odd-numbered pixels on port@1, and both port@0 and port@1 shall contain
+endpoints.
+
 Example:
 --------
 
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
index e3f6aa6a214d..583c5e9dbe6b 100644
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
@@ -12,6 +12,7 @@ Optional properties:
                    (active high shutdown input)
  - reset-gpios: OF device-tree gpio specification for RSTX pin
                 (active low system reset)
+ - toshiba,hpd-pin: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
  - ports: the ports node can contain video interface port nodes to connect
    to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
     - port@0: DSI input port
diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.txt b/Documentation/devicetree/bindings/display/ingenic,lcd.txt
new file mode 100644
index 000000000000..7b536c8c6dde
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/ingenic,lcd.txt
@@ -0,0 +1,44 @@
+Ingenic JZ47xx LCD driver
+
+Required properties:
+- compatible: one of:
+  * ingenic,jz4740-lcd
+  * ingenic,jz4725b-lcd
+- reg: LCD registers location and length
+- clocks: LCD pixclock and device clock specifiers.
+	   The device clock is only required on the JZ4740.
+- clock-names: "lcd_pclk" and "lcd"
+- interrupts: Specifies the interrupt line the LCD controller is connected to.
+
+Example:
+
+panel {
+	compatible = "sharp,ls020b1dd01d";
+
+	backlight = <&backlight>;
+	power-supply = <&vcc>;
+
+	port {
+		panel_input: endpoint {
+			remote-endpoint = <&panel_output>;
+		};
+	};
+};
+
+
+lcd: lcd-controller@13050000 {
+	compatible = "ingenic,jz4725b-lcd";
+	reg = <0x13050000 0x1000>;
+
+	interrupt-parent = <&intc>;
+	interrupts = <31>;
+
+	clocks = <&cgu JZ4725B_CLK_LCD>;
+	clock-names = "lcd";
+
+	port {
+		panel_output: endpoint {
+			remote-endpoint = <&panel_input>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt b/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt
new file mode 100644
index 000000000000..a30d63db3c8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt
@@ -0,0 +1,9 @@
+Armadeus ST0700 Adapt. A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with
+an adapter board.
+
+Required properties:
+- compatible: "armadeus,st0700-adapt"
+- power-supply: see panel-common.txt
+
+Optional properties:
+- backlight: see panel-common.txt
diff --git a/Documentation/devicetree/bindings/display/panel/edt,et-series.txt b/Documentation/devicetree/bindings/display/panel/edt,et-series.txt
index f56b99ebd9be..be8684327ee4 100644
--- a/Documentation/devicetree/bindings/display/panel/edt,et-series.txt
+++ b/Documentation/devicetree/bindings/display/panel/edt,et-series.txt
@@ -6,6 +6,22 @@ Display bindings for EDT Display Technology Corp. Displays which are
 compatible with the simple-panel binding, which is specified in
 simple-panel.txt
 
+3,5" QVGA TFT Panels
+--------------------
++-----------------+---------------------+-------------------------------------+
+| Identifier      | compatbile          | description                         |
++=================+=====================+=====================================+
+| ET035012DM6     | edt,et035012dm6     | 3.5" QVGA TFT LCD panel             |
++-----------------+---------------------+-------------------------------------+
+
+4,3" WVGA TFT Panels
+--------------------
+
++-----------------+---------------------+-------------------------------------+
+| Identifier      | compatbile          | description                         |
++=================+=====================+=====================================+
+| ETM0430G0DH6    | edt,etm0430g0dh6    | 480x272 TFT Display                 |
++-----------------+---------------------+-------------------------------------+
 
 5,7" WVGA TFT Panels
 --------------------
diff --git a/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt b/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt
new file mode 100644
index 000000000000..82d22e191ac3
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt
@@ -0,0 +1,12 @@
+Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel
+
+Required properties:
+- compatible: should be "evervision,vgg804821"
+- power-supply: See simple-panel.txt
+
+Optional properties:
+- backlight: See simple-panel.txt
+- enable-gpios: See simple-panel.txt
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt b/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt
new file mode 100644
index 000000000000..6c9156fc3478
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt
@@ -0,0 +1,32 @@
+FriendlyELEC HD702E 800x1280 LCD panel
+
+HD702E lcd is FriendlyELEC developed eDP LCD panel with 800x1280
+resolution. It has built in Goodix, GT9271 captive touchscreen
+with backlight adjustable via PWM.
+
+Required properties:
+- compatible: should be "friendlyarm,hd702e"
+- power-supply: regulator to provide the supply voltage
+
+Optional properties:
+- backlight: phandle of the backlight device attached to the panel
+
+Optional nodes:
+- Video port for LCD panel input.
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
+
+Example:
+
+	panel {
+		compatible ="friendlyarm,hd702e", "simple-panel";
+		backlight = <&backlight>;
+		power-supply = <&vcc3v3_sys>;
+
+		port {
+			panel_in_edp: endpoint {
+				remote-endpoint = <&edp_out_panel>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt b/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt
new file mode 100644
index 000000000000..be7ac666807b
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt
@@ -0,0 +1,42 @@
+Kaohsiung Opto-Electronics Inc. 5.7" QVGA (320 x 240) TFT LCD panel
+
+Required properties:
+- compatible: should be "koe,tx14d24vm1bpa"
+- backlight: phandle of the backlight device attached to the panel
+- power-supply: single regulator to provide the supply voltage
+
+Required nodes:
+- port: Parallel port mapping to connect this display
+
+This panel needs single power supply voltage. Its backlight is conntrolled
+via PWM signal.
+
+Example:
+--------
+
+Example device-tree definition when connected to iMX53 based board
+
+	lcd_panel: lcd-panel {
+		compatible = "koe,tx14d24vm1bpa";
+		backlight = <&backlight_lcd>;
+		power-supply = <&reg_3v3>;
+
+		port {
+			lcd_panel_in: endpoint {
+				remote-endpoint = <&lcd_display_out>;
+			};
+		};
+	};
+
+Then one needs to extend the dispX node:
+
+	lcd_display: disp1 {
+
+		port@1 {
+			reg = <1>;
+
+			lcd_display_out: endpoint {
+				remote-endpoint = <&lcd_panel_in>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt
new file mode 100644
index 000000000000..85c0b2cacfda
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt
@@ -0,0 +1,11 @@
+One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel
+
+Required properties:
+- compatible: should be "osddisplays,osd101t2045-53ts"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt
new file mode 100644
index 000000000000..9d88e96003fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2587-53ts.txt
@@ -0,0 +1,14 @@
+One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel
+
+The panel is similar to OSD101T2045-53TS, but it needs additional
+MIPI_DSI_TURN_ON_PERIPHERAL message from the host.
+
+Required properties:
+- compatible: should be "osddisplays,osd101t2587-53ts"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt
new file mode 100644
index 000000000000..9fb9ebeef8e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.txt
@@ -0,0 +1,33 @@
+Samsung s6e63m0 AMOLED LCD panel
+
+Required properties:
+  - compatible: "samsung,s6e63m0"
+  - reset-gpios: GPIO spec for reset pin
+  - vdd3-supply: VDD regulator
+  - vci-supply: VCI regulator
+
+The panel must obey rules for SPI slave device specified in document [1].
+
+The device node can contain one 'port' child node with one child
+'endpoint' node, according to the bindings defined in [2]. This
+node should describe panel's video bus.
+
+[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
+[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+		s6e63m0: display@0 {
+			compatible = "samsung,s6e63m0";
+			reg = <0>;
+			reset-gpio = <&mp05 5 1>;
+			vdd3-supply = <&ldo12_reg>;
+			vci-supply = <&ldo11_reg>;
+			spi-max-frequency = <1200000>;
+
+			port {
+				lcd_ep: endpoint {
+					remote-endpoint = <&fimd_ep>;
+				};
+			};
+		};
diff --git a/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt b/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt
new file mode 100644
index 000000000000..dfb572f085eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt
@@ -0,0 +1,15 @@
+TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with
+resistive touch
+
+The panel is found on TI AM335x-evm.
+
+Required properties:
+- compatible: should be "tfc,s9700rtwv43tr-01b"
+- power-supply: See panel-common.txt
+
+Optional properties:
+- enable-gpios: GPIO pin to enable or disable the panel, if there is one
+- backlight: phandle of the backlight device attached to the panel
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt b/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt
new file mode 100644
index 000000000000..b42bf06bbd99
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt
@@ -0,0 +1,12 @@
+VXT 800x480 color TFT LCD panel
+
+Required properties:
+- compatible: should be "vxt,vl050-8048nt-c01"
+- power-supply: as specified in the base binding
+
+Optional properties:
+- backlight: as specified in the base binding
+- enable-gpios: as specified in the base binding
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt
index aedb22b4d161..c97dfacad281 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.txt
+++ b/Documentation/devicetree/bindings/display/renesas,du.txt
@@ -7,6 +7,7 @@ Required Properties:
     - "renesas,du-r8a7744" for R8A7744 (RZ/G1N) compatible DU
     - "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
     - "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
+    - "renesas,du-r8a774a1" for R8A774A1 (RZ/G2M) compatible DU
     - "renesas,du-r8a774c0" for R8A774C0 (RZ/G2E) compatible DU
     - "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
     - "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
@@ -58,6 +59,7 @@ corresponding to each DU output.
  R8A7744 (RZ/G1N)       DPAD 0         LVDS 0         -              -
  R8A7745 (RZ/G1E)       DPAD 0         DPAD 1         -              -
  R8A77470 (RZ/G1C)      DPAD 0         DPAD 1         LVDS 0         -
+ R8A774A1 (RZ/G2M)      DPAD 0         HDMI 0         LVDS 0         -
  R8A774C0 (RZ/G2E)      DPAD 0         LVDS 0         LVDS 1         -
  R8A7779 (R-Car H1)     DPAD 0         DPAD 1         -              -
  R8A7790 (R-Car H2)     DPAD 0         LVDS 0         LVDS 1         -
diff --git a/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt b/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt
index 39143424a474..3d32ce137e7f 100644
--- a/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt
+++ b/Documentation/devicetree/bindings/display/rockchip/dw_hdmi-rockchip.txt
@@ -12,6 +12,7 @@ following device-specific properties.
 Required properties:
 
 - compatible: should be one of the following:
+		"rockchip,rk3228-dw-hdmi"
 		"rockchip,rk3288-dw-hdmi"
 		"rockchip,rk3328-dw-hdmi"
 		"rockchip,rk3399-dw-hdmi"
@@ -38,6 +39,13 @@ Optional properties
 - phys: from general PHY binding: the phandle for the PHY device.
 - phy-names: Should be "hdmi" if phys references an external phy.
 
+Optional pinctrl entry:
+- If you have both a "unwedge" and "default" pinctrl entry, dw_hdmi
+  will switch to the unwedge pinctrl state for 10ms if it ever gets an
+  i2c timeout.  It's intended that this unwedge pinctrl entry will
+  cause the SDA line to be driven low to work around a hardware
+  errata.
+
 Example:
 
 hdmi: hdmi@ff980000 {
diff --git a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
index 3eb1b48b47dd..60c54da4e526 100644
--- a/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
+++ b/Documentation/devicetree/bindings/display/st,stm32-ltdc.txt
@@ -40,6 +40,8 @@ Mandatory nodes specific to STM32 DSI:
 - panel or bridge node: A node containing the panel or bridge description as
   documented in [6].
   - port: panel or bridge port node, connected to the DSI output port (port@1).
+Optional properties:
+- phy-dsi-supply: phandle of the regulator that provides the supply voltage.
 
 Note: You can find more documentation in the following references
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
@@ -101,6 +103,7 @@ Example 2: DSI panel
 			clock-names = "pclk", "ref";
 			resets = <&rcc STM32F4_APB2_RESET(DSI)>;
 			reset-names = "apb";
+			phy-dsi-supply = <&reg18>;
 
 			ports {
 				#address-cells = <1>;
diff --git a/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt b/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt
deleted file mode 100644
index 6a6cf5de08b0..000000000000
--- a/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-Allwinner A31 DSI Encoder
-=========================
-
-The DSI pipeline consists of two separate blocks: the DSI controller
-itself, and its associated D-PHY.
-
-DSI Encoder
------------
-
-The DSI Encoder generates the DSI signal from the TCON's.
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun6i-a31-mipi-dsi
-  - reg: base address and size of memory-mapped region
-  - interrupts: interrupt associated to this IP
-  - clocks: phandles to the clocks feeding the DSI encoder
-    * bus: the DSI interface clock
-    * mod: the DSI module clock
-  - clock-names: the clock names mentioned above
-  - phys: phandle to the D-PHY
-  - phy-names: must be "dphy"
-  - resets: phandle to the reset controller driving the encoder
-
-  - ports: A ports node with endpoint definitions as defined in
-    Documentation/devicetree/bindings/media/video-interfaces.txt. The
-    first port should be the input endpoint, usually coming from the
-    associated TCON.
-
-Any MIPI-DSI device attached to this should be described according to
-the bindings defined in ../mipi-dsi-bus.txt
-
-D-PHY
------
-
-Required properties:
-  - compatible: value must be one of:
-    * allwinner,sun6i-a31-mipi-dphy
-  - reg: base address and size of memory-mapped region
-  - clocks: phandles to the clocks feeding the DSI encoder
-    * bus: the DSI interface clock
-    * mod: the DSI module clock
-  - clock-names: the clock names mentioned above
-  - resets: phandle to the reset controller driving the encoder
-
-Example:
-
-dsi0: dsi@1ca0000 {
-	compatible = "allwinner,sun6i-a31-mipi-dsi";
-	reg = <0x01ca0000 0x1000>;
-	interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
-	clocks = <&ccu CLK_BUS_MIPI_DSI>,
-		 <&ccu CLK_DSI_SCLK>;
-	clock-names = "bus", "mod";
-	resets = <&ccu RST_BUS_MIPI_DSI>;
-	phys = <&dphy0>;
-	phy-names = "dphy";
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	panel@0 {
-		compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
-		reg = <0>;
-		power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */
-		reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
-		backlight = <&pwm_bl>;
-	};
-
-	ports {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		port@0 {
-			#address-cells = <1>;
-			#size-cells = <0>;
-			reg = <0>;
-
-			dsi0_in_tcon0: endpoint {
-				remote-endpoint = <&tcon0_out_dsi0>;
-			};
-		};
-	};
-};
-
-dphy0: d-phy@1ca1000 {
-	compatible = "allwinner,sun6i-a31-mipi-dphy";
-	reg = <0x01ca1000 0x1000>;
-	clocks = <&ccu CLK_BUS_MIPI_DSI>,
-		 <&ccu CLK_DSI_DPHY>;
-	clock-names = "bus", "mod";
-	resets = <&ccu RST_BUS_MIPI_DSI>;
-	#phy-cells = <0>;
-};
diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
index 1b1a74129141..e5ad3b2afe17 100644
--- a/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-midgard.txt
@@ -15,6 +15,7 @@ Required properties:
     + "arm,mali-t860"
     + "arm,mali-t880"
   * which must be preceded by one of the following vendor specifics:
+    + "allwinner,sun50i-h6-mali"
     + "amlogic,meson-gxm-mali"
     + "rockchip,rk3288-mali"
     + "rockchip,rk3399-mali"
@@ -31,21 +32,36 @@ Optional properties:
 
 - clocks : Phandle to clock for the Mali Midgard device.
 
+- clock-names : Specify the names of the clocks specified in clocks
+  when multiple clocks are present.
+    * core: clock driving the GPU itself (When only one clock is present,
+      assume it's this clock.)
+    * bus: bus clock for the GPU
+
 - mali-supply : Phandle to regulator for the Mali device. Refer to
   Documentation/devicetree/bindings/regulator/regulator.txt for details.
 
 - operating-points-v2 : Refer to Documentation/devicetree/bindings/opp/opp.txt
   for details.
 
+- #cooling-cells: Refer to Documentation/devicetree/bindings/thermal/thermal.txt
+  for details.
+
 - resets : Phandle of the GPU reset line.
 
 Vendor-specific bindings
 ------------------------
 
 The Mali GPU is integrated very differently from one SoC to
-another. In order to accomodate those differences, you have the option
+another. In order to accommodate those differences, you have the option
 to specify one more vendor-specific compatible, among:
 
+- "allwinner,sun50i-h6-mali"
+  Required properties:
+  - clocks : phandles to core and bus clocks
+  - clock-names : must contain "core" and "bus"
+  - resets: phandle to GPU reset line
+
 - "amlogic,meson-gxm-mali"
   Required properties:
   - resets : Should contain phandles of :
@@ -65,6 +81,7 @@ gpu@ffa30000 {
 	mali-supply = <&vdd_gpu>;
 	operating-points-v2 = <&gpu_opp_table>;
 	power-domains = <&power RK3288_PD_GPU>;
+	#cooling-cells = <2>;
 };
 
 gpu_opp_table: opp_table0 {
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
new file mode 100644
index 000000000000..250f9d5aabdf
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dphy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Allwinner A31 MIPI D-PHY Controller Device Tree Bindings
+
+maintainers:
+  - Chen-Yu Tsai <wens@csie.org>
+  - Maxime Ripard <maxime.ripard@bootlin.com>
+
+properties:
+  "#phy-cells":
+    const: 0
+
+  compatible:
+    const: allwinner,sun6i-a31-mipi-dphy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: bus
+      - const: mod
+
+  resets:
+    maxItems: 1
+
+required:
+  - "#phy-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    dphy0: d-phy@1ca1000 {
+        compatible = "allwinner,sun6i-a31-mipi-dphy";
+        reg = <0x01ca1000 0x1000>;
+        clocks = <&ccu 23>, <&ccu 97>;
+        clock-names = "bus", "mod";
+        resets = <&ccu 4>;
+        #phy-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 33a65a45e319..f0bcff033ecc 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -287,6 +287,8 @@ patternProperties:
     description: Everest Semiconductor Co. Ltd.
   "^everspin,.*":
     description: Everspin Technologies, Inc.
+  "^evervision,.*":
+    description: Evervision Electronics Co. Ltd.
   "^exar,.*":
     description: Exar Corporation
   "^excito,.*":
@@ -849,6 +851,8 @@ patternProperties:
     description: Shenzhen Techstar Electronics Co., Ltd.
   "^terasic,.*":
     description: Terasic Inc.
+  "^tfc,.*":
+    description: Three Five Corp
   "^thine,.*":
     description: THine Electronics, Inc.
   "^ti,.*":
@@ -923,6 +927,8 @@ patternProperties:
     description: Voipac Technologies s.r.o.
   "^vot,.*":
     description: Vision Optical Technology Co., Ltd.
+  "^vxt,.*":
+    description: VXT Ltd
   "^wd,.*":
     description: Western Digital Corp.
   "^wetek,.*":
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt
index 16aa08453911..1dd5a52f9390 100644
--- a/Documentation/fb/modedb.txt
+++ b/Documentation/fb/modedb.txt
@@ -51,6 +51,20 @@ To force the VGA output to be enabled and drive a specific mode say:
 Specifying the option multiple times for different ports is possible, e.g.:
     video=LVDS-1:d video=HDMI-1:D
 
+Options can also be passed after the mode, using commas as separator.
+
+       Sample usage: 720x480,rotate=180 - 720x480 mode, rotated by 180 degrees
+
+Valid options are:
+
+  - margin_top, margin_bottom, margin_left, margin_right (integer):
+    Number of pixels in the margins, typically to deal with overscan on TVs
+  - reflect_x (boolean): Perform an axial symmetry on the X axis
+  - reflect_y (boolean): Perform an axial symmetry on the Y axis
+  - rotate (integer): Rotate the initial framebuffer by x
+    degrees. Valid values are 0, 90, 180 and 270.
+
+
 ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo *****
 
 What is the VESA(TM) Coordinated Video Timings (CVT)?
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst
index a740e491dfcc..5acdd1842ea2 100644
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -37,10 +37,10 @@ Buffer Objects
 PRIME Buffer Sharing
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
    :doc: PRIME Buffer Sharing
 
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
    :internal:
 
 MMU Notifier
@@ -70,6 +70,26 @@ Interrupt Handling
 .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
    :internal:
 
+AMDGPU XGMI Support
+===================
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+   :doc: AMDGPU XGMI Support
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+   :internal:
+
+AMDGPU RAS debugfs control interface
+====================================
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS debugfs control interface
+
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :internal:
+
+
 GPU Power/Thermal Controls and Monitoring
 =========================================
 
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index 044a7025477c..4bfb7068e9f7 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -7,6 +7,7 @@ GPU Driver Documentation
    amdgpu
    amdgpu-dc
    i915
+   mcde
    meson
    pl111
    tegra
diff --git a/Documentation/gpu/drm-client.rst b/Documentation/gpu/drm-client.rst
index 7e672063e7eb..58b5a1d1219d 100644
--- a/Documentation/gpu/drm-client.rst
+++ b/Documentation/gpu/drm-client.rst
@@ -10,3 +10,6 @@ Kernel clients
 
 .. kernel-doc:: drivers/gpu/drm/drm_client.c
    :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_client_modeset.c
+   :export:
diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst
index 14102ae035dc..b327bbc11182 100644
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@@ -181,6 +181,21 @@ Panel Helper Reference
 .. kernel-doc:: drivers/gpu/drm/drm_panel_orientation_quirks.c
    :export:
 
+Panel Self Refresh Helper Reference
+===================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_self_refresh_helper.c
+   :doc: overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_self_refresh_helper.c
+   :export:
+
+HDCP Helper Functions Reference
+===============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_hdcp.c
+   :export:
+
 Display Port Helper Functions Reference
 =======================================
 
diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index 54a696d961a7..c8ebd4f66a6a 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -79,7 +79,6 @@ count for the TTM, which will call your initialization function.
 
 See the radeon_ttm.c file for an example of usage.
 
-
 The Graphics Execution Manager (GEM)
 ====================================
 
@@ -380,6 +379,39 @@ GEM CMA Helper Functions Reference
 .. kernel-doc:: drivers/gpu/drm/drm_gem_cma_helper.c
    :export:
 
+VRAM Helper Function Reference
+==============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_vram_helper_common.c
+   :doc: overview
+
+.. kernel-doc:: include/drm/drm_gem_vram_helper.h
+   :internal:
+
+GEM VRAM Helper Functions Reference
+-----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_vram_helper.c
+   :doc: overview
+
+.. kernel-doc:: include/drm/drm_gem_vram_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_vram_helper.c
+   :export:
+
+VRAM MM Helper Functions Reference
+----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
+   :doc: overview
+
+.. kernel-doc:: include/drm/drm_vram_mm_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
+   :export:
+
 VMA Offset Manager
 ==================
 
diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst
index c9fd23efd957..94f90521f58c 100644
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@@ -85,16 +85,18 @@ leads to a few additional requirements:
 - The userspace side must be fully reviewed and tested to the standards of that
   userspace project. For e.g. mesa this means piglit testcases and review on the
   mailing list. This is again to ensure that the new interface actually gets the
-  job done.
+  job done.  The userspace-side reviewer should also provide an Acked-by on the
+  kernel uAPI patch indicating that they believe the proposed uAPI is sound and
+  sufficiently documented and validated for userspace's consumption.
 
 - The userspace patches must be against the canonical upstream, not some vendor
   fork. This is to make sure that no one cheats on the review and testing
   requirements by doing a quick fork.
 
 - The kernel patch can only be merged after all the above requirements are met,
-  but it **must** be merged **before** the userspace patches land. uAPI always flows
-  from the kernel, doing things the other way round risks divergence of the uAPI
-  definitions and header files.
+  but it **must** be merged to either drm-next or drm-misc-next **before** the
+  userspace patches land. uAPI always flows from the kernel, doing things the
+  other way round risks divergence of the uAPI definitions and header files.
 
 These are fairly steep requirements, but have grown out from years of shared
 pain and experience with uAPI added hastily, and almost always regretted about
@@ -327,3 +329,12 @@ DRM_IOCTL_MODESET_CTL
     mode setting, since on many devices the vertical blank counter is
     reset to 0 at some point during modeset. Modern drivers should not
     call this any more since with kernel mode setting it is a no-op.
+
+Userspace API Structures
+========================
+
+.. kernel-doc:: include/uapi/drm/drm_mode.h
+   :doc: overview
+
+.. kernel-doc:: include/uapi/drm/drm_mode.h
+   :internal:
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 055df45596c1..c38ef0dda605 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -61,7 +61,7 @@ Intel GVT-g Host Support(vGPU device model)
 Workarounds
 -----------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_workarounds.c
    :doc: Hardware workarounds
 
 Display Hardware Handling
@@ -82,13 +82,13 @@ change.
 Frontbuffer Tracking
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
    :doc: frontbuffer tracking
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.h
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
    :internal:
 
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem.c
@@ -97,10 +97,10 @@ Frontbuffer Tracking
 Display FIFO Underrun Reporting
 -------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
    :doc: fifo underrun handling
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
    :internal:
 
 Plane Configuration
@@ -115,10 +115,10 @@ panel self refresh.
 Atomic Plane Helpers
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
    :doc: atomic plane helpers
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
    :internal:
 
 Output Probing
@@ -132,19 +132,19 @@ probing, so those sections fully apply.
 Hotplug
 -------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
    :doc: Hotplug
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
    :internal:
 
 High Definition Audio
 ---------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
    :doc: High Definition Audio over HDMI and Display Port
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
    :internal:
 
 .. kernel-doc:: include/drm/i915_component.h
@@ -153,58 +153,58 @@ High Definition Audio
 Intel HDMI LPE Audio Support
 ----------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
    :doc: LPE Audio integration for HDMI or DP playback
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
    :internal:
 
 Panel Self Refresh PSR (PSR/SRD)
 --------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
    :doc: Panel Self Refresh (PSR/SRD)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
    :internal:
 
 Frame Buffer Compression (FBC)
 ------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
    :doc: Frame Buffer Compression (FBC)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
    :internal:
 
 Display Refresh Rate Switching (DRRS)
 -------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :doc: Display Refresh Rate Switching (DRRS)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_dp_set_drrs_state
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_enable
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_disable
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_invalidate
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_edp_drrs_flush
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
    :functions: intel_dp_drrs_init
 
 DPIO
 ----
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpio_phy.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpio_phy.c
    :doc: DPIO
 
 CSR firmware support for DMC
@@ -219,34 +219,34 @@ CSR firmware support for DMC
 Video BIOS Table (VBT)
 ----------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
    :doc: Video BIOS Table (VBT)
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_vbt_defs.h
    :internal:
 
 Display clocks
 --------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
    :doc: CDCLK / RAWCLK
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
    :internal:
 
 Display PLLs
 ------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
    :doc: Display PLLs
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.h
+.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.h
    :internal:
 
 Memory Management and Command Submission
@@ -349,7 +349,7 @@ of buffer object caches. Shrinking is used to make main memory
 available. Note that this is mostly orthogonal to evicting buffer
 objects, which has the goal to make space in gpu virtual address spaces.
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
    :internal:
 
 Batchbuffer Parsing
@@ -373,18 +373,15 @@ Batchbuffer Pools
 User Batchbuffer Execution
 --------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
    :doc: User command execution
 
 Logical Rings, Logical Ring Contexts and Execlists
 --------------------------------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
    :doc: Logical Rings, Logical Ring Contexts and Execlists
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
-   :internal:
-
 Global GTT views
 ----------------
 
@@ -415,10 +412,10 @@ Hardware Tiling and Swizzling Details
 Object Tiling IOCTLs
 --------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
    :internal:
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
    :doc: buffer object tiling
 
 WOPCM
@@ -478,12 +475,6 @@ i915_context_create and i915_context_free
 .. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
    :doc: i915_context_create and i915_context_free tracepoints
 
-switch_mm
----------
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
-   :doc: switch_mm tracepoint
-
 Perf
 ====
 
diff --git a/Documentation/gpu/mcde.rst b/Documentation/gpu/mcde.rst
new file mode 100644
index 000000000000..c69e977defda
--- /dev/null
+++ b/Documentation/gpu/mcde.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================================
+ drm/mcde ST-Ericsson MCDE Multi-channel display engine
+=======================================================
+
+.. kernel-doc:: drivers/gpu/drm/mcde/mcde_drv.c
+   :doc: ST-Ericsson MCDE DRM Driver
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index 1528ad2d598b..0a49c5a1d9ce 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -10,25 +10,6 @@ graphics subsystem useful as newbie projects. Or for slow rainy days.
 Subsystem-wide refactorings
 ===========================
 
-De-midlayer drivers
--------------------
-
-With the recent ``drm_bus`` cleanup patches for 3.17 it is no longer required
-to have a ``drm_bus`` structure set up. Drivers can directly set up the
-``drm_device`` structure instead of relying on bus methods in ``drm_usb.c``
-and ``drm_pci.c``. The goal is to get rid of the driver's ``->load`` /
-``->unload`` callbacks and open-code the load/unload sequence properly, using
-the new two-stage ``drm_device`` setup/teardown.
-
-Once all existing drivers are converted we can also remove those bus support
-files for USB and platform devices.
-
-All you need is a GPU for a non-converted driver (currently almost all of
-them, but also all the virtual ones used by KVM, so everyone qualifies).
-
-Contact: Daniel Vetter, Thierry Reding, respective driver maintainers
-
-
 Remove custom dumb_map_offset implementations
 ---------------------------------------------
 
@@ -247,6 +228,12 @@ struct drm_gem_object_funcs
 GEM objects can now have a function table instead of having the callbacks on the
 DRM driver struct. This is now the preferred way and drivers can be moved over.
 
+DRM_GEM_CMA_VMAP_DRIVER_OPS, DRM_GEM_SHMEM_DRIVER_OPS already support this, but
+DRM_GEM_VRAM_DRIVER_PRIME does not yet and needs to be aligned with the previous
+two. We also need a 2nd version of the CMA define that doesn't require the
+vmapping to be present (different hook for prime importing). Plus this needs to
+be rolled out to all drivers using their own implementations, too.
+
 Use DRM_MODESET_LOCK_ALL_* helpers instead of boilerplate
 ---------------------------------------------------------
 
@@ -300,6 +287,21 @@ it to use drm_mode_hsync() instead.
 
 Contact: Sean Paul
 
+drm_fb_helper tasks
+-------------------
+
+- drm_fb_helper_restore_fbdev_mode_unlocked() should call restore_fbdev_mode()
+  not the _force variant so it can bail out if there is a master. But first
+  these igt tests need to be fixed: kms_fbcon_fbt@psr and
+  kms_fbcon_fbt@psr-suspend.
+
+- The max connector argument for drm_fb_helper_init() and
+  drm_fb_helper_fbdev_setup() isn't used anymore and can be removed.
+
+- The helper doesn't keep an array of connectors anymore so these can be
+  removed: drm_fb_helper_single_add_all_connectors(),
+  drm_fb_helper_add_one_connector() and drm_fb_helper_remove_one_connector().
+
 Core refactorings
 =================
 
@@ -488,5 +490,20 @@ i915
   device_link_add to model the dependency between i915 and snd_had. See
   https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
 
+Bootsplash
+==========
+
+There is support in place now for writing internal DRM clients making it
+possible to pick up the bootsplash work that was rejected because it was written
+for fbdev.
+
+- [v6,8/8] drm/client: Hack: Add bootsplash example
+  https://patchwork.freedesktop.org/patch/306579/
+
+- [RFC PATCH v2 00/13] Kernel based bootsplash
+  https://lkml.org/lkml/2017/12/13/764
+
+Contact: Sam Ravnborg
+
 Outside DRM
 ===========
diff --git a/MAINTAINERS b/MAINTAINERS
index 57f496cff999..d6600715a662 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5140,6 +5140,13 @@ S:	Maintained
 F:	drivers/gpu/drm/tinydrm/st7735r.c
 F:	Documentation/devicetree/bindings/display/sitronix,st7735r.txt
 
+DRM DRIVER FOR ST-ERICSSON MCDE
+M:	Linus Walleij <linus.walleij@linaro.org>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+S:	Maintained
+F:	drivers/gpu/drm/mcde/
+F:	Documentation/devicetree/bindings/display/ste,mcde.txt
+
 DRM DRIVER FOR TDFX VIDEO CARDS
 S:	Orphan / Obsolete
 F:	drivers/gpu/drm/tdfx/
@@ -5184,6 +5191,7 @@ T:	git git://people.freedesktop.org/~thomash/linux
 S:	Supported
 F:	drivers/gpu/drm/vmwgfx/
 F:	include/uapi/drm/vmwgfx_drm.h
+F:	mm/as_dirty_helpers.c
 
 DRM DRIVERS
 M:	David Airlie <airlied@linux.ie>
@@ -5425,6 +5433,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 
 DRM PANEL DRIVERS
 M:	Thierry Reding <thierry.reding@gmail.com>
+R:	Sam Ravnborg <sam@ravnborg.org>
 L:	dri-devel@lists.freedesktop.org
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 S:	Maintained
@@ -5453,7 +5462,6 @@ F:	Documentation/gpu/xen-front.rst
 DRM TTM SUBSYSTEM
 M:	Christian Koenig <christian.koenig@amd.com>
 M:	Huang Rui <ray.huang@amd.com>
-M:	Junwei Zhang <Jerry.Zhang@amd.com>
 T:	git git://people.freedesktop.org/~agd5f/linux
 S:	Maintained
 L:	dri-devel@lists.freedesktop.org
diff --git a/Makefile b/Makefile
index b81e17261250..9514dac2660a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 2
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Golden Lions
 
 # *DOCUMENTATION*
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 8fbd583b18e1..e9d2e578cbe6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -51,7 +51,7 @@ endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS	+= -Wno-psabi
+KBUILD_CFLAGS	+= $(call cc-disable-warning, psabi)
 KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 3a1870228946..dff8f9ea5754 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -195,6 +195,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
 
+	start = round_down(start, stride);
+	end = round_up(end, stride);
+
 	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 7b7ac0f6cec9..d819a3e8b552 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -260,6 +260,13 @@ struct kvm_vcpu_events {
 	 KVM_REG_SIZE_U256 |						\
 	 ((i) & (KVM_ARM64_SVE_MAX_SLICES - 1)))
 
+/*
+ * Register values for KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() and
+ * KVM_REG_ARM64_SVE_FFR() are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
+ */
+
 #define KVM_ARM64_SVE_VQ_MIN __SVE_VQ_MIN
 #define KVM_ARM64_SVE_VQ_MAX __SVE_VQ_MAX
 
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index d78623acb649..97c53203150b 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -176,6 +176,10 @@ struct user_sve_header {
  *	FPCR	uint32_t			FPCR
  *
  * Additional data might be appended in the future.
+ *
+ * The Z-, P- and FFR registers are represented in memory in an endianness-
+ * invariant layout which differs from the layout used for the FPSIMD
+ * V-registers on big-endian systems: see sigcontext.h for more explanation.
  */
 
 #define SVE_PT_SVE_ZREG_SIZE(vq)	__SVE_ZREG_SIZE(vq)
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index 5f3c0cec5af9..3d448a0bb225 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -77,6 +77,15 @@ struct fpsimd_context {
 	__uint128_t vregs[32];
 };
 
+/*
+ * Note: similarly to all other integer fields, each V-register is stored in an
+ * endianness-dependent format, with the byte at offset i from the start of the
+ * in-memory representation of the register value containing
+ *
+ *    bits [(7 + 8 * i) : (8 * i)] of the register on little-endian hosts; or
+ *    bits [(127 - 8 * i) : (120 - 8 * i)] on big-endian hosts.
+ */
+
 /* ESR_EL1 context */
 #define ESR_MAGIC	0x45535201
 
@@ -204,6 +213,11 @@ struct sve_context {
  *	FFR	uint16_t[vq]			first-fault status register
  *
  * Additional data might be appended in the future.
+ *
+ * Unlike vregs[] in fpsimd_context, each SVE scalable register (Z-, P- or FFR)
+ * is encoded in memory in an endianness-invariant format, with the byte at
+ * offset i from the start of the in-memory representation containing bits
+ * [(7 + 8 * i) : (8 * i)] of the register value.
  */
 
 #define SVE_SIG_ZREG_SIZE(vq)	__SVE_ZREG_SIZE(vq)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index a38bf74bcca8..bb42cd04baec 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
+#include <linux/swab.h>
 
 #include <asm/esr.h>
 #include <asm/fpsimd.h>
@@ -352,6 +353,23 @@ static int __init sve_sysctl_init(void) { return 0; }
 #define ZREG(sve_state, vq, n) ((char *)(sve_state) +		\
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+	u64 a = swab64(x);
+	u64 b = swab64(x >> 64);
+
+	return ((__uint128_t)a << 64) | b;
+}
+#else
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+	return x;
+}
+#endif
+
+#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+
 /*
  * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
  * task->thread.sve_state.
@@ -369,14 +387,16 @@ static void fpsimd_to_sve(struct task_struct *task)
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t *p;
 
 	if (!system_supports_sve())
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i)
-		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t *)ZREG(sst, vq, i);
+		*p = arm64_cpu_to_le128(fst->vregs[i]);
+	}
 }
 
 /*
@@ -395,14 +415,16 @@ static void sve_to_fpsimd(struct task_struct *task)
 	void const *sst = task->thread.sve_state;
 	struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t const *p;
 
 	if (!system_supports_sve())
 		return;
 
 	vq = sve_vq_from_vl(task->thread.sve_vl);
-	for (i = 0; i < 32; ++i)
-		memcpy(&fst->vregs[i], ZREG(sst, vq, i),
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t const *)ZREG(sst, vq, i);
+		fst->vregs[i] = arm64_le128_to_cpu(*p);
+	}
 }
 
 #ifdef CONFIG_ARM64_SVE
@@ -491,6 +513,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 	void *sst = task->thread.sve_state;
 	struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 	unsigned int i;
+	__uint128_t *p;
 
 	if (!test_tsk_thread_flag(task, TIF_SVE))
 		return;
@@ -499,9 +522,10 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 
 	memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
 
-	for (i = 0; i < 32; ++i)
-		memcpy(ZREG(sst, vq, i), &fst->vregs[i],
-		       sizeof(fst->vregs[i]));
+	for (i = 0; i < 32; ++i) {
+		p = (__uint128_t *)ZREG(sst, vq, i);
+		*p = arm64_cpu_to_le128(fst->vregs[i]);
+	}
 }
 
 int sve_set_vector_length(struct task_struct *task,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7dede2e34b70..ccf00a8b98c6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -876,6 +876,23 @@ static inline int pmd_present(pmd_t pmd)
 	return false;
 }
 
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+	/*
+	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+	 *
+	 * This condition may also occur when flushing a pmd while flushing
+	 * it (see ptep_modify_prot_start), so callers must ensure this
+	 * case is fine as well.
+	 */
+	if ((pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID)) ==
+						cpu_to_be64(_PAGE_INVALID))
+		return true;
+
+	return false;
+}
+
 static inline int pmd_bad(pmd_t pmd)
 {
 	if (radix_enabled())
@@ -1092,6 +1109,19 @@ static inline int pmd_protnone(pmd_t pmd)
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
 {
+	/*
+	 * pmdp_invalidate sets this combination (which is not caught by
+	 * !pte_present() check in pte_access_permitted), to prevent
+	 * lock-free lookups, as part of the serialize_against_pte_lookup()
+	 * synchronisation.
+	 *
+	 * This also catches the case where the PTE's hardware PRESENT bit is
+	 * cleared while TLB is flushed, which is suboptimal but should not
+	 * be frequent.
+	 */
+	if (pmd_is_serializing(pmd))
+		return false;
+
 	return pte_access_permitted(pmd_pte(pmd), write);
 }
 
diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
index 3ffad030393c..461b0f193864 100644
--- a/arch/powerpc/include/asm/btext.h
+++ b/arch/powerpc/include/asm/btext.h
@@ -13,7 +13,11 @@ extern void btext_update_display(unsigned long phys, int width, int height,
 				 int depth, int pitch);
 extern void btext_setup_display(int width, int height, int depth, int pitch,
 				unsigned long address);
+#ifdef CONFIG_PPC32
 extern void btext_prepare_BAT(void);
+#else
+static inline void btext_prepare_BAT(void) { }
+#endif
 extern void btext_map(void);
 extern void btext_unmap(void);
 
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 4a585cba1787..c68476818753 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -94,6 +94,9 @@ static inline bool kdump_in_progress(void)
 	return crashing_cpu >= 0;
 }
 
+void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
+			 unsigned long start_address) __noreturn;
+
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index affe5dcce7f4..2b160d68db49 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c
@@ -30,7 +30,6 @@ typedef void (*relocate_new_kernel_t)(
  */
 void default_machine_kexec(struct kimage *image)
 {
-	extern const unsigned char relocate_new_kernel[];
 	extern const unsigned int relocate_new_kernel_size;
 	unsigned long page_list;
 	unsigned long reboot_code_buffer, reboot_code_buffer_phys;
@@ -58,6 +57,9 @@ void default_machine_kexec(struct kimage *image)
 				reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
+	if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+		relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
 	/* now call it */
 	rnk = (relocate_new_kernel_t) reboot_code_buffer;
 	(*rnk)(page_list, reboot_code_buffer_phys, image->start);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 3555cad7bdde..ed446b7ea164 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2336,6 +2336,7 @@ static void __init prom_check_displays(void)
 			prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
 				    width, height, pitch, addr);
 			btext_setup_display(width, height, 8, pitch, addr);
+			btext_prepare_BAT();
 		}
 #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
 	}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 518d416971c1..160bef0d553d 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -24,7 +24,7 @@ fi
 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
 _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
-logo_linux_clut224
+logo_linux_clut224 btext_prepare_BAT
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
 
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index a255707e4aee..01bc9663360d 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -112,6 +112,9 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 	/*
 	 * This ensures that generic code that rely on IRQ disabling
 	 * to prevent a parallel THP split work as expected.
+	 *
+	 * Marking the entry with _PAGE_INVALID && ~_PAGE_PRESENT requires
+	 * a special case check in pmd_access_permitted.
 	 */
 	serialize_against_pte_lookup(vma->vm_mm);
 	return __pmd(old_pmd);
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 39d2f8012386..fc10c0c24f51 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -368,13 +368,25 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
 	pdshift = PMD_SHIFT;
 	pmdp = pmd_offset(&pud, ea);
 	pmd  = READ_ONCE(*pmdp);
+
 	/*
-	 * A hugepage collapse is captured by pmd_none, because
-	 * it mark the pmd none and do a hpte invalidate.
+	 * A hugepage collapse is captured by this condition, see
+	 * pmdp_collapse_flush.
 	 */
 	if (pmd_none(pmd))
 		return NULL;
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * A hugepage split is captured by this condition, see
+	 * pmdp_invalidate.
+	 *
+	 * Huge page modification can be caught here too.
+	 */
+	if (pmd_is_serializing(pmd))
+		return NULL;
+#endif
+
 	if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
 		if (is_thp)
 			*is_thp = true;
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 9e27fa05a7ae..4c95c365058a 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -536,7 +536,7 @@ static inline void __fpregs_load_activate(void)
 	struct fpu *fpu = &current->thread.fpu;
 	int cpu = smp_processor_id();
 
-	if (WARN_ON_ONCE(current->mm == NULL))
+	if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
 		return;
 
 	if (!fpregs_state_valid(fpu, cpu)) {
@@ -567,11 +567,11 @@ static inline void __fpregs_load_activate(void)
  * otherwise.
  *
  * The FPU context is only stored/restored for a user task and
- * ->mm is used to distinguish between kernel and user threads.
+ * PF_KTHREAD is used to distinguish between kernel and user threads.
  */
 static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
+	if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 9f15384c504a..310118805f57 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -52,6 +52,9 @@
 
 #define INTEL_FAM6_CANNONLAKE_MOBILE	0x66
 
+#define INTEL_FAM6_ICELAKE_X		0x6A
+#define INTEL_FAM6_ICELAKE_XEON_D	0x6C
+#define INTEL_FAM6_ICELAKE_DESKTOP	0x7D
 #define INTEL_FAM6_ICELAKE_MOBILE	0x7E
 
 /* "Small Core" Processors (Atom) */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 70a04436380e..a813987b5552 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -872,7 +872,7 @@ int __init microcode_init(void)
 		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
-	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
+	cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
 				  mc_cpu_online, mc_cpu_down_prep);
 
 	pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 7ee93125a211..397206f23d14 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -360,6 +360,9 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
 	struct list_head *head;
 	struct rdtgroup *entry;
 
+	if (!is_mbm_local_enabled())
+		return;
+
 	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
 	closid = rgrp->closid;
 	rmid = rgrp->mon.rmid;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2f48f208f7e2..2131b8bbaad7 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2534,7 +2534,12 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
 		if (closid_allocated(i) && i != closid) {
 			mode = rdtgroup_mode_by_closid(i);
 			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
-				break;
+				/*
+				 * ctrl values for locksetup aren't relevant
+				 * until the schemata is written, and the mode
+				 * becomes RDT_MODE_PSEUDO_LOCKED.
+				 */
+				continue;
 			/*
 			 * If CDP is active include peer domain's
 			 * usage to ensure there is no overlap
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 466fca686fb9..649fbc3fcf9f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -102,7 +102,7 @@ static void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (current->mm) {
+	if (!(current->flags & PF_KTHREAD)) {
 		if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
 			set_thread_flag(TIF_NEED_FPU_LOAD);
 			/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 5a8d118bc423..0071b794ed19 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -5,6 +5,7 @@
 
 #include <linux/compat.h>
 #include <linux/cpu.h>
+#include <linux/pagemap.h>
 
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
@@ -61,6 +62,11 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
 		struct user_i387_ia32_struct env;
 		struct _fpstate_32 __user *fp = buf;
 
+		fpregs_lock();
+		if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+			copy_fxregs_to_kernel(&tsk->thread.fpu);
+		fpregs_unlock();
+
 		convert_from_fxsr(&env, tsk);
 
 		if (__copy_to_user(buf, &env, sizeof(env)) ||
@@ -189,15 +195,7 @@ retry:
 	fpregs_unlock();
 
 	if (ret) {
-		int aligned_size;
-		int nr_pages;
-
-		aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size;
-		nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE);
-
-		ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages,
-					      NULL, FOLL_WRITE);
-		if (ret == nr_pages)
+		if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
 			goto retry;
 		return -EFAULT;
 	}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 9a8c1648fc9a..6690c5652aeb 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -758,7 +758,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 		       BREAK_INSTR_SIZE);
 	bpt->type = BP_POKE_BREAKPOINT;
 
-	return err;
+	return 0;
 }
 
 int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8dc0fc0b1382..296da58f3013 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -199,7 +199,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
 	if (!pgtable_l5_enabled())
 		return (p4d_t *)pgd;
 
-	p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+	p4d = pgd_val(*pgd) & PTE_PFN_MASK;
 	p4d += __START_KERNEL_map - phys_base;
 	return (p4d_t *)p4d + p4d_index(addr);
 }
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index dc3f058bdf9b..dc6182eecefa 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -52,7 +52,7 @@ static __initdata struct kaslr_memory_region {
 } kaslr_regions[] = {
 	{ &page_offset_base, 0 },
 	{ &vmalloc_base, 0 },
-	{ &vmemmap_base, 1 },
+	{ &vmemmap_base, 0 },
 };
 
 /* Get size in bytes used by the memory region */
@@ -78,6 +78,7 @@ void __init kernel_randomize_memory(void)
 	unsigned long rand, memory_tb;
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
+	unsigned long vmemmap_size;
 
 	vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
 	vaddr = vaddr_start;
@@ -109,6 +110,14 @@ void __init kernel_randomize_memory(void)
 	if (memory_tb < kaslr_regions[0].size_tb)
 		kaslr_regions[0].size_tb = memory_tb;
 
+	/*
+	 * Calculate the vmemmap region size in TBs, aligned to a TB
+	 * boundary.
+	 */
+	vmemmap_size = (kaslr_regions[0].size_tb << (TB_SHIFT - PAGE_SHIFT)) *
+			sizeof(struct page);
+	kaslr_regions[2].size_tb = DIV_ROUND_UP(vmemmap_size, 1UL << TB_SHIFT);
+
 	/* Calculate entropy available between regions */
 	remain_entropy = vaddr_end - vaddr_start;
 	for (i = 0; i < ARRAY_SIZE(kaslr_regions); i++)
diff --git a/block/Kconfig b/block/Kconfig
index 1b220101a9cb..2466dcc3ef1d 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -73,6 +73,7 @@ config BLK_DEV_INTEGRITY
 
 config BLK_DEV_ZONED
 	bool "Zoned block device support"
+	select MQ_IOSCHED_DEADLINE
 	---help---
 	Block layer zoned block device support. This option enables
 	support for ZAC/ZBC host-managed and host-aware zoned block devices.
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 59f46904cb11..b3796a40a61a 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1046,8 +1046,7 @@ struct blkcg_policy blkcg_policy_bfq = {
 struct cftype bfq_blkcg_legacy_files[] = {
 	{
 		.name = "bfq.weight",
-		.link_name = "weight",
-		.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED,
+		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = bfq_io_show_weight,
 		.write_u64 = bfq_io_set_weight_legacy,
 	},
@@ -1167,8 +1166,7 @@ struct cftype bfq_blkcg_legacy_files[] = {
 struct cftype bfq_blkg_files[] = {
 	{
 		.name = "bfq.weight",
-		.link_name = "weight",
-		.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED,
+		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = bfq_io_show_weight,
 		.write = bfq_io_set_weight,
 	},
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 6aea0ebc3a73..2489ddbb21db 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -821,38 +821,28 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
 	{},
 };
 
-static bool debugfs_create_files(struct dentry *parent, void *data,
+static void debugfs_create_files(struct dentry *parent, void *data,
 				 const struct blk_mq_debugfs_attr *attr)
 {
 	if (IS_ERR_OR_NULL(parent))
-		return false;
+		return;
 
 	d_inode(parent)->i_private = data;
 
-	for (; attr->name; attr++) {
-		if (!debugfs_create_file(attr->name, attr->mode, parent,
-					 (void *)attr, &blk_mq_debugfs_fops))
-			return false;
-	}
-	return true;
+	for (; attr->name; attr++)
+		debugfs_create_file(attr->name, attr->mode, parent,
+				    (void *)attr, &blk_mq_debugfs_fops);
 }
 
-int blk_mq_debugfs_register(struct request_queue *q)
+void blk_mq_debugfs_register(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	if (!blk_debugfs_root)
-		return -ENOENT;
-
 	q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
 					    blk_debugfs_root);
-	if (!q->debugfs_dir)
-		return -ENOMEM;
 
-	if (!debugfs_create_files(q->debugfs_dir, q,
-				  blk_mq_debugfs_queue_attrs))
-		goto err;
+	debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
 	/*
 	 * blk_mq_init_sched() attempted to do this already, but q->debugfs_dir
@@ -864,11 +854,10 @@ int blk_mq_debugfs_register(struct request_queue *q)
 
 	/* Similarly, blk_mq_init_hctx() couldn't do this previously. */
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (!hctx->debugfs_dir && blk_mq_debugfs_register_hctx(q, hctx))
-			goto err;
-		if (q->elevator && !hctx->sched_debugfs_dir &&
-		    blk_mq_debugfs_register_sched_hctx(q, hctx))
-			goto err;
+		if (!hctx->debugfs_dir)
+			blk_mq_debugfs_register_hctx(q, hctx);
+		if (q->elevator && !hctx->sched_debugfs_dir)
+			blk_mq_debugfs_register_sched_hctx(q, hctx);
 	}
 
 	if (q->rq_qos) {
@@ -879,12 +868,6 @@ int blk_mq_debugfs_register(struct request_queue *q)
 			rqos = rqos->next;
 		}
 	}
-
-	return 0;
-
-err:
-	blk_mq_debugfs_unregister(q);
-	return -ENOMEM;
 }
 
 void blk_mq_debugfs_unregister(struct request_queue *q)
@@ -894,52 +877,32 @@ void blk_mq_debugfs_unregister(struct request_queue *q)
 	q->debugfs_dir = NULL;
 }
 
-static int blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
-				       struct blk_mq_ctx *ctx)
+static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
+					struct blk_mq_ctx *ctx)
 {
 	struct dentry *ctx_dir;
 	char name[20];
 
 	snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
 	ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir);
-	if (!ctx_dir)
-		return -ENOMEM;
 
-	if (!debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs))
-		return -ENOMEM;
-
-	return 0;
+	debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs);
 }
 
-int blk_mq_debugfs_register_hctx(struct request_queue *q,
-				 struct blk_mq_hw_ctx *hctx)
+void blk_mq_debugfs_register_hctx(struct request_queue *q,
+				  struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_ctx *ctx;
 	char name[20];
 	int i;
 
-	if (!q->debugfs_dir)
-		return -ENOENT;
-
 	snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
 	hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
-	if (!hctx->debugfs_dir)
-		return -ENOMEM;
-
-	if (!debugfs_create_files(hctx->debugfs_dir, hctx,
-				  blk_mq_debugfs_hctx_attrs))
-		goto err;
-
-	hctx_for_each_ctx(hctx, ctx, i) {
-		if (blk_mq_debugfs_register_ctx(hctx, ctx))
-			goto err;
-	}
 
-	return 0;
+	debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs);
 
-err:
-	blk_mq_debugfs_unregister_hctx(hctx);
-	return -ENOMEM;
+	hctx_for_each_ctx(hctx, ctx, i)
+		blk_mq_debugfs_register_ctx(hctx, ctx);
 }
 
 void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
@@ -949,17 +912,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
 	hctx->debugfs_dir = NULL;
 }
 
-int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+void blk_mq_debugfs_register_hctxs(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (blk_mq_debugfs_register_hctx(q, hctx))
-			return -ENOMEM;
-	}
-
-	return 0;
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_debugfs_register_hctx(q, hctx);
 }
 
 void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
@@ -971,29 +930,16 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
 		blk_mq_debugfs_unregister_hctx(hctx);
 }
 
-int blk_mq_debugfs_register_sched(struct request_queue *q)
+void blk_mq_debugfs_register_sched(struct request_queue *q)
 {
 	struct elevator_type *e = q->elevator->type;
 
-	if (!q->debugfs_dir)
-		return -ENOENT;
-
 	if (!e->queue_debugfs_attrs)
-		return 0;
+		return;
 
 	q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir);
-	if (!q->sched_debugfs_dir)
-		return -ENOMEM;
 
-	if (!debugfs_create_files(q->sched_debugfs_dir, q,
-				  e->queue_debugfs_attrs))
-		goto err;
-
-	return 0;
-
-err:
-	blk_mq_debugfs_unregister_sched(q);
-	return -ENOMEM;
+	debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs);
 }
 
 void blk_mq_debugfs_unregister_sched(struct request_queue *q)
@@ -1008,36 +954,22 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 	rqos->debugfs_dir = NULL;
 }
 
-int blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 {
 	struct request_queue *q = rqos->q;
 	const char *dir_name = rq_qos_id_to_name(rqos->id);
 
-	if (!q->debugfs_dir)
-		return -ENOENT;
-
 	if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
-		return 0;
+		return;
 
-	if (!q->rqos_debugfs_dir) {
+	if (!q->rqos_debugfs_dir)
 		q->rqos_debugfs_dir = debugfs_create_dir("rqos",
 							 q->debugfs_dir);
-		if (!q->rqos_debugfs_dir)
-			return -ENOMEM;
-	}
 
 	rqos->debugfs_dir = debugfs_create_dir(dir_name,
 					       rqos->q->rqos_debugfs_dir);
-	if (!rqos->debugfs_dir)
-		return -ENOMEM;
 
-	if (!debugfs_create_files(rqos->debugfs_dir, rqos,
-				  rqos->ops->debugfs_attrs))
-		goto err;
-	return 0;
- err:
-	blk_mq_debugfs_unregister_rqos(rqos);
-	return -ENOMEM;
+	debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
 }
 
 void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
@@ -1046,27 +978,18 @@ void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
 	q->rqos_debugfs_dir = NULL;
 }
 
-int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
-				       struct blk_mq_hw_ctx *hctx)
+void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
+					struct blk_mq_hw_ctx *hctx)
 {
 	struct elevator_type *e = q->elevator->type;
 
-	if (!hctx->debugfs_dir)
-		return -ENOENT;
-
 	if (!e->hctx_debugfs_attrs)
-		return 0;
+		return;
 
 	hctx->sched_debugfs_dir = debugfs_create_dir("sched",
 						     hctx->debugfs_dir);
-	if (!hctx->sched_debugfs_dir)
-		return -ENOMEM;
-
-	if (!debugfs_create_files(hctx->sched_debugfs_dir, hctx,
-				  e->hctx_debugfs_attrs))
-		return -ENOMEM;
-
-	return 0;
+	debugfs_create_files(hctx->sched_debugfs_dir, hctx,
+			     e->hctx_debugfs_attrs);
 }
 
 void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index 8c9012a578c1..a68aa6041a10 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -18,74 +18,68 @@ struct blk_mq_debugfs_attr {
 int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq);
 int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
 
-int blk_mq_debugfs_register(struct request_queue *q);
+void blk_mq_debugfs_register(struct request_queue *q);
 void blk_mq_debugfs_unregister(struct request_queue *q);
-int blk_mq_debugfs_register_hctx(struct request_queue *q,
-				 struct blk_mq_hw_ctx *hctx);
+void blk_mq_debugfs_register_hctx(struct request_queue *q,
+				  struct blk_mq_hw_ctx *hctx);
 void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
-int blk_mq_debugfs_register_hctxs(struct request_queue *q);
+void blk_mq_debugfs_register_hctxs(struct request_queue *q);
 void blk_mq_debugfs_unregister_hctxs(struct request_queue *q);
 
-int blk_mq_debugfs_register_sched(struct request_queue *q);
+void blk_mq_debugfs_register_sched(struct request_queue *q);
 void blk_mq_debugfs_unregister_sched(struct request_queue *q);
-int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
+void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 				       struct blk_mq_hw_ctx *hctx);
 void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
 
-int blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
+void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
 void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
 void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q);
 #else
-static inline int blk_mq_debugfs_register(struct request_queue *q)
+static inline void blk_mq_debugfs_register(struct request_queue *q)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister(struct request_queue *q)
 {
 }
 
-static inline int blk_mq_debugfs_register_hctx(struct request_queue *q,
-					       struct blk_mq_hw_ctx *hctx)
+static inline void blk_mq_debugfs_register_hctx(struct request_queue *q,
+						struct blk_mq_hw_ctx *hctx)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
 {
 }
 
-static inline int blk_mq_debugfs_register_hctxs(struct request_queue *q)
+static inline void blk_mq_debugfs_register_hctxs(struct request_queue *q)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
 {
 }
 
-static inline int blk_mq_debugfs_register_sched(struct request_queue *q)
+static inline void blk_mq_debugfs_register_sched(struct request_queue *q)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister_sched(struct request_queue *q)
 {
 }
 
-static inline int blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
-						     struct blk_mq_hw_ctx *hctx)
+static inline void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
+						      struct blk_mq_hw_ctx *hctx)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
 {
 }
 
-static inline int blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 {
-	return 0;
 }
 
 static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 500cb04901cc..2766066a15db 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -555,7 +555,6 @@ void blk_mq_sched_free_requests(struct request_queue *q)
 	int i;
 
 	lockdep_assert_held(&q->sysfs_lock);
-	WARN_ON(!q->elevator);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index aaa57e0c809d..4a2dff303865 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4460,9 +4460,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "ST3320[68]13AS",	"SD1[5-9]",	ATA_HORKAGE_NONCQ |
 						ATA_HORKAGE_FIRMWARE_WARN },
 
-	/* drives which fail FPDMA_AA activation (some may freeze afterwards) */
-	{ "ST1000LM024 HN-M101MBB", "2AR10001",	ATA_HORKAGE_BROKEN_FPDMA_AA },
-	{ "ST1000LM024 HN-M101MBB", "2BA30001",	ATA_HORKAGE_BROKEN_FPDMA_AA },
+	/* drives which fail FPDMA_AA activation (some may freeze afterwards)
+	   the ST disks also have LPM issues */
+	{ "ST1000LM024 HN-M101MBB", "2AR10001",	ATA_HORKAGE_BROKEN_FPDMA_AA |
+						ATA_HORKAGE_NOLPM, },
+	{ "ST1000LM024 HN-M101MBB", "2BA30001",	ATA_HORKAGE_BROKEN_FPDMA_AA |
+						ATA_HORKAGE_NOLPM, },
 	{ "VB0250EAVER",	"HPG7",		ATA_HORKAGE_BROKEN_FPDMA_AA },
 
 	/* Blacklist entries taken from Silicon Image 3124/3132
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index e038e2b3b7ea..0bbb328bd17f 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -755,10 +755,32 @@ void devm_remove_action(struct device *dev, void (*action)(void *), void *data)
 
 	WARN_ON(devres_destroy(dev, devm_action_release, devm_action_match,
 			       &devres));
-
 }
 EXPORT_SYMBOL_GPL(devm_remove_action);
 
+/**
+ * devm_release_action() - release previously added custom action
+ * @dev: Device that owns the action
+ * @action: Function implementing the action
+ * @data: Pointer to data passed to @action implementation
+ *
+ * Releases and removes instance of @action previously added by
+ * devm_add_action().  Both action and data should match one of the
+ * existing entries.
+ */
+void devm_release_action(struct device *dev, void (*action)(void *), void *data)
+{
+	struct action_devres devres = {
+		.data = data,
+		.action = action,
+	};
+
+	WARN_ON(devres_release(dev, devm_action_release, devm_action_match,
+			       &devres));
+
+}
+EXPORT_SYMBOL_GPL(devm_release_action);
+
 /*
  * Managed kmalloc/kfree
  */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 5d1c261a2cfd..fca0c97ff1aa 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -74,10 +74,6 @@ int null_zone_report(struct gendisk *disk, sector_t sector,
 	struct nullb_device *dev = nullb->dev;
 	unsigned int zno, nrz = 0;
 
-	if (!dev->zoned)
-		/* Not a zoned null device */
-		return -EOPNOTSUPP;
-
 	zno = null_zone_no(dev, sector);
 	if (zno < dev->nr_zones) {
 		nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 4c7f51b1eda9..4628e1a27a2b 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -767,7 +767,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 	strlcpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
 	set_capacity(gendisk, priv->size >> 9);
 
-	dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
+	dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n",
 		 gendisk->disk_name, get_capacity(gendisk) >> 11);
 
 	device_add_disk(&dev->core, gendisk, NULL);
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index b2a951a798e2..5c69c9a9a6a4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -149,22 +149,22 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
 	return val;
 }
 
-static u64 arch_counter_get_cntpct_stable(void)
+static notrace u64 arch_counter_get_cntpct_stable(void)
 {
 	return __arch_counter_get_cntpct_stable();
 }
 
-static u64 arch_counter_get_cntpct(void)
+static notrace u64 arch_counter_get_cntpct(void)
 {
 	return __arch_counter_get_cntpct();
 }
 
-static u64 arch_counter_get_cntvct_stable(void)
+static notrace u64 arch_counter_get_cntvct_stable(void)
 {
 	return __arch_counter_get_cntvct_stable();
 }
 
-static u64 arch_counter_get_cntvct(void)
+static notrace u64 arch_counter_get_cntvct(void)
 {
 	return __arch_counter_get_cntvct();
 }
diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index e40b55a7086f..5394d9dbdfbc 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -896,7 +896,7 @@ static int omap_dm_timer_remove(struct platform_device *pdev)
 	return ret;
 }
 
-const static struct omap_dm_timer_ops dmtimer_ops = {
+static const struct omap_dm_timer_ops dmtimer_ops = {
 	.request_by_node = omap_dm_timer_request_by_node,
 	.request_specific = omap_dm_timer_request_specific,
 	.request = omap_dm_timer_request,
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 996d68ff992a..8465d12fecba 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -27,9 +27,8 @@ static void dev_dax_percpu_release(struct percpu_ref *ref)
 	complete(&dev_dax->cmp);
 }
 
-static void dev_dax_percpu_exit(void *data)
+static void dev_dax_percpu_exit(struct percpu_ref *ref)
 {
-	struct percpu_ref *ref = data;
 	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
 
 	dev_dbg(&dev_dax->dev, "%s\n", __func__);
@@ -466,18 +465,12 @@ int dev_dax_probe(struct device *dev)
 	if (rc)
 		return rc;
 
-	rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
-	if (rc)
-		return rc;
-
 	dev_dax->pgmap.ref = &dev_dax->ref;
 	dev_dax->pgmap.kill = dev_dax_percpu_kill;
+	dev_dax->pgmap.cleanup = dev_dax_percpu_exit;
 	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
-	if (IS_ERR(addr)) {
-		devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
-		percpu_ref_exit(&dev_dax->ref);
+	if (IS_ERR(addr))
 		return PTR_ERR(addr);
-	}
 
 	inode = dax_inode(dax_dev);
 	cdev = inode->i_cdev;
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7c858020d14b..6c15deb5d4ad 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -34,8 +34,10 @@
 #include <linux/poll.h>
 #include <linux/reservation.h>
 #include <linux/mm.h>
+#include <linux/mount.h>
 
 #include <uapi/linux/dma-buf.h>
+#include <uapi/linux/magic.h>
 
 static inline int is_dma_buf_file(struct file *);
 
@@ -46,6 +48,41 @@ struct dma_buf_list {
 
 static struct dma_buf_list db_list;
 
+static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	struct dma_buf *dmabuf;
+	char name[DMA_BUF_NAME_LEN];
+	size_t ret = 0;
+
+	dmabuf = dentry->d_fsdata;
+	mutex_lock(&dmabuf->lock);
+	if (dmabuf->name)
+		ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
+	mutex_unlock(&dmabuf->lock);
+
+	return dynamic_dname(dentry, buffer, buflen, "/%s:%s",
+			     dentry->d_name.name, ret > 0 ? name : "");
+}
+
+static const struct dentry_operations dma_buf_dentry_ops = {
+	.d_dname = dmabuffs_dname,
+};
+
+static struct vfsmount *dma_buf_mnt;
+
+static struct dentry *dma_buf_fs_mount(struct file_system_type *fs_type,
+		int flags, const char *name, void *data)
+{
+	return mount_pseudo(fs_type, "dmabuf:", NULL, &dma_buf_dentry_ops,
+			DMA_BUF_MAGIC);
+}
+
+static struct file_system_type dma_buf_fs_type = {
+	.name = "dmabuf",
+	.mount = dma_buf_fs_mount,
+	.kill_sb = kill_anon_super,
+};
+
 static int dma_buf_release(struct inode *inode, struct file *file)
 {
 	struct dma_buf *dmabuf;
@@ -90,6 +127,10 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
 
 	dmabuf = file->private_data;
 
+	/* check if buffer supports mmap */
+	if (!dmabuf->ops->mmap)
+		return -EINVAL;
+
 	/* check for overflowing the buffer's size */
 	if (vma->vm_pgoff + vma_pages(vma) >
 	    dmabuf->size >> PAGE_SHIFT)
@@ -276,6 +317,43 @@ out:
 	return events;
 }
 
+/**
+ * dma_buf_set_name - Set a name to a specific dma_buf to track the usage.
+ * The name of the dma-buf buffer can only be set when the dma-buf is not
+ * attached to any devices. It could theoritically support changing the
+ * name of the dma-buf if the same piece of memory is used for multiple
+ * purpose between different devices.
+ *
+ * @dmabuf [in]     dmabuf buffer that will be renamed.
+ * @buf:   [in]     A piece of userspace memory that contains the name of
+ *                  the dma-buf.
+ *
+ * Returns 0 on success. If the dma-buf buffer is already attached to
+ * devices, return -EBUSY.
+ *
+ */
+static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
+{
+	char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
+	long ret = 0;
+
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	mutex_lock(&dmabuf->lock);
+	if (!list_empty(&dmabuf->attachments)) {
+		ret = -EBUSY;
+		kfree(name);
+		goto out_unlock;
+	}
+	kfree(dmabuf->name);
+	dmabuf->name = name;
+
+out_unlock:
+	mutex_unlock(&dmabuf->lock);
+	return ret;
+}
+
 static long dma_buf_ioctl(struct file *file,
 			  unsigned int cmd, unsigned long arg)
 {
@@ -314,11 +392,29 @@ static long dma_buf_ioctl(struct file *file,
 			ret = dma_buf_begin_cpu_access(dmabuf, direction);
 
 		return ret;
+
+	case DMA_BUF_SET_NAME:
+		return dma_buf_set_name(dmabuf, (const char __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
 }
 
+static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
+{
+	struct dma_buf *dmabuf = file->private_data;
+
+	seq_printf(m, "size:\t%zu\n", dmabuf->size);
+	/* Don't count the temporary reference taken inside procfs seq_show */
+	seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1);
+	seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name);
+	mutex_lock(&dmabuf->lock);
+	if (dmabuf->name)
+		seq_printf(m, "name:\t%s\n", dmabuf->name);
+	mutex_unlock(&dmabuf->lock);
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
 	.mmap		= dma_buf_mmap_internal,
@@ -328,6 +424,7 @@ static const struct file_operations dma_buf_fops = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= dma_buf_ioctl,
 #endif
+	.show_fdinfo	= dma_buf_show_fdinfo,
 };
 
 /*
@@ -338,6 +435,32 @@ static inline int is_dma_buf_file(struct file *file)
 	return file->f_op == &dma_buf_fops;
 }
 
+static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
+{
+	struct file *file;
+	struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
+
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
+
+	inode->i_size = dmabuf->size;
+	inode_set_bytes(inode, dmabuf->size);
+
+	file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
+				 flags, &dma_buf_fops);
+	if (IS_ERR(file))
+		goto err_alloc_file;
+	file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
+	file->private_data = dmabuf;
+	file->f_path.dentry->d_fsdata = dmabuf;
+
+	return file;
+
+err_alloc_file:
+	iput(inode);
+	return file;
+}
+
 /**
  * DOC: dma buf device access
  *
@@ -404,8 +527,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 			  || !exp_info->ops
 			  || !exp_info->ops->map_dma_buf
 			  || !exp_info->ops->unmap_dma_buf
-			  || !exp_info->ops->release
-			  || !exp_info->ops->mmap)) {
+			  || !exp_info->ops->release)) {
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -433,8 +555,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	}
 	dmabuf->resv = resv;
 
-	file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
-					exp_info->flags);
+	file = dma_buf_getfile(dmabuf, exp_info->flags);
 	if (IS_ERR(file)) {
 		ret = PTR_ERR(file);
 		goto err_dmabuf;
@@ -573,6 +694,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	list_add(&attach->node, &dmabuf->attachments);
 
 	mutex_unlock(&dmabuf->lock);
+
 	return attach;
 
 err_attach:
@@ -595,6 +717,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 	if (WARN_ON(!dmabuf || !attach))
 		return;
 
+	if (attach->sgt)
+		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
+
 	mutex_lock(&dmabuf->lock);
 	list_del(&attach->node);
 	if (dmabuf->ops->detach)
@@ -630,10 +755,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf))
 		return ERR_PTR(-EINVAL);
 
+	if (attach->sgt) {
+		/*
+		 * Two mappings with different directions for the same
+		 * attachment are not allowed.
+		 */
+		if (attach->dir != direction &&
+		    attach->dir != DMA_BIDIRECTIONAL)
+			return ERR_PTR(-EBUSY);
+
+		return attach->sgt;
+	}
+
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
 	if (!sg_table)
 		sg_table = ERR_PTR(-ENOMEM);
 
+	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
+		attach->sgt = sg_table;
+		attach->dir = direction;
+	}
+
 	return sg_table;
 }
 EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
@@ -657,8 +799,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
 		return;
 
-	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
-						direction);
+	if (attach->sgt == sg_table)
+		return;
+
+	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
@@ -906,6 +1050,10 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
 	if (WARN_ON(!dmabuf || !vma))
 		return -EINVAL;
 
+	/* check if buffer supports mmap */
+	if (!dmabuf->ops->mmap)
+		return -EINVAL;
+
 	/* check for offset overflow */
 	if (pgoff + vma_pages(vma) < pgoff)
 		return -EOVERFLOW;
@@ -1025,8 +1173,8 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		return ret;
 
 	seq_puts(s, "\nDma-buf Objects:\n");
-	seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\n",
-		   "size", "flags", "mode", "count");
+	seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\n",
+		   "size", "flags", "mode", "count", "ino");
 
 	list_for_each_entry(buf_obj, &db_list.head, list_node) {
 		ret = mutex_lock_interruptible(&buf_obj->lock);
@@ -1037,11 +1185,13 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 			continue;
 		}
 
-		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n",
+		seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
 				buf_obj->size,
 				buf_obj->file->f_flags, buf_obj->file->f_mode,
 				file_count(buf_obj->file),
-				buf_obj->exp_name);
+				buf_obj->exp_name,
+				file_inode(buf_obj->file)->i_ino,
+				buf_obj->name ?: "");
 
 		robj = buf_obj->resv;
 		while (true) {
@@ -1068,6 +1218,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 				   fence->ops->get_driver_name(fence),
 				   fence->ops->get_timeline_name(fence),
 				   dma_fence_is_signaled(fence) ? "" : "un");
+			dma_fence_put(fence);
 		}
 		rcu_read_unlock();
 
@@ -1136,6 +1287,10 @@ static inline void dma_buf_uninit_debugfs(void)
 
 static int __init dma_buf_init(void)
 {
+	dma_buf_mnt = kern_mount(&dma_buf_fs_type);
+	if (IS_ERR(dma_buf_mnt))
+		return PTR_ERR(dma_buf_mnt);
+
 	mutex_init(&db_list.lock);
 	INIT_LIST_HEAD(&db_list.head);
 	dma_buf_init_debugfs();
@@ -1146,5 +1301,6 @@ subsys_initcall(dma_buf_init);
 static void __exit dma_buf_deinit(void)
 {
 	dma_buf_uninit_debugfs();
+	kern_unmount(dma_buf_mnt);
 }
 __exitcall(dma_buf_deinit);
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 227a19476d56..59ac96ec7ba8 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -248,8 +248,25 @@ void dma_fence_release(struct kref *kref)
 
 	trace_dma_fence_destroy(fence);
 
-	/* Failed to signal before release, could be a refcounting issue */
-	WARN_ON(!list_empty(&fence->cb_list));
+	if (WARN(!list_empty(&fence->cb_list),
+		 "Fence %s:%s:%llx:%llx released with pending signals!\n",
+		 fence->ops->get_driver_name(fence),
+		 fence->ops->get_timeline_name(fence),
+		 fence->context, fence->seqno)) {
+		unsigned long flags;
+
+		/*
+		 * Failed to signal before release, likely a refcounting issue.
+		 *
+		 * This should never happen, but if it does make sure that we
+		 * don't leave chains dangling. We set the error flag first
+		 * so that the callbacks know this signal is due to an error.
+		 */
+		spin_lock_irqsave(fence->lock, flags);
+		fence->error = -EDEADLK;
+		dma_fence_signal_locked(fence);
+		spin_unlock_irqrestore(fence->lock, flags);
+	}
 
 	if (fence->ops->release)
 		fence->ops->release(fence);
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 4d32e2c67862..4447e13d1e89 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -365,6 +365,10 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
 					   GFP_NOWAIT | __GFP_NOWARN);
 			if (!nshared) {
 				rcu_read_unlock();
+
+				dma_fence_put(fence_excl);
+				fence_excl = NULL;
+
 				nshared = krealloc(shared, sz, GFP_KERNEL);
 				if (nshared) {
 					shared = nshared;
diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index 3bb462cfb06c..101394f16930 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -188,29 +188,3 @@ static __init int sync_debugfs_init(void)
 	return 0;
 }
 late_initcall(sync_debugfs_init);
-
-#define DUMP_CHUNK 256
-static char sync_dump_buf[64 * 1024];
-void sync_dump(void)
-{
-	struct seq_file s = {
-		.buf = sync_dump_buf,
-		.size = sizeof(sync_dump_buf) - 1,
-	};
-	int i;
-
-	sync_info_debugfs_show(&s, NULL);
-
-	for (i = 0; i < s.count; i += DUMP_CHUNK) {
-		if ((s.count - i) > DUMP_CHUNK) {
-			char c = s.buf[i + DUMP_CHUNK];
-
-			s.buf[i + DUMP_CHUNK] = 0;
-			pr_cont("%s", s.buf + i);
-			s.buf[i + DUMP_CHUNK] = c;
-		} else {
-			s.buf[s.count] = 0;
-			pr_cont("%s", s.buf + i);
-		}
-	}
-}
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index 05e33f937ad0..6176e52ba2d7 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -68,6 +68,5 @@ void sync_timeline_debug_add(struct sync_timeline *obj);
 void sync_timeline_debug_remove(struct sync_timeline *obj);
 void sync_file_debug_add(struct sync_file *fence);
 void sync_file_debug_remove(struct sync_file *fence);
-void sync_dump(void);
 
 #endif /* _LINUX_SYNC_H */
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 2328d04201a9..cfe827cefad8 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -305,7 +305,8 @@ static const struct regmap_config pca953x_i2c_regmap = {
 	.volatile_reg = pca953x_volatile_register,
 
 	.cache_type = REGCACHE_RBTREE,
-	.max_register = 0x7f,
+	/* REVISIT: should be 0x7f but some 24 bit chips use REG_ADDR_AI */
+	.max_register = 0xff,
 };
 
 static u8 pca953x_recalc_addr(struct pca953x_chip *chip, int reg, int off,
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 36f900d63979..b9362b4f6353 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -161,6 +161,13 @@ config DRM_TTM
 	  GPU memory types. Will be enabled automatically if a device driver
 	  uses it.
 
+config DRM_VRAM_HELPER
+	tristate
+	depends on DRM
+	select DRM_TTM
+	help
+	  Helpers for VRAM memory management
+
 config DRM_GEM_CMA_HELPER
 	bool
 	depends on DRM
@@ -309,6 +316,8 @@ source "drivers/gpu/drm/sti/Kconfig"
 
 source "drivers/gpu/drm/imx/Kconfig"
 
+source "drivers/gpu/drm/ingenic/Kconfig"
+
 source "drivers/gpu/drm/v3d/Kconfig"
 
 source "drivers/gpu/drm/vc4/Kconfig"
@@ -343,6 +352,8 @@ source "drivers/gpu/drm/panfrost/Kconfig"
 
 source "drivers/gpu/drm/aspeed/Kconfig"
 
+source "drivers/gpu/drm/mcde/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 72f5036d9bfa..9f0d2ee35794 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -17,7 +17,7 @@ drm-y       :=	drm_auth.o drm_cache.o \
 		drm_plane.o drm_color_mgmt.o drm_print.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
 		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
-		drm_atomic_uapi.o
+		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
 
 drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
 drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
@@ -32,13 +32,18 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
 drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
 drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 
+drm_vram_helper-y := drm_gem_vram_helper.o \
+		     drm_vram_helper_common.o \
+		     drm_vram_mm_helper.o
+obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
+
 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_dsc.o drm_probe_helper.o \
 		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
 		drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
 		drm_simple_kms_helper.o drm_modeset_helper.o \
 		drm_scdc_helper.o drm_gem_framebuffer_helper.o \
 		drm_atomic_state_helper.o drm_damage_helper.o \
-		drm_format_helper.o
+		drm_format_helper.o drm_self_refresh_helper.o
 
 drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
 drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
@@ -94,6 +99,7 @@ obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-$(CONFIG_DRM_STM) += stm/
 obj-$(CONFIG_DRM_STI) += sti/
 obj-$(CONFIG_DRM_IMX) += imx/
+obj-$(CONFIG_DRM_INGENIC) += ingenic/
 obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
 obj-$(CONFIG_DRM_MESON)	+= meson/
 obj-y			+= i2c/
@@ -113,3 +119,4 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
 obj-$(CONFIG_DRM_LIMA)  += lima/
 obj-$(CONFIG_DRM_PANFROST) += panfrost/
 obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
+obj-$(CONFIG_DRM_MCDE) += mcde/
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 844f0a162981..a04f2fc7bf37 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -27,10 +27,11 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	depends on ARCH_HAS_HMM
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+	  isn't already selected to enabled full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index fdd0ca4b0f0b..57ce44cc3226 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
 	amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
@@ -173,7 +173,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 14398f55f602..cbcd253d18d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -44,9 +44,9 @@
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 
-#include <drm/drmP.h>
-#include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
 #include <drm/gpu_scheduler.h>
 
 #include <kgd_kfd_interface.h>
@@ -118,7 +118,6 @@ extern int amdgpu_disp_priority;
 extern int amdgpu_hw_i2c;
 extern int amdgpu_pcie_gen2;
 extern int amdgpu_msi;
-extern int amdgpu_lockup_timeout;
 extern int amdgpu_dpm;
 extern int amdgpu_fw_load_type;
 extern int amdgpu_aspm;
@@ -211,6 +210,7 @@ struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
 struct amdgpu_atif;
+struct kfd_vm_fault_info;
 
 enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -415,6 +415,7 @@ struct amdgpu_fpriv {
 };
 
 int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		  unsigned size, struct amdgpu_ib *ib);
@@ -558,6 +559,8 @@ struct amdgpu_asic_funcs {
 			       uint64_t *count1);
 	/* do we need to reset the asic at init time (e.g., kexec) */
 	bool (*need_reset_on_init)(struct amdgpu_device *adev);
+	/* PCIe replay counter */
+	uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
 };
 
 /*
@@ -639,6 +642,11 @@ struct nbio_hdp_flush_reg {
 	u32 ref_and_mask_sdma1;
 };
 
+struct amdgpu_mmio_remap {
+	u32 reg_offset;
+	resource_size_t bus_addr;
+};
+
 struct amdgpu_nbio_funcs {
 	const struct nbio_hdp_flush_reg *hdp_flush_reg;
 	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@@ -666,6 +674,7 @@ struct amdgpu_nbio_funcs {
 	void (*ih_control)(struct amdgpu_device *adev);
 	void (*init_registers)(struct amdgpu_device *adev);
 	void (*detect_hw_virt)(struct amdgpu_device *adev);
+	void (*remap_hdp_registers)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_df_funcs {
@@ -680,6 +689,12 @@ struct amdgpu_df_funcs {
 				      u32 *flags);
 	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
 					    bool enable);
+	int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
+					 int is_enable);
+	int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
+					 int is_disable);
+	void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
+					 uint64_t *count);
 };
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
@@ -764,6 +779,7 @@ struct amdgpu_device {
 	void __iomem			*rmmio;
 	/* protects concurrent MM_INDEX/DATA based register access */
 	spinlock_t mmio_idx_lock;
+	struct amdgpu_mmio_remap        rmmio_remap;
 	/* protects concurrent SMC based register access */
 	spinlock_t smc_idx_lock;
 	amdgpu_rreg_t			smc_rreg;
@@ -906,7 +922,7 @@ struct amdgpu_device {
 	const struct amdgpu_df_funcs	*df_funcs;
 
 	/* delayed work_func for deferring clockgating during resume */
-	struct delayed_work     late_init_work;
+	struct delayed_work     delayed_init_work;
 
 	struct amdgpu_virt	virt;
 	/* firmware VRAM reservation */
@@ -936,6 +952,13 @@ struct amdgpu_device {
 	struct work_struct		xgmi_reset_work;
 
 	bool                            in_baco_reset;
+
+	long				gfx_timeout;
+	long				sdma_timeout;
+	long				video_timeout;
+	long				compute_timeout;
+
+	uint64_t			unique_id;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1065,6 +1088,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
 #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
 #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
 #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
+#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
 
 /* Common functions */
 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1081,6 +1105,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
 					     const u32 array_size);
 
 bool amdgpu_device_is_px(struct drm_device *dev);
+bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
+				      struct amdgpu_device *peer_adev);
+
 /* atpx handler */
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 0a4fba196b84..eba42c752bca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -24,6 +24,7 @@
  */
 
 #include <linux/irqdomain.h>
+#include <linux/pci.h>
 #include <linux/pm_domain.h>
 #include <linux/platform_device.h>
 #include <sound/designware_i2s.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 56f8ca2a3bb4..1e41367ef74e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -27,7 +27,7 @@
 #include <linux/power_supply.h>
 #include <linux/pm_runtime.h>
 #include <acpi/video.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
index 3889486f71fe..a4d65973bf7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_afmt.c
@@ -25,7 +25,7 @@
  */
 #include <linux/hdmi.h>
 #include <linux/gcd.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index aeead072fa79..c8887a1c852a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -22,11 +22,13 @@
 
 #include "amdgpu_amdkfd.h"
 #include "amd_shared.h"
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
+#include "amdgpu_dma_buf.h"
 #include <linux/module.h>
 #include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -148,7 +150,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
-		 * clear */
+		 * clear
+		 */
 		bitmap_complement(gpu_resources.queue_bitmap,
 				  adev->gfx.mec.queue_bitmap,
 				  KGD_MAX_QUEUES);
@@ -162,7 +165,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				  gpu_resources.queue_bitmap);
 
 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
-		 * nbits is not compile time constant */
+		 * nbits is not compile time constant
+		 */
 		last_valid_bit = 1 /* only first MEC can have compute queues */
 				* adev->gfx.mec.num_pipe_per_mec
 				* adev->gfx.mec.num_queue_per_pipe;
@@ -335,6 +339,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 	amdgpu_bo_unref(&(bo));
 }
 
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
+				void **mem_obj)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo_param bp;
+	int r;
+
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = 1;
+	bp.domain = AMDGPU_GEM_DOMAIN_GWS;
+	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+
+	r = amdgpu_bo_create(adev, &bp, &bo);
+	if (r) {
+		dev_err(adev->dev,
+			"failed to allocate gws BO for amdkfd (%d)\n", r);
+		return r;
+	}
+
+	*mem_obj = bo;
+	return 0;
+}
+
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
+
+	amdgpu_bo_unref(&bo);
+}
+
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
 				      enum kgd_engine_type type)
 {
@@ -518,6 +556,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
 
 	return adev->gmc.xgmi.hive_id;
 }
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+	if (ret < 0) {
+		DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
+			adev->gmc.xgmi.physical_node_id,
+			peer_adev->gmc.xgmi.physical_node_id, ret);
+		ret = 0;
+	}
+	return  (uint8_t)ret;
+}
+
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->rmmio_remap.bus_addr;
+}
+
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	return adev->gds.gws_size;
+}
 
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4e37fa7e85b1..f968bf147c5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
 
 	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
-	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
@@ -154,6 +153,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 				void **mem_obj, uint64_t *gpu_addr,
 				void **cpu_ptr, bool mqd_gfx9);
 void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
+void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
 uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
 				      enum kgd_engine_type type);
 void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@@ -169,6 +172,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
 				  uint32_t *flags);
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
+uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
+uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
 
 #define read_user_wptr(mmptr, wptr, dst)				\
 	({								\
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index fa09e11a600c..5f459bf5f622 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -23,7 +23,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
@@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
 
-	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+	pr_debug("sdma base address: 0x%x\n", retval);
 
 	return retval;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index fec3a6aa1de6..6d2f61449606 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -24,7 +24,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "gfx_v8_0.h"
@@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
 
 	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
-	pr_debug("kfd: sdma base address: 0x%x\n", retval);
+	pr_debug("sdma base address: 0x%x\n", retval);
 
 	return retval;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index ef3d93b995b2..85395f2d83a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -26,7 +26,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/mmu_context.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "soc15_hw_ip.h"
@@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 
 	lock_srbm(kgd, 0, 0, 0, vmid);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
 	/* APE1 no longer exists on GFX9 */
 
 	unlock_srbm(kgd);
@@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
 		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
 			((mec << 5) | (pipe << 3) | queue_id | 0x80));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
 	}
 
 	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 
 	for (reg = hqd_base;
 	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-		WREG32(reg, mqd_hqd[reg - hqd_base]);
+		WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
 
 
 	/* Activate doorbell logic before triggering WPTR poll. */
 	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
 			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
 
 	if (wptr) {
 		/* Don't read wptr with get_user because the user
@@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
 		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
 
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
 		       lower_32_bits(guessed_wptr));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
 		       upper_32_bits(guessed_wptr));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
 		       lower_32_bits((uintptr_t)wptr));
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+		WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
 		       upper_32_bits((uintptr_t)wptr));
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
 		       get_queue_mask(adev, pipe_id, queue_id));
 	}
 
 	/* Start the EOP fetcher */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
 	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
 			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
 
 	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
 
 	release_queue(kgd);
 
@@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 	acquire_queue(kgd, pipe_id, queue_id);
 
 	if (m->cp_hqd_vmid == 0)
-		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+		WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
 
 	switch (reset_type) {
 	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
 		break;
 	}
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
 
 	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
@@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
 }
 
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
-	/* Use legacy mode tlb invalidation.
-	 *
-	 * Currently on Raven the code below is broken for anything but
-	 * legacy mode due to a MMHUB power gating problem. A workaround
-	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
-	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
-	 * bit.
-	 *
-	 * TODO 1: agree on the right set of invalidation registers for
-	 * KFD use. Use the last one for now. Invalidate both GC and
-	 * MMHUB.
-	 *
-	 * TODO 2: support range-based invalidation, requires kfg2kgd
-	 * interface change
-	 */
-	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
-}
-
-static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
+			uint32_t flush_type)
 {
 	signed long r;
 	uint32_t seq;
@@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
 			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
 			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
 			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
 	amdgpu_fence_emit_polling(ring, &seq);
 	amdgpu_ring_commit(ring);
 	spin_unlock(&adev->gfx.kiq.ring_lock);
@@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
 	int vmid;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+	uint32_t flush_type = 0;
 
 	if (adev->in_gpu_reset)
 		return -EIO;
+	if (adev->gmc.xgmi.num_physical_nodes &&
+		adev->asic_type == CHIP_VEGA20)
+		flush_type = 2;
 
 	if (ring->sched.ready)
-		return invalidate_tlbs_with_kiq(adev, pasid);
+		return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
 
 	for (vmid = 0; vmid < 16; vmid++) {
 		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
 			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
 				== pasid) {
-				write_vmid_invalidate_request(kgd, vmid);
+				amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+							 flush_type);
 				break;
 			}
 		}
@@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
 		return 0;
 	}
 
-	write_vmid_invalidate_request(kgd, vmid);
+	/* Use legacy mode tlb invalidation.
+	 *
+	 * Currently on Raven the code below is broken for anything but
+	 * legacy mode due to a MMHUB power gating problem. A workaround
+	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+	 * bit.
+	 *
+	 * TODO 1: agree on the right set of invalidation registers for
+	 * KFD use. Use the last one for now. Invalidate both GC and
+	 * MMHUB.
+	 *
+	 * TODO 2: support range-based invalidation, requires kfg2kgd
+	 * interface change
+	 */
+	amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
 	return 0;
 }
 
@@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
 
 	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
 	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
 		SE_BROADCAST_WRITES, 1);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a6e5184d436c..df26bf34b675 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -22,14 +22,16 @@
 
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
+#include <linux/dma-buf.h>
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
-#include <linux/dma-buf.h>
-#include <drm/drmP.h>
+#include <linux/sched/task.h>
+
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_dma_buf.h"
 
 /* Special VM and GART address alignment needed for VI pre-Fiji due to
  * a HW bug.
@@ -456,6 +458,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	mutex_unlock(&process_info->lock);
 }
 
+static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
+		struct amdkfd_process_info *process_info)
+{
+	struct ttm_validate_buffer *bo_list_entry;
+
+	bo_list_entry = &mem->validate_list;
+	mutex_lock(&process_info->lock);
+	list_del(&bo_list_entry->head);
+	mutex_unlock(&process_info->lock);
+}
+
 /* Initializes user pages. It registers the MMU notifier and validates
  * the userptr BO in the GTT domain.
  *
@@ -491,28 +504,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 		goto out;
 	}
 
-	/* If no restore worker is running concurrently, user_pages
-	 * should not be allocated
-	 */
-	WARN(mem->user_pages, "Leaking user_pages array");
-
-	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					   sizeof(struct page *),
-					   GFP_KERNEL | __GFP_ZERO);
-	if (!mem->user_pages) {
-		pr_err("%s: Failed to allocate pages array\n", __func__);
-		ret = -ENOMEM;
-		goto unregister_out;
-	}
-
-	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
 	if (ret) {
 		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-		goto free_out;
+		goto unregister_out;
 	}
 
-	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
 	ret = amdgpu_bo_reserve(bo, true);
 	if (ret) {
 		pr_err("%s: Failed to reserve BO\n", __func__);
@@ -525,11 +522,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-	kvfree(mem->user_pages);
-	mem->user_pages = NULL;
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
 	if (ret)
 		amdgpu_mn_unregister(bo);
@@ -588,13 +581,12 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates);
+				     false, &ctx->duplicates, true);
 	if (!ret)
 		ctx->reserved = true;
 	else {
@@ -652,7 +644,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -668,7 +659,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-				     false, &ctx->duplicates);
+				     false, &ctx->duplicates, true);
 	if (!ret)
 		ctx->reserved = true;
 	else
@@ -896,6 +887,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  AMDGPU_FENCE_OWNER_KFD, false);
 	if (ret)
 		goto wait_pd_fail;
+	ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
+	if (ret)
+		goto reserve_shared_fail;
 	amdgpu_bo_fence(vm->root.base.bo,
 			&vm->process_info->eviction_fence->base, true);
 	amdgpu_bo_unreserve(vm->root.base.bo);
@@ -909,6 +903,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 
 	return 0;
 
+reserve_shared_fail:
 wait_pd_fail:
 validate_pd_fail:
 	amdgpu_bo_unreserve(vm->root.base.bo);
@@ -1109,7 +1104,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		if (!offset || !*offset)
 			return -EINVAL;
 		user_addr = *offset;
-	} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+	} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
+			ALLOC_MEM_FLAGS_MMIO_REMAP)) {
 		domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
 		bo_type = ttm_bo_type_sg;
@@ -1199,12 +1195,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	if (user_addr) {
 		ret = init_user_pages(*mem, current->mm, user_addr);
-		if (ret) {
-			mutex_lock(&avm->process_info->lock);
-			list_del(&(*mem)->validate_list.head);
-			mutex_unlock(&avm->process_info->lock);
+		if (ret)
 			goto allocate_init_user_pages_failed;
-		}
 	}
 
 	if (offset)
@@ -1213,6 +1205,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	return 0;
 
 allocate_init_user_pages_failed:
+	remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
 	amdgpu_bo_unref(&bo);
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
@@ -1262,15 +1255,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
-	/* Free user pages if necessary */
-	if (mem->user_pages) {
-		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
-		kvfree(mem->user_pages);
-	}
-
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1294,8 +1278,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
-	/* If the SG is not NULL, it's one we created for a doorbell
-	 * BO. We need to free it.
+	/* If the SG is not NULL, it's one we created for a doorbell or mmio
+	 * remap BO. We need to free it.
 	 */
 	if (mem->bo->tbo.sg) {
 		sg_free_table(mem->bo->tbo.sg);
@@ -1409,7 +1393,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
 					      is_invalid_userptr);
 			if (ret) {
-				pr_err("Failed to map radeon bo to gpuvm\n");
+				pr_err("Failed to map bo to gpuvm\n");
 				goto map_bo_to_gpuvm_failed;
 			}
 
@@ -1744,25 +1728,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 
 		bo = mem->bo;
 
-		if (!mem->user_pages) {
-			mem->user_pages =
-				kvmalloc_array(bo->tbo.ttm->num_pages,
-						 sizeof(struct page *),
-						 GFP_KERNEL | __GFP_ZERO);
-			if (!mem->user_pages) {
-				pr_err("%s: Failed to allocate pages array\n",
-				       __func__);
-				return -ENOMEM;
-			}
-		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-		}
-
 		/* Get updated user pages */
 		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
-						   mem->user_pages);
+						   bo->tbo.ttm->pages);
 		if (ret) {
-			mem->user_pages[0] = NULL;
+			bo->tbo.ttm->pages[0] = NULL;
 			pr_info("%s: Failed to get user pages: %d\n",
 				__func__, ret);
 			/* Pretend it succeeded. It will fail later
@@ -1771,17 +1741,28 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
 }
 
+/* Remove invalid userptr BOs from hmm track list
+ *
+ * Stop HMM track the userptr update
+ */
+static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		bo = mem->bo;
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+}
+
 /* Validate invalid userptr BOs
  *
  * Validates BOs on the userptr_inval_list, and moves them back to the
@@ -1806,7 +1787,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 				     GFP_KERNEL);
 	if (!pd_bo_list_entries) {
 		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out_no_mem;
 	}
 
 	INIT_LIST_HEAD(&resv_list);
@@ -1827,10 +1809,11 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	}
 
 	/* Reserve all BOs and page tables for validation */
-	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
+				     true);
 	WARN(!list_empty(&duplicates), "Duplicates should be empty");
 	if (ret)
-		goto out;
+		goto out_free;
 
 	amdgpu_sync_create(&sync);
 
@@ -1846,10 +1829,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 
 		bo = mem->bo;
 
-		/* Copy pages array and validate the BO if we got user pages */
-		if (mem->user_pages[0]) {
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     mem->user_pages);
+		/* Validate the BO if we got user pages */
+		if (bo->tbo.ttm->pages[0]) {
 			amdgpu_bo_placement_from_domain(bo, mem->domain);
 			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (ret) {
@@ -1858,16 +1839,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			}
 		}
 
-		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
-		 */
-		kvfree(mem->user_pages);
-		mem->user_pages = NULL;
 		list_move_tail(&mem->validate_list.head,
 			       &process_info->userptr_valid_list);
 
+		/* Stop HMM track the userptr update. We dont check the return
+		 * value for concurrent CPU page table update because we will
+		 * reschedule the restore worker if process_info->evicted_bos
+		 * is updated.
+		 */
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
 		 * clear the page table entries, which will result in
@@ -1897,8 +1878,9 @@ unreserve_out:
 	ttm_eu_backoff_reservation(&ticket, &resv_list);
 	amdgpu_sync_wait(&sync, false);
 	amdgpu_sync_free(&sync);
-out:
+out_free:
 	kfree(pd_bo_list_entries);
+out_no_mem:
 
 	return ret;
 }
@@ -1963,7 +1945,9 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 		 * hanging. No point trying again.
 		 */
 	}
+
 unlock_out:
+	untrack_invalid_user_pages(process_info);
 	mutex_unlock(&process_info->lock);
 	mmput(mm);
 	put_task_struct(usertask);
@@ -2032,7 +2016,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-				     false, &duplicate_save);
+				     false, &duplicate_save, true);
 	if (ret) {
 		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
 		goto ttm_reserve_fail;
@@ -2130,3 +2114,88 @@ ttm_reserve_fail:
 	kfree(pd_bo_list);
 	return ret;
 }
+
+int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
+{
+	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
+	int ret;
+
+	if (!info || !gws)
+		return -EINVAL;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+
+	mutex_init(&(*mem)->lock);
+	(*mem)->bo = amdgpu_bo_ref(gws_bo);
+	(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
+	(*mem)->process_info = process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
+	amdgpu_sync_create(&(*mem)->sync);
+
+
+	/* Validate gws bo the first time it is added to process */
+	mutex_lock(&(*mem)->process_info->lock);
+	ret = amdgpu_bo_reserve(gws_bo, false);
+	if (unlikely(ret)) {
+		pr_err("Reserve gws bo failed %d\n", ret);
+		goto bo_reservation_failure;
+	}
+
+	ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
+	if (ret) {
+		pr_err("GWS BO validate failed %d\n", ret);
+		goto bo_validation_failure;
+	}
+	/* GWS resource is shared b/t amdgpu and amdkfd
+	 * Add process eviction fence to bo so they can
+	 * evict each other.
+	 */
+	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
+	amdgpu_bo_unreserve(gws_bo);
+	mutex_unlock(&(*mem)->process_info->lock);
+
+	return ret;
+
+bo_validation_failure:
+	amdgpu_bo_unreserve(gws_bo);
+bo_reservation_failure:
+	mutex_unlock(&(*mem)->process_info->lock);
+	amdgpu_sync_free(&(*mem)->sync);
+	remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
+	amdgpu_bo_unref(&gws_bo);
+	mutex_destroy(&(*mem)->lock);
+	kfree(*mem);
+	*mem = NULL;
+	return ret;
+}
+
+int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
+{
+	int ret;
+	struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
+	struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
+	struct amdgpu_bo *gws_bo = kgd_mem->bo;
+
+	/* Remove BO from process's validate list so restore worker won't touch
+	 * it anymore
+	 */
+	remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
+
+	ret = amdgpu_bo_reserve(gws_bo, false);
+	if (unlikely(ret)) {
+		pr_err("Reserve gws bo failed %d\n", ret);
+		//TODO add BO back to validate_list?
+		return ret;
+	}
+	amdgpu_amdkfd_remove_eviction_fence(gws_bo,
+			process_info->eviction_fence);
+	amdgpu_bo_unreserve(gws_bo);
+	amdgpu_sync_free(&kgd_mem->sync);
+	amdgpu_bo_unref(&gws_bo);
+	mutex_destroy(&kgd_mem->lock);
+	kfree(mem);
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index e02781b37e73..1c9d40f97a9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index f96d75c6e099..a2dbdf13c4c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "atomfirmware.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 3079ea8523c5..649e68c4479b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -21,7 +21,7 @@
  *
  * Authors: Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index a5df80d50d44..50dff69a0f6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -25,10 +25,11 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "atom.h"
 
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8e1150..7bcf86c61999 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -28,7 +28,8 @@
  *    Christian König <deathsimple@vodafone.de>
  */
 
-#include <drm/drmP.h>
+#include <linux/uaccess.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -81,9 +82,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 		return -ENOMEM;
 
 	kref_init(&list->refcount);
-	list->gds_obj = adev->gds.gds_gfx_bo;
-	list->gws_obj = adev->gds.gws_gfx_bo;
-	list->oa_obj = adev->gds.oa_gfx_bo;
+	list->gds_obj = NULL;
+	list->gws_obj = NULL;
+	list->oa_obj = NULL;
 
 	array = amdgpu_bo_list_array_entry(list, 0);
 	memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
 	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 387f1cf1dc20..031b094607bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -22,8 +22,9 @@
  *
  */
 #include <linux/list.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <linux/firmware.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index bf04c12bd324..73b2ede773d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2f6239b6be6f..dc63707e426f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -24,9 +24,11 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
+#include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
 #include "amdgpu.h"
@@ -52,7 +54,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.tv.bo = &bo->tbo;
 	/* One for TTM and one for the CS job */
 	p->uf_entry.tv.num_shared = 2;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -542,14 +543,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated && lobj->user_pages) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
+
 			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
 						     lobj->user_pages);
 			binding_userptr = true;
@@ -580,7 +581,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -616,79 +616,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
-
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
-
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
-
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-			}
-
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
-			}
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		bool userpage_invalidated = false;
+		int i;
+
+		e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					sizeof(struct page *),
+					GFP_KERNEL | __GFP_ZERO);
+		if (!e->user_pages) {
+			DRM_ERROR("calloc failure\n");
+			return -ENOMEM;
 		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
+		if (r) {
+			kvfree(e->user_pages);
+			e->user_pages = NULL;
+			return r;
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-							 sizeof(struct page*),
-							 GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
-
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
+		for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+			if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+				userpage_invalidated = true;
+				break;
 			}
 		}
+		e->user_invalidated = userpage_invalidated;
+	}
 
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates, true);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -757,17 +723,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1054,11 +1010,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		j++;
 	}
 
-	/* UVD & VCE fw doesn't support user fences */
+	/* MM engine doesn't support user fences */
 	ring = to_amdgpu_ring(parser->entity->rq->sched);
-	if (parser->job->uf_addr && (
-	    ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
-	    ring->funcs->type == AMDGPU_RING_TYPE_VCE))
+	if (parser->job->uf_addr && ring->funcs->no_user_fence)
 		return -EINVAL;
 
 	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@@ -1328,7 +1282,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
 	int r;
 
 	job = p->job;
@@ -1338,15 +1291,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is hold until amdgpu_cs_submit is finished and fence is added
+	 * to BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
+	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
+	 */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -EAGAIN;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1442,6 +1403,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 54dd02a898b9..06f83cac0d3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -79,7 +79,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	list_add(&csa_tv.head, &list);
 	amdgpu_vm_get_pd_bo(vm, &list, &pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true);
 	if (r) {
 		DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a28a3d722ba2..f539a2a92774 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -22,7 +22,6 @@
  * Authors: monk liu <monk.liu@amd.com>
  */
 
-#include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 8930d66f2204..f255a00c4492 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -24,8 +24,11 @@
  */
 
 #include <linux/kthread.h>
-#include <drm/drmP.h>
-#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_debugfs.h>
+
 #include "amdgpu.h"
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f4ac632a87b2..a65c0661253a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -27,9 +27,10 @@
  */
 #include <linux/power_supply.h>
 #include <linux/kthread.h>
+#include <linux/module.h>
 #include <linux/console.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
@@ -97,6 +98,28 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
+/**
+ * DOC: pcie_replay_count
+ *
+ * The amdgpu driver provides a sysfs API for reporting the total number
+ * of PCIe replays (NAKs)
+ * The file pcie_replay_count is used for this and returns the total
+ * number of replays as a sum of the NAKs generated and NAKs received
+ */
+
+static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
+}
+
+static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
+		amdgpu_device_get_pcie_replay_count, NULL);
+
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /**
@@ -910,8 +933,10 @@ def_value:
  * Validates certain module parameters and updates
  * the associated values used by the driver (all asics).
  */
-static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
+static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
+	int ret = 0;
+
 	if (amdgpu_sched_jobs < 4) {
 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 			 amdgpu_sched_jobs);
@@ -956,12 +981,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 		amdgpu_vram_page_split = 1024;
 	}
 
-	if (amdgpu_lockup_timeout == 0) {
-		dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
-		amdgpu_lockup_timeout = 10000;
+	ret = amdgpu_device_get_job_timeout_settings(adev);
+	if (ret) {
+		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+		return ret;
 	}
 
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+
+	return ret;
 }
 
 /**
@@ -1505,12 +1533,26 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 		r = amdgpu_virt_request_full_gpu(adev, true);
 		if (r)
 			return -EAGAIN;
+
+		/* query the reg access mode at the very beginning */
+		amdgpu_virt_init_reg_access_mode(adev);
 	}
 
 	adev->pm.pp_feature = amdgpu_pp_feature_mask;
 	if (amdgpu_sriov_vf(adev))
 		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 
+	/* Read BIOS */
+	if (!amdgpu_get_bios(adev))
+		return -EINVAL;
+
+	r = amdgpu_atombios_init(adev);
+	if (r) {
+		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
+		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
+		return r;
+	}
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1550,6 +1592,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
 		if (adev->ip_blocks[i].status.hw)
 			continue;
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+		    (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
 		    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
 			r = adev->ip_blocks[i].version->funcs->hw_init(adev);
 			if (r) {
@@ -1821,6 +1864,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
 	return 0;
 }
 
+static int amdgpu_device_enable_mgpu_fan_boost(void)
+{
+	struct amdgpu_gpu_instance *gpu_ins;
+	struct amdgpu_device *adev;
+	int i, ret = 0;
+
+	mutex_lock(&mgpu_info.mutex);
+
+	/*
+	 * MGPU fan boost feature should be enabled
+	 * only when there are two or more dGPUs in
+	 * the system
+	 */
+	if (mgpu_info.num_dgpu < 2)
+		goto out;
+
+	for (i = 0; i < mgpu_info.num_dgpu; i++) {
+		gpu_ins = &(mgpu_info.gpu_ins[i]);
+		adev = gpu_ins->adev;
+		if (!(adev->flags & AMD_IS_APU) &&
+		    !gpu_ins->mgpu_fan_enabled &&
+		    adev->powerplay.pp_funcs &&
+		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
+			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
+			if (ret)
+				break;
+
+			gpu_ins->mgpu_fan_enabled = 1;
+		}
+	}
+
+out:
+	mutex_unlock(&mgpu_info.mutex);
+
+	return ret;
+}
+
 /**
  * amdgpu_device_ip_late_init - run late init for hardware IPs
  *
@@ -1854,11 +1934,15 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
 
-	queue_delayed_work(system_wq, &adev->late_init_work,
-			   msecs_to_jiffies(AMDGPU_RESUME_MS));
-
 	amdgpu_device_fill_reset_magic(adev);
 
+	r = amdgpu_device_enable_mgpu_fan_boost();
+	if (r)
+		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+	/* set to low pstate by default */
+	amdgpu_xgmi_set_pstate(adev, 0);
+
 	return 0;
 }
 
@@ -1957,65 +2041,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_device_enable_mgpu_fan_boost(void)
-{
-	struct amdgpu_gpu_instance *gpu_ins;
-	struct amdgpu_device *adev;
-	int i, ret = 0;
-
-	mutex_lock(&mgpu_info.mutex);
-
-	/*
-	 * MGPU fan boost feature should be enabled
-	 * only when there are two or more dGPUs in
-	 * the system
-	 */
-	if (mgpu_info.num_dgpu < 2)
-		goto out;
-
-	for (i = 0; i < mgpu_info.num_dgpu; i++) {
-		gpu_ins = &(mgpu_info.gpu_ins[i]);
-		adev = gpu_ins->adev;
-		if (!(adev->flags & AMD_IS_APU) &&
-		    !gpu_ins->mgpu_fan_enabled &&
-		    adev->powerplay.pp_funcs &&
-		    adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
-			ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
-			if (ret)
-				break;
-
-			gpu_ins->mgpu_fan_enabled = 1;
-		}
-	}
-
-out:
-	mutex_unlock(&mgpu_info.mutex);
-
-	return ret;
-}
-
 /**
- * amdgpu_device_ip_late_init_func_handler - work handler for ib test
+ * amdgpu_device_delayed_init_work_handler - work handler for IB tests
  *
  * @work: work_struct.
  */
-static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
+static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
-		container_of(work, struct amdgpu_device, late_init_work.work);
+		container_of(work, struct amdgpu_device, delayed_init_work.work);
 	int r;
 
 	r = amdgpu_ib_ring_tests(adev);
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
-
-	r = amdgpu_device_enable_mgpu_fan_boost();
-	if (r)
-		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
-
-	/*set to low pstate by default */
-	amdgpu_xgmi_set_pstate(adev, 0);
-
 }
 
 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2467,7 +2506,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->lock_reset);
 	mutex_init(&adev->virt.dpm_mutex);
 
-	amdgpu_device_check_arguments(adev);
+	r = amdgpu_device_check_arguments(adev);
+	if (r)
+		return r;
 
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
@@ -2485,8 +2526,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&adev->ring_lru_list);
 	spin_lock_init(&adev->ring_lru_list_lock);
 
-	INIT_DELAYED_WORK(&adev->late_init_work,
-			  amdgpu_device_ip_late_init_func_handler);
+	INIT_DELAYED_WORK(&adev->delayed_init_work,
+			  amdgpu_device_delayed_init_work_handler);
 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
 			  amdgpu_device_delay_enable_gfx_off);
 
@@ -2552,19 +2593,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		goto fence_driver_init;
 	}
 
-	/* Read BIOS */
-	if (!amdgpu_get_bios(adev)) {
-		r = -EINVAL;
-		goto failed;
-	}
-
-	r = amdgpu_atombios_init(adev);
-	if (r) {
-		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
-		goto failed;
-	}
-
 	/* detect if we are with an SRIOV vbios */
 	amdgpu_device_detect_sriov_bios(adev);
 
@@ -2666,6 +2694,10 @@ fence_driver_init:
 	if (r)
 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
 
+	r = amdgpu_ucode_sysfs_init(adev);
+	if (r)
+		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+
 	r = amdgpu_debugfs_gem_init(adev);
 	if (r)
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -2706,7 +2738,16 @@ fence_driver_init:
 	}
 
 	/* must succeed. */
-	amdgpu_ras_post_init(adev);
+	amdgpu_ras_resume(adev);
+
+	queue_delayed_work(system_wq, &adev->delayed_init_work,
+			   msecs_to_jiffies(AMDGPU_RESUME_MS));
+
+	r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
+	if (r) {
+		dev_err(adev->dev, "Could not create pcie_replay_count");
+		return r;
+	}
 
 	return 0;
 
@@ -2749,7 +2790,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 		adev->firmware.gpu_info_fw = NULL;
 	}
 	adev->accel_working = false;
-	cancel_delayed_work_sync(&adev->late_init_work);
+	cancel_delayed_work_sync(&adev->delayed_init_work);
 	/* free i2c buses */
 	if (!amdgpu_device_has_dc_support(adev))
 		amdgpu_i2c_fini(adev);
@@ -2771,6 +2812,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
+	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
+	amdgpu_ucode_sysfs_fini(adev);
 }
 
 
@@ -2810,7 +2853,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	if (fbcon)
 		amdgpu_fbdev_set_suspend(adev, 1);
 
-	cancel_delayed_work_sync(&adev->late_init_work);
+	cancel_delayed_work_sync(&adev->delayed_init_work);
 
 	if (!amdgpu_device_has_dc_support(adev)) {
 		/* turn off display hw */
@@ -2851,6 +2894,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 
 	amdgpu_amdkfd_suspend(adev);
 
+	amdgpu_ras_suspend(adev);
+
 	r = amdgpu_device_ip_suspend_phase1(adev);
 
 	/* evict vram memory */
@@ -2928,6 +2973,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	if (r)
 		return r;
 
+	queue_delayed_work(system_wq, &adev->delayed_init_work,
+			   msecs_to_jiffies(AMDGPU_RESUME_MS));
+
 	if (!amdgpu_device_has_dc_support(adev)) {
 		/* pin cursors */
 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -2951,7 +2999,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		return r;
 
 	/* Make sure IB tests flushed */
-	flush_delayed_work(&adev->late_init_work);
+	flush_delayed_work(&adev->delayed_init_work);
 
 	/* blat the mode back in */
 	if (fbcon) {
@@ -2971,6 +3019,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 
 	drm_kms_helper_poll_enable(dev);
 
+	amdgpu_ras_resume(adev);
+
 	/*
 	 * Most of the connector probing functions try to acquire runtime pm
 	 * refs to ensure that the GPU is powered on when connector polling is
@@ -3335,8 +3385,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		drm_sched_stop(&ring->sched);
-
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
@@ -3344,8 +3392,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	if(job)
 		drm_sched_increase_karma(&job->base);
 
-
-
+	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
 	if (!amdgpu_sriov_vf(adev)) {
 
 		if (!need_full_reset)
@@ -3452,6 +3499,13 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 				if (vram_lost)
 					amdgpu_device_fill_reset_magic(tmp_adev);
 
+				r = amdgpu_device_ip_late_init(tmp_adev);
+				if (r)
+					goto out;
+
+				/* must succeed. */
+				amdgpu_ras_resume(tmp_adev);
+
 				/* Update PSP FW topology after reset */
 				if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
 					r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@@ -3483,38 +3537,21 @@ end:
 	return r;
 }
 
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
-					  struct amdgpu_job *job)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-	int i;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
-
-		if (!adev->asic_reset_res)
-			drm_sched_resubmit_jobs(&ring->sched);
-
-		drm_sched_start(&ring->sched, !adev->asic_reset_res);
-	}
-
-	if (!amdgpu_device_has_dc_support(adev)) {
-		drm_helper_resume_force_mode(adev->ddev);
-	}
-
-	adev->asic_reset_res = 0;
-}
+	if (trylock) {
+		if (!mutex_trylock(&adev->lock_reset))
+			return false;
+	} else
+		mutex_lock(&adev->lock_reset);
 
-static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
-{
-	mutex_lock(&adev->lock_reset);
 	atomic_inc(&adev->gpu_reset_counter);
 	adev->in_gpu_reset = 1;
 	/* Block kfd: SRIOV would do it separately */
 	if (!amdgpu_sriov_vf(adev))
                 amdgpu_amdkfd_pre_reset(adev);
+
+	return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -3542,40 +3579,44 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
-	int r;
+	struct list_head device_list, *device_list_handle =  NULL;
+	bool need_full_reset, job_signaled;
 	struct amdgpu_hive_info *hive = NULL;
-	bool need_full_reset = false;
 	struct amdgpu_device *tmp_adev = NULL;
-	struct list_head device_list, *device_list_handle =  NULL;
+	int i, r = 0;
 
+	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
 	dev_info(adev->dev, "GPU reset begin!\n");
 
+	cancel_delayed_work_sync(&adev->delayed_init_work);
+
+	hive = amdgpu_get_xgmi_hive(adev, false);
+
 	/*
-	 * In case of XGMI hive disallow concurrent resets to be triggered
-	 * by different nodes. No point also since the one node already executing
-	 * reset will also reset all the other nodes in the hive.
+	 * Here we trylock to avoid chain of resets executing from
+	 * either trigger by jobs on different adevs in XGMI hive or jobs on
+	 * different schedulers for same device while this TO handler is running.
+	 * We always reset all schedulers for device and all devices for XGMI
+	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, 0);
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->reset_lock))
+
+	if (hive && !mutex_trylock(&hive->reset_lock)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+			 job->base.id, hive->hive_id);
 		return 0;
+	}
 
 	/* Start with adev pre asic reset first for soft reset check.*/
-	amdgpu_device_lock_adev(adev);
-	r = amdgpu_device_pre_asic_reset(adev,
-					 job,
-					 &need_full_reset);
-	if (r) {
-		/*TODO Should we stop ?*/
-		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
-			  r, adev->ddev->unique);
-		adev->asic_reset_res = r;
+	if (!amdgpu_device_lock_adev(adev, !hive)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+					 job->base.id);
+		return 0;
 	}
 
 	/* Build list of devices to reset */
-	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
+	if  (adev->gmc.xgmi.num_physical_nodes > 1) {
 		if (!hive) {
 			amdgpu_device_unlock_adev(adev);
 			return -ENODEV;
@@ -3592,13 +3633,56 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		device_list_handle = &device_list;
 	}
 
+	/* block all schedulers and reset given job's ring */
+	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			drm_sched_stop(&ring->sched, &job->base);
+		}
+	}
+
+
+	/*
+	 * Must check guilty signal here since after this point all old
+	 * HW fences are force signaled.
+	 *
+	 * job->base holds a reference to parent fence
+	 */
+	if (job && job->base.s_fence->parent &&
+	    dma_fence_is_signaled(job->base.s_fence->parent))
+		job_signaled = true;
+
+	if (!amdgpu_device_ip_need_full_reset(adev))
+		device_list_handle = &device_list;
+
+	if (job_signaled) {
+		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+		goto skip_hw_reset;
+	}
+
+
+	/* Guilty job will be freed after this*/
+	r = amdgpu_device_pre_asic_reset(adev,
+					 job,
+					 &need_full_reset);
+	if (r) {
+		/*TODO Should we stop ?*/
+		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
+			  r, adev->ddev->unique);
+		adev->asic_reset_res = r;
+	}
+
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 
 		if (tmp_adev == adev)
 			continue;
 
-		amdgpu_device_lock_adev(tmp_adev);
+		amdgpu_device_lock_adev(tmp_adev, false);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
 						 &need_full_reset);
@@ -3622,9 +3706,28 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 			goto retry;
 	}
 
+skip_hw_reset:
+
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			/* No point to resubmit jobs if we didn't HW reset*/
+			if (!tmp_adev->asic_reset_res && !job_signaled)
+				drm_sched_resubmit_jobs(&ring->sched);
+
+			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+		}
+
+		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+			drm_helper_resume_force_mode(tmp_adev->ddev);
+		}
+
+		tmp_adev->asic_reset_res = 0;
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
@@ -3637,7 +3740,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+	if (hive)
 		mutex_unlock(&hive->reset_lock);
 
 	if (r)
@@ -3645,43 +3748,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	return r;
 }
 
-static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
-						  enum pci_bus_speed *speed,
-						  enum pcie_link_width *width)
-{
-	struct pci_dev *pdev = adev->pdev;
-	enum pci_bus_speed cur_speed;
-	enum pcie_link_width cur_width;
-	u32 ret = 1;
-
-	*speed = PCI_SPEED_UNKNOWN;
-	*width = PCIE_LNK_WIDTH_UNKNOWN;
-
-	while (pdev) {
-		cur_speed = pcie_get_speed_cap(pdev);
-		cur_width = pcie_get_width_cap(pdev);
-		ret = pcie_bandwidth_available(adev->pdev, NULL,
-						       NULL, &cur_width);
-		if (!ret)
-			cur_width = PCIE_LNK_WIDTH_RESRV;
-
-		if (cur_speed != PCI_SPEED_UNKNOWN) {
-			if (*speed == PCI_SPEED_UNKNOWN)
-				*speed = cur_speed;
-			else if (cur_speed < *speed)
-				*speed = cur_speed;
-		}
-
-		if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
-			if (*width == PCIE_LNK_WIDTH_UNKNOWN)
-				*width = cur_width;
-			else if (cur_width < *width)
-				*width = cur_width;
-		}
-		pdev = pci_upstream_bridge(pdev);
-	}
-}
-
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
@@ -3715,8 +3781,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
 		return;
 
-	amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
-					      &platform_link_width);
+	pcie_bandwidth_available(adev->pdev, NULL,
+				 &platform_speed_cap, &platform_link_width);
 
 	if (adev->pm.pcie_gen_mask == 0) {
 		/* asic caps */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index b083b219b1a9..535650967b1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_i2c.h"
@@ -32,11 +32,13 @@
 #include "amdgpu_display.h"
 #include <asm/div64.h>
 
+#include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_vblank.h>
 
 static void amdgpu_display_flip_callback(struct dma_fence *f,
 					 struct dma_fence_cb *cb)
@@ -631,10 +633,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 					 amdgpu_dither_enum_list, sz);
 
 	if (amdgpu_device_has_dc_support(adev)) {
-		adev->mode_info.max_bpc_property =
-			drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
-		if (!adev->mode_info.max_bpc_property)
-			return -ENOMEM;
 		adev->mode_info.abm_level_property =
 			drm_property_create_range(adev->ddev, 0,
 						"abm level", 0, 4);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index a38e0fb4a6fe..489041df1f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Advanced Micro Devices, Inc.
+ * Copyright 2019 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -31,8 +31,6 @@
  * objects between different devices via PRIME <prime_buffer_sharing>`.
  */
 
-#include <drm/drmP.h>
-
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_gem.h"
@@ -103,7 +101,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
  * Returns:
  * 0 on success or a negative error code on failure.
  */
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+			  struct vm_area_struct *vma)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -137,57 +136,6 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
 	return ret;
 }
 
-/**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
- * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
- *
- * Imports shared DMA buffer memory exported by another device.
- *
- * Returns:
- * A new GEM BO of the given DRM device, representing the memory
- * described by the given DMA-buf attachment and scatter/gather table.
- */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg)
-{
-	struct reservation_object *resv = attach->dmabuf->resv;
-	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_bo *bo;
-	struct amdgpu_bo_param bp;
-	int ret;
-
-	memset(&bp, 0, sizeof(bp));
-	bp.size = attach->dmabuf->size;
-	bp.byte_align = PAGE_SIZE;
-	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
-	bp.flags = 0;
-	bp.type = ttm_bo_type_sg;
-	bp.resv = resv;
-	ww_mutex_lock(&resv->lock, NULL);
-	ret = amdgpu_bo_create(adev, &bp, &bo);
-	if (ret)
-		goto error;
-
-	bo->tbo.sg = sg;
-	bo->tbo.ttm->sg = sg;
-	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
-	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
-		bo->prime_shared_count = 1;
-
-	ww_mutex_unlock(&resv->lock);
-	return &bo->gem_base;
-
-error:
-	ww_mutex_unlock(&resv->lock);
-	return ERR_PTR(ret);
-}
-
 static int
 __reservation_object_make_exclusive(struct reservation_object *obj)
 {
@@ -231,7 +179,7 @@ err_fences_put:
 }
 
 /**
- * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
+ * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
  * @dma_buf: Shared DMA buffer
  * @attach: DMA-buf attachment
  *
@@ -242,8 +190,8 @@ err_fences_put:
  * Returns:
  * 0 on success or a negative error code on failure.
  */
-static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
-				 struct dma_buf_attachment *attach)
+static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
+				     struct dma_buf_attachment *attach)
 {
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -291,15 +239,15 @@ error_detach:
 }
 
 /**
- * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
+ * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
  * @dma_buf: Shared DMA buffer
  * @attach: DMA-buf attachment
  *
  * This is called when a shared DMA buffer no longer needs to be accessible by
  * another device. For now, simply unpins the buffer from GTT.
  */
-static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
-				  struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
+				      struct dma_buf_attachment *attach)
 {
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -334,7 +282,7 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 }
 
 /**
- * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
+ * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
  * @dma_buf: Shared DMA buffer
  * @direction: Direction of DMA transfer
  *
@@ -345,8 +293,8 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
  * Returns:
  * 0 on success or a negative error code on failure.
  */
-static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
-				       enum dma_data_direction direction)
+static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
+					   enum dma_data_direction direction)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -374,12 +322,12 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
 }
 
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
-	.attach = amdgpu_gem_map_attach,
-	.detach = amdgpu_gem_map_detach,
+	.attach = amdgpu_dma_buf_map_attach,
+	.detach = amdgpu_dma_buf_map_detach,
 	.map_dma_buf = drm_gem_map_dma_buf,
 	.unmap_dma_buf = drm_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
-	.begin_cpu_access = amdgpu_gem_begin_cpu_access,
+	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
 	.vmap = drm_gem_dmabuf_vmap,
 	.vunmap = drm_gem_dmabuf_vunmap,
@@ -418,6 +366,57 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 }
 
 /**
+ * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
+ * implementation
+ * @dev: DRM device
+ * @attach: DMA-buf attachment
+ * @sg: Scatter/gather table
+ *
+ * Imports shared DMA buffer memory exported by another device.
+ *
+ * Returns:
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf attachment and scatter/gather table.
+ */
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+				 struct dma_buf_attachment *attach,
+				 struct sg_table *sg)
+{
+	struct reservation_object *resv = attach->dmabuf->resv;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
+	int ret;
+
+	memset(&bp, 0, sizeof(bp));
+	bp.size = attach->dmabuf->size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_sg;
+	bp.resv = resv;
+	ww_mutex_lock(&resv->lock, NULL);
+	ret = amdgpu_bo_create(adev, &bp, &bo);
+	if (ret)
+		goto error;
+
+	bo->tbo.sg = sg;
+	bo->tbo.ttm->sg = sg;
+	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+		bo->prime_shared_count = 1;
+
+	ww_mutex_unlock(&resv->lock);
+	return &bo->gem_base;
+
+error:
+	ww_mutex_unlock(&resv->lock);
+	return ERR_PTR(ret);
+}
+
+/**
  * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
  * @dev: DRM device
  * @dma_buf: Shared DMA buffer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
new file mode 100644
index 000000000000..c7056cbe8685
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __AMDGPU_DMA_BUF_H__
+#define __AMDGPU_DMA_BUF_H__
+
+#include <drm/drm_gem.h>
+
+struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *
+amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
+				 struct dma_buf_attachment *attach,
+				 struct sg_table *sg);
+struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
+struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
+void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
+void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
+			  struct vm_area_struct *vma);
+
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index 523b8ab6b04e..eedecaf4c804 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index dca35407879d..521dbd0d9af8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
 	int                min_temp;
 	/* high temperature threshold */
 	int                max_temp;
+	/* edge max emergency(shutdown) temp */
+	int                max_edge_emergency_temp;
+	/* hotspot low temperature threshold */
+	int                min_hotspot_temp;
+	/* hotspot high temperature critical threshold */
+	int                max_hotspot_crit_temp;
+	/* hotspot max emergency(shutdown) temp */
+	int                max_hotspot_emergency_temp;
+	/* memory low temperature threshold */
+	int                min_mem_temp;
+	/* memory high temperature critical threshold */
+	int                max_mem_crit_temp;
+	/* memory max emergency(shutdown) temp */
+	int                max_mem_emergency_temp;
 	/* was last interrupt low to high or high to low */
 	bool               high_to_low;
 	/* interrupt source */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 1e2cc9d68a05..0a577a389024 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -22,21 +22,23 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_gem.h>
+#include <drm/drm_vblank.h>
 #include "amdgpu_drv.h"
 
 #include <drm/drm_pciids.h>
 #include <linux/console.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
 #include <drm/drm_probe_helper.h>
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
-#include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
 
 #include "amdgpu_amdkfd.h"
 
@@ -81,6 +83,8 @@
 #define KMS_DRIVER_MINOR	32
 #define KMS_DRIVER_PATCHLEVEL	0
 
+#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH	256
+
 int amdgpu_vram_limit = 0;
 int amdgpu_vis_vram_limit = 0;
 int amdgpu_gart_size = -1; /* auto */
@@ -93,7 +97,7 @@ int amdgpu_disp_priority = 0;
 int amdgpu_hw_i2c = 0;
 int amdgpu_pcie_gen2 = -1;
 int amdgpu_msi = -1;
-int amdgpu_lockup_timeout = 10000;
+char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
 int amdgpu_dpm = -1;
 int amdgpu_fw_load_type = -1;
 int amdgpu_aspm = -1;
@@ -227,12 +231,21 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(msi, amdgpu_msi, int, 0444);
 
 /**
- * DOC: lockup_timeout (int)
- * Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
- * Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
- */
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
-module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
+ * DOC: lockup_timeout (string)
+ * Set GPU scheduler timeout value in ms.
+ *
+ * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is there can be one or
+ * multiple values specified. 0 and negative values are invalidated. They will be adjusted
+ * to default timeout.
+ *  - With one value specified, the setting will apply to all non-compute jobs.
+ *  - With multiple values specified, the first one will be for GFX. The second one is for Compute.
+ *    And the third and fourth ones are for SDMA and Video.
+ * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
+ * jobs is 10000. And there is no timeout enforced on compute jobs.
+ */
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and no timeout for compute jobs), "
+		"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
+module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
 
 /**
  * DOC: dpm (int)
@@ -655,6 +668,16 @@ MODULE_PARM_DESC(noretry,
 int halt_if_hws_hang;
 module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
+
+/**
+ * DOC: hws_gws_support(bool)
+ * Whether HWS support gws barriers. Default value: false (not supported)
+ * This will be replaced with a MEC firmware version check once firmware
+ * is ready
+ */
+bool hws_gws_support;
+module_param(hws_gws_support, bool, 0444);
+MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
 #endif
 
 /**
@@ -1216,6 +1239,62 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
 	return 0;
 }
 
+int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+{
+	char *input = amdgpu_lockup_timeout;
+	char *timeout_setting = NULL;
+	int index = 0;
+	long timeout;
+	int ret = 0;
+
+	/*
+	 * By default timeout for non compute jobs is 10000.
+	 * And there is no timeout enforced on compute jobs.
+	 */
+	adev->gfx_timeout = adev->sdma_timeout = adev->video_timeout = 10000;
+	adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+		while ((timeout_setting = strsep(&input, ",")) &&
+				strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+			ret = kstrtol(timeout_setting, 0, &timeout);
+			if (ret)
+				return ret;
+
+			/* Invalidate 0 and negative values */
+			if (timeout <= 0) {
+				index++;
+				continue;
+			}
+
+			switch (index++) {
+			case 0:
+				adev->gfx_timeout = timeout;
+				break;
+			case 1:
+				adev->compute_timeout = timeout;
+				break;
+			case 2:
+				adev->sdma_timeout = timeout;
+				break;
+			case 3:
+				adev->video_timeout = timeout;
+				break;
+			default:
+				break;
+			}
+		}
+		/*
+		 * There is only one value specified and
+		 * it should apply to all non-compute jobs.
+		 */
+		if (index == 1)
+			adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+	}
+
+	return ret;
+}
+
 static bool
 amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
 				 bool in_vblank_irq, int *vpos, int *hpos,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
index ec78e2b2015c..571a6dfb473e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index e47609218839..eb3569b46c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -23,22 +23,22 @@
  * Authors:
  *     David Airlie
  */
+
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/vga_switcheroo.h>
 
-#include <drm/drmP.h>
+#include <drm/amdgpu_drm.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/amdgpu_drm.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+
 #include "amdgpu.h"
 #include "cikd.h"
 #include "amdgpu_gem.h"
 
-#include <drm/drm_fb_helper.h>
-
-#include <linux/vga_switcheroo.h>
-
 #include "amdgpu_display.h"
 
 /* object hierarchy -
@@ -121,6 +121,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 					 struct drm_mode_fb_cmd2 *mode_cmd,
 					 struct drm_gem_object **gobj_p)
 {
+	const struct drm_format_info *info;
 	struct amdgpu_device *adev = rfbdev->adev;
 	struct drm_gem_object *gobj = NULL;
 	struct amdgpu_bo *abo = NULL;
@@ -131,7 +132,8 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 	int height = mode_cmd->height;
 	u32 cpp;
 
-	cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0);
+	info = drm_get_format_info(adev->ddev, mode_cmd);
+	cpp = info->cpp[0];
 
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 4dee2326b29c..df49fa4bbf61 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -34,7 +34,9 @@
 #include <linux/kref.h>
 #include <linux/slab.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -427,9 +429,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  unsigned num_hw_submission)
 {
+	struct amdgpu_device *adev = ring->adev;
 	long timeout;
 	int r;
 
+	if (!adev)
+		return -EINVAL;
+
 	/* Check that num_hw_submission is a power of two */
 	if ((num_hw_submission & (num_hw_submission - 1)) != 0)
 		return -EINVAL;
@@ -451,12 +457,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
 	/* No need to setup the GPU scheduler for KIQ ring */
 	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
-		/* for non-sriov case, no timeout enforce on compute ring */
-		if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
-				&& !amdgpu_sriov_vf(ring->adev))
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		else
-			timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+		switch (ring->funcs->type) {
+		case AMDGPU_RING_TYPE_GFX:
+			timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_COMPUTE:
+			/*
+			 * For non-sriov case, no timeout enforce
+			 * on compute ring by default. Unless user
+			 * specifies a timeout for compute ring.
+			 *
+			 * For sriov case, always use the timeout
+			 * as gfx ring
+			 */
+			if (!amdgpu_sriov_vf(ring->adev))
+				timeout = adev->compute_timeout;
+			else
+				timeout = adev->gfx_timeout;
+			break;
+		case AMDGPU_RING_TYPE_SDMA:
+			timeout = adev->sdma_timeout;
+			break;
+		default:
+			timeout = adev->video_timeout;
+			break;
+		}
 
 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d11e1721147..d79ab1da9e07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -25,7 +25,10 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
 #include <drm/amdgpu_drm.h>
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index f89f5734d985..dad2186f4ed5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -27,26 +27,11 @@
 struct amdgpu_ring;
 struct amdgpu_bo;
 
-struct amdgpu_gds_asic_info {
-	uint32_t	total_size;
-	uint32_t	gfx_partition_size;
-	uint32_t	cs_partition_size;
-};
-
 struct amdgpu_gds {
-	struct amdgpu_gds_asic_info	mem;
-	struct amdgpu_gds_asic_info	gws;
-	struct amdgpu_gds_asic_info	oa;
+	uint32_t gds_size;
+	uint32_t gws_size;
+	uint32_t oa_size;
 	uint32_t			gds_compute_max_wave_id;
-
-	/* At present, GDS, GWS and OA resources for gfx (graphics)
-	 * is always pre-allocated and available for graphics operation.
-	 * Such resource is shared between all gfx clients.
-	 * TODO: move this operation to user space
-	 * */
-	struct amdgpu_bo*		gds_gfx_bo;
-	struct amdgpu_bo*		gws_gfx_bo;
-	struct amdgpu_bo*		oa_gfx_bo;
 };
 
 struct amdgpu_gds_reg_offset {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d4fcf5475464..37b526c6f494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,9 +26,13 @@
  *          Jerome Glisse
  */
 #include <linux/ktime.h>
+#include <linux/module.h>
 #include <linux/pagemap.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_xgmi.h"
@@ -171,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, true);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -330,26 +334,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
@@ -610,7 +612,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, true);
 	if (r)
 		goto error_unref;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index f1ddfc50bcc7..b8ba6e27c61f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 				struct drm_file *file_priv);
 unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
-struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
-					struct drm_gem_object *gobj,
-					int flags);
-struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
-					    struct dma_buf *dma_buf);
-struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
-void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
-void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
-
-extern const struct dma_buf_ops amdgpu_dmabuf_ops;
 
 /*
  * GEM objects.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 997932ebbb83..f198185c1fb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -22,7 +22,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_rlc.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 250d9212cc38..924d83e711ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/io-64-nonatomic-lo-hi.h>
+
 #include "amdgpu.h"
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 62591d081856..627104401e84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -22,7 +22,6 @@
  * Authors: Christian König
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 
 struct amdgpu_gtt_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index f2739995c335..70dbe343f51d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -23,9 +23,10 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
+
 #include <linux/export.h>
+#include <linux/pci.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_edid.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index fe393a46f881..c124e583bb91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -28,8 +28,10 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_debugfs.h>
+
 #include "amdgpu.h"
 #include "atom.h"
 #include "amdgpu_trace.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index df9b173c3d0b..df6d33381f8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -24,7 +24,7 @@
 
 #include <linux/idr.h>
 #include <linux/dma-fence-array.h>
-#include <drm/drmP.h>
+
 
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 934dfdcb4e73..6d8f05511aba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -21,7 +21,8 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/dma-mapping.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
index 26482914dc4b..5cf142e849bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ioc32.c
@@ -29,8 +29,9 @@
  */
 #include <linux/compat.h>
 
-#include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
+#include <drm/drm_ioctl.h>
+
 #include "amdgpu_drv.h"
 
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index af4c3b1af322..2a3f5ec298db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -43,8 +43,11 @@
  */
 
 #include <linux/irq.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_vblank.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 0a17fb1af204..9d76e0923a5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -24,7 +24,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 
 	if (amdgpu_device_should_recover_gpu(ring->adev))
 		amdgpu_device_gpu_recover(ring->adev, job);
+	else
+		drm_sched_suspend_timeout(&ring->sched);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b17d0545728e..a70e5a32749a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -25,8 +25,9 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
+#include <drm/drm_debugfs.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
@@ -35,6 +36,8 @@
 
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
@@ -590,13 +593,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		struct drm_amdgpu_info_gds gds_info;
 
 		memset(&gds_info, 0, sizeof(gds_info));
-		gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
-		gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
-		gds_info.gds_total_size = adev->gds.mem.total_size;
-		gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
-		gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
-		gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
-		gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
+		gds_info.compute_partition_size = adev->gds.gds_size;
+		gds_info.gds_total_size = adev->gds.gds_size;
+		gds_info.gws_per_compute_partition = adev->gds.gws_size;
+		gds_info.oa_per_compute_partition = adev->gds.oa_size;
 		return copy_to_user(out, &gds_info,
 				    min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
 	}
@@ -977,7 +977,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	int r, pasid;
 
 	/* Ensure IB tests are run on ring */
-	flush_delayed_work(&adev->late_init_work);
+	flush_delayed_work(&adev->delayed_init_work);
 
 	file_priv->driver_priv = NULL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 58ed401c5996..4ff4cf5988ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,9 +45,9 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "amdgpu.h"
@@ -58,14 +58,12 @@
  *
  * @adev: amdgpu device pointer
  * @mm: process address space
- * @mn: MMU notifier structure
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
  * @lock: rw semaphore protecting the notifier nodes
  * @objects: interval tree containing amdgpu_mn_nodes
- * @read_lock: mutex for recursive locking of @lock
- * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +71,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -85,8 +82,9 @@ struct amdgpu_mn {
 	/* objects protected by lock */
 	struct rw_semaphore	lock;
 	struct rb_root_cached	objects;
-	struct mutex		read_lock;
-	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +101,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +127,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -181,14 +177,10 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
 static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 {
 	if (blockable)
-		mutex_lock(&amn->read_lock);
-	else if (!mutex_trylock(&amn->read_lock))
+		down_read(&amn->lock);
+	else if (!down_read_trylock(&amn->lock))
 		return -EAGAIN;
 
-	if (atomic_inc_return(&amn->recursion) == 1)
-		down_read_non_owner(&amn->lock);
-	mutex_unlock(&amn->read_lock);
-
 	return 0;
 }
 
@@ -199,8 +191,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
  */
 static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
 {
-	if (atomic_dec_return(&amn->recursion) == 0)
-		up_read_non_owner(&amn->lock);
+	up_read(&amn->lock);
 }
 
 /**
@@ -229,149 +220,132 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
 /**
- * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
-	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end = range->end - 1;
+	end -= 1;
 
 	/* TODO we should be able to split locking for interval tree and
 	 * amdgpu_mn_invalidate_node
 	 */
-	if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+	if (amdgpu_mn_read_lock(amn, blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, range->start, end);
+	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
-		if (!mmu_notifier_range_blockable(range)) {
+		if (!blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, range->start, end);
+		it = interval_tree_iter_next(it, start, end);
 
-		amdgpu_mn_invalidate_node(node, range->start, end);
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
- * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
-	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end = range->end - 1;
+	end -= 1;
 
-	if (amdgpu_mn_read_lock(amn, mmu_notifier_range_blockable(range)))
+	if (amdgpu_mn_read_lock(amn, blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, range->start, end);
+	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
-		if (!mmu_notifier_range_blockable(range)) {
+		if (!blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, range->start, end);
+		it = interval_tree_iter_next(it, start, end);
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
 			struct kgd_mem *mem = bo->kfd_bo;
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-							 range->start,
-							 end))
-				amdgpu_amdkfd_evict_userptr(mem, range->mm);
+							 start, end))
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -401,12 +375,10 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
-	mutex_init(&amn->read_lock);
-	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -432,7 +404,7 @@ free_amn:
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +460,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
@@ -528,3 +500,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+		(1 << 0), /* HMM_PFN_VALID */
+		(1 << 1), /* HMM_PFN_WRITE */
+		0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+		0, /* HMM_PFN_NONE */
+		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..f5b67c63ed6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,22 +25,24 @@
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
  */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
@@ -51,6 +53,8 @@ static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 }
 static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
+	DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
+		      "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
 	return -ENODEV;
 }
 static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 2e9e3db778c6..eb9975f4decb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -331,8 +331,6 @@ struct amdgpu_mode_info {
 	struct drm_property *audio_property;
 	/* FMT dithering */
 	struct drm_property *dither_property;
-	/* maximum number of bits per channel for monitor color */
-	struct drm_property *max_bpc_property;
 	/* Adaptive Backlight Modulation (power feature) */
 	struct drm_property *abm_level_property;
 	/* hardcoded DFP edid from BIOS */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 93b2c5a48a71..16f96f2e3671 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -31,7 +31,7 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index 8e67c1210d7c..1f2305b7bd13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 039cfa2ec89d..194d0c75b072 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -22,7 +22,9 @@
  * Authors: Rafał Miłecki <zajec5@gmail.com>
  *          Alex Deucher <alexdeucher@gmail.com>
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+
 #include "amdgpu.h"
 #include "amdgpu_drv.h"
 #include "amdgpu_pm.h"
@@ -31,6 +33,7 @@
 #include "amdgpu_smu.h"
 #include "atom.h"
 #include <linux/power_supply.h>
+#include <linux/pci.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/nospec.h>
@@ -67,6 +70,15 @@ static const struct cg_flag_name clocks[] = {
 	{0, NULL},
 };
 
+static const struct hwmon_temp_label {
+	enum PP_HWMON_TEMP channel;
+	const char *label;
+} temp_label[] = {
+	{PP_TEMP_EDGE, "edge"},
+	{PP_TEMP_JUNCTION, "junction"},
+	{PP_TEMP_MEM, "mem"},
+};
+
 void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
 {
 	if (adev->pm.dpm_enabled) {
@@ -758,7 +770,11 @@ static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
 
 	pr_debug("featuremask = 0x%llx\n", featuremask);
 
-	if (adev->powerplay.pp_funcs->set_ppfeature_status) {
+	if (is_support_sw_smu(adev)) {
+		ret = smu_set_ppfeature_status(&adev->smu, featuremask);
+		if (ret)
+			return -EINVAL;
+	} else if (adev->powerplay.pp_funcs->set_ppfeature_status) {
 		ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
 		if (ret)
 			return -EINVAL;
@@ -774,7 +790,9 @@ static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 
-	if (adev->powerplay.pp_funcs->get_ppfeature_status)
+	if (is_support_sw_smu(adev)) {
+		return smu_get_ppfeature_status(&adev->smu, buf);
+	} else if (adev->powerplay.pp_funcs->get_ppfeature_status)
 		return amdgpu_dpm_get_ppfeature_status(adev, buf);
 
 	return snprintf(buf, PAGE_SIZE, "\n");
@@ -1303,6 +1321,32 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
 }
 
 /**
+ * DOC: mem_busy_percent
+ *
+ * The amdgpu driver provides a sysfs API for reading how busy the VRAM
+ * is as a percentage.  The file mem_busy_percent is used for this.
+ * The SMU firmware computes a percentage of load based on the
+ * aggregate activity level in the IP cores.
+ */
+static ssize_t amdgpu_get_memory_busy_percent(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int r, value, size = sizeof(value);
+
+	/* read the IP busy sensor */
+	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD,
+				   (void *)&value, &size);
+
+	if (r)
+		return r;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+/**
  * DOC: pcie_bw
  *
  * The amdgpu driver provides a sysfs API for estimating how much data
@@ -1327,6 +1371,29 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev,
 			count0, count1, pcie_get_mps(adev->pdev));
 }
 
+/**
+ * DOC: unique_id
+ *
+ * The amdgpu driver provides a sysfs API for providing a unique ID for the GPU
+ * The file unique_id is used for this.
+ * This will provide a Unique ID that will persist from machine to machine
+ *
+ * NOTE: This will only work for GFX9 and newer. This file will be absent
+ * on unsupported ASICs (GFX8 and older)
+ */
+static ssize_t amdgpu_get_unique_id(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+
+	if (adev->unique_id)
+		return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id);
+
+	return 0;
+}
+
 static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
 static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
 		   amdgpu_get_dpm_forced_performance_level,
@@ -1371,10 +1438,13 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
 		amdgpu_set_pp_od_clk_voltage);
 static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
 		amdgpu_get_busy_percent, NULL);
+static DEVICE_ATTR(mem_busy_percent, S_IRUGO,
+		amdgpu_get_memory_busy_percent, NULL);
 static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
 static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
 		amdgpu_get_ppfeature_status,
 		amdgpu_set_ppfeature_status);
+static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -1382,6 +1452,7 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
+	int channel = to_sensor_dev_attr(attr)->index;
 	int r, temp, size = sizeof(temp);
 
 	/* Can't get temperature when the card is off */
@@ -1389,11 +1460,32 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 	     (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
 		return -EINVAL;
 
-	/* get the temperature */
-	r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
-				   (void *)&temp, &size);
-	if (r)
-		return r;
+	if (channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	switch (channel) {
+	case PP_TEMP_JUNCTION:
+		/* get current junction temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	case PP_TEMP_EDGE:
+		/* get current edge temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_EDGE_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	case PP_TEMP_MEM:
+		/* get current memory temperature */
+		r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_TEMP,
+					   (void *)&temp, &size);
+		if (r)
+			return r;
+		break;
+	}
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
@@ -1414,6 +1506,76 @@ static ssize_t amdgpu_hwmon_show_temp_thresh(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
 }
 
+static ssize_t amdgpu_hwmon_show_hotspot_temp_thresh(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int hyst = to_sensor_dev_attr(attr)->index;
+	int temp;
+
+	if (hyst)
+		temp = adev->pm.dpm.thermal.min_hotspot_temp;
+	else
+		temp = adev->pm.dpm.thermal.max_hotspot_crit_temp;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_mem_temp_thresh(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int hyst = to_sensor_dev_attr(attr)->index;
+	int temp;
+
+	if (hyst)
+		temp = adev->pm.dpm.thermal.min_mem_temp;
+	else
+		temp = adev->pm.dpm.thermal.max_mem_crit_temp;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_label(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	int channel = to_sensor_dev_attr(attr)->index;
+
+	if (channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", temp_label[channel].label);
+}
+
+static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct amdgpu_device *adev = dev_get_drvdata(dev);
+	int channel = to_sensor_dev_attr(attr)->index;
+	int temp = 0;
+
+	if (channel >= PP_TEMP_MAX)
+		return -EINVAL;
+
+	switch (channel) {
+	case PP_TEMP_JUNCTION:
+		temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp;
+		break;
+	case PP_TEMP_EDGE:
+		temp = adev->pm.dpm.thermal.max_edge_emergency_temp;
+		break;
+	case PP_TEMP_MEM:
+		temp = adev->pm.dpm.thermal.max_mem_emergency_temp;
+		break;
+	}
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", temp);
+}
+
 static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev,
 					    struct device_attribute *attr,
 					    char *buf)
@@ -1983,11 +2145,20 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  * hwmon interfaces for GPU temperature:
  *
- * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp[1-3]_input: the on die GPU temperature in millidegrees Celsius
+ *   - temp2_input and temp3_input are supported on SOC15 dGPUs only
  *
- * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp[1-3]_label: temperature channel label
+ *   - temp2_label and temp3_label are supported on SOC15 dGPUs only
  *
- * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ * - temp[1-3]_crit: temperature critical max value in millidegrees Celsius
+ *   - temp2_crit and temp3_crit are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *   - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only
+ *
+ * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius
+ *   - these are supported on SOC15 dGPUs only
  *
  * hwmon interfaces for GPU voltage:
  *
@@ -2035,9 +2206,21 @@ static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
  *
  */
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_EDGE);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_EDGE);
+static SENSOR_DEVICE_ATTR(temp2_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_JUNCTION);
+static SENSOR_DEVICE_ATTR(temp3_label, S_IRUGO, amdgpu_hwmon_show_temp_label, NULL, PP_TEMP_MEM);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0);
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
@@ -2064,6 +2247,18 @@ static struct attribute *hwmon_attributes[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp2_input.dev_attr.attr,
+	&sensor_dev_attr_temp2_crit.dev_attr.attr,
+	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp3_input.dev_attr.attr,
+	&sensor_dev_attr_temp3_crit.dev_attr.attr,
+	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp2_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp3_emergency.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&sensor_dev_attr_temp2_label.dev_attr.attr,
+	&sensor_dev_attr_temp3_label.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
 	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
 	&sensor_dev_attr_pwm1_min.dev_attr.attr,
@@ -2186,6 +2381,22 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
 	     attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
 		return 0;
 
+	/* only SOC15 dGPUs support hotspot and mem temperatures */
+	if (((adev->flags & AMD_IS_APU) ||
+	     adev->asic_type < CHIP_VEGA10) &&
+	    (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+	     attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+		return 0;
+
 	return effective_mode;
 }
 
@@ -2492,7 +2703,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
 
 int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version)
 {
-	int r = -EINVAL;
+	int r;
 
 	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
 		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
@@ -2502,7 +2713,7 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio
 		}
 		*smu_version = adev->pm.fw_version;
 	}
-	return r;
+	return 0;
 }
 
 int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
@@ -2627,6 +2838,16 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 				"gpu_busy_level\n");
 		return ret;
 	}
+	/* APU does not have its own dedicated memory */
+	if (!(adev->flags & AMD_IS_APU)) {
+		ret = device_create_file(adev->dev,
+				&dev_attr_mem_busy_percent);
+		if (ret) {
+			DRM_ERROR("failed to create device file	"
+					"mem_busy_percent\n");
+			return ret;
+		}
+	}
 	/* PCIe Perf counters won't work on APU nodes */
 	if (!(adev->flags & AMD_IS_APU)) {
 		ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
@@ -2635,6 +2856,12 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 			return ret;
 		}
 	}
+	if (adev->unique_id)
+		ret = device_create_file(adev->dev, &dev_attr_unique_id);
+	if (ret) {
+		DRM_ERROR("failed to create device file unique_id\n");
+		return ret;
+	}
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -2693,7 +2920,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 				&dev_attr_pp_od_clk_voltage);
 	device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
 	if (!(adev->flags & AMD_IS_APU))
+		device_remove_file(adev->dev, &dev_attr_mem_busy_percent);
+	if (!(adev->flags & AMD_IS_APU))
 		device_remove_file(adev->dev, &dev_attr_pcie_bw);
+	if (adev->unique_id)
+		device_remove_file(adev->dev, &dev_attr_unique_id);
 	if ((adev->asic_type >= CHIP_VEGA10) &&
 	    !(adev->flags & AMD_IS_APU))
 		device_remove_file(adev->dev, &dev_attr_ppfeatures);
@@ -2790,6 +3021,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
 	/* GPU Load */
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD, (void *)&value, &size))
 		seq_printf(m, "GPU Load: %u %%\n", value);
+	/* MEM Load */
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MEM_LOAD, (void *)&value, &size))
+		seq_printf(m, "MEM Load: %u %%\n", value);
+
 	seq_printf(m, "\n");
 
 	/* SMC feature mask */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 86cc24b2e0aa..909be1bf2294 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -24,7 +24,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_ucode.h"
@@ -289,6 +289,34 @@ static int psp_asd_load(struct psp_context *psp)
 	return ret;
 }
 
+static void psp_prep_reg_prog_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+		uint32_t id, uint32_t value)
+{
+	cmd->cmd_id = GFX_CMD_ID_PROG_REG;
+	cmd->cmd.cmd_setup_reg_prog.reg_value = value;
+	cmd->cmd.cmd_setup_reg_prog.reg_id = id;
+}
+
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+		uint32_t value)
+{
+	struct psp_gfx_cmd_resp *cmd = NULL;
+	int ret = 0;
+
+	if (reg >= PSP_REG_LAST)
+		return -EINVAL;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_reg_prog_cmd_buf(cmd, reg, value);
+	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
+
+	kfree(cmd);
+	return ret;
+}
+
 static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 					  uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared,
 					  uint32_t xgmi_ta_size, uint32_t shared_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f07c96..cf49539b0b07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -62,6 +62,14 @@ struct psp_ring
 	uint32_t			ring_size;
 };
 
+/* More registers may will be supported */
+enum psp_reg_prog_id {
+	PSP_REG_IH_RB_CNTL        = 0,  /* register IH_RB_CNTL */
+	PSP_REG_IH_RB_CNTL_RING1  = 1,  /* register IH_RB_CNTL_RING1 */
+	PSP_REG_IH_RB_CNTL_RING2  = 2,  /* register IH_RB_CNTL_RING2 */
+	PSP_REG_LAST
+};
+
 struct psp_funcs
 {
 	int (*init_microcode)(struct psp_context *psp);
@@ -95,12 +103,26 @@ struct psp_funcs
 	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
 };
 
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
+struct psp_xgmi_node_info {
+	uint64_t				node_id;
+	uint8_t					num_hops;
+	uint8_t					is_sharing_enabled;
+	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+	uint32_t			num_nodes;
+	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
 struct psp_xgmi_context {
 	uint8_t				initialized;
 	uint32_t			session_id;
 	struct amdgpu_bo                *xgmi_shared_bo;
 	uint64_t                        xgmi_shared_mc_addr;
 	void                            *xgmi_shared_buf;
+	struct psp_xgmi_topology_info	top_info;
 };
 
 struct psp_ras_context {
@@ -181,18 +203,6 @@ struct amdgpu_psp_funcs {
 					enum AMDGPU_UCODE_ID);
 };
 
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
-struct psp_xgmi_node_info {
-	uint64_t				node_id;
-	uint8_t					num_hops;
-	uint8_t					is_sharing_enabled;
-	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
-	uint32_t			num_nodes;
-	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
 
 #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
@@ -250,5 +260,6 @@ int psp_ras_enable_features(struct psp_context *psp,
 		union ta_ras_cmd_input *info, bool enable);
 
 extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
-
+int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg,
+		uint32_t value);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 22bd21efe6b1..4d387557cc37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -24,6 +24,8 @@
 #include <linux/debugfs.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ras.h"
 #include "amdgpu_atomfirmware.h"
@@ -90,6 +92,12 @@ struct ras_manager {
 	struct ras_err_data err_data;
 };
 
+struct ras_badpage {
+	unsigned int bp;
+	unsigned int size;
+	unsigned int flags;
+};
+
 const char *ras_error_string[] = {
 	"none",
 	"parity",
@@ -118,9 +126,16 @@ const char *ras_block_string[] = {
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 #define ras_block_str(i) (ras_block_string[i])
 
-#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
+#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		1
+#define AMDGPU_RAS_FLAG_INIT_NEED_RESET		2
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
 
+static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev,
+		uint64_t offset, uint64_t size,
+		struct amdgpu_bo **bo_ptr);
+static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
+		struct amdgpu_bo **bo_ptr);
+
 static void amdgpu_ras_self_test(struct amdgpu_device *adev)
 {
 	/* TODO */
@@ -237,8 +252,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
 
 	return 0;
 }
-/*
- * DOC: ras debugfs control interface
+/**
+ * DOC: AMDGPU RAS debugfs control interface
  *
  * It accepts struct ras_debug_if who has two members.
  *
@@ -300,6 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
 	struct ras_debug_if data;
+	struct amdgpu_bo *bo;
 	int ret = 0;
 
 	ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data);
@@ -317,7 +333,16 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
 		ret = amdgpu_ras_feature_enable(adev, &data.head, 1);
 		break;
 	case 2:
+		ret = amdgpu_ras_reserve_vram(adev,
+				data.inject.address, PAGE_SIZE, &bo);
+		/* This address might be used already on failure. In fact we can
+		 * perform an injection in such case.
+		 */
+		if (ret)
+			break;
+		data.inject.address = amdgpu_bo_gpu_offset(bo);
 		ret = amdgpu_ras_error_inject(adev, &data.inject);
+		amdgpu_ras_release_vram(adev, &bo);
 		break;
 	default:
 		ret = -EINVAL;
@@ -521,6 +546,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
 				enable ? "enable":"disable",
 				ras_block_str(head->block),
 				ret);
+		if (ret == TA_RAS_STATUS__RESET_NEEDED)
+			return -EAGAIN;
 		return -EINVAL;
 	}
 
@@ -541,16 +568,32 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	if (con->flags & AMDGPU_RAS_FLAG_INIT_BY_VBIOS) {
-		/* If ras is enabled by vbios, we set up ras object first in
-		 * both case. For enable, that is all what we need do. For
-		 * disable, we need perform a ras TA disable cmd after that.
-		 */
-		ret = __amdgpu_ras_feature_enable(adev, head, 1);
-		if (ret)
-			return ret;
+		if (enable) {
+			/* There is no harm to issue a ras TA cmd regardless of
+			 * the currecnt ras state.
+			 * If current state == target state, it will do nothing
+			 * But sometimes it requests driver to reset and repost
+			 * with error code -EAGAIN.
+			 */
+			ret = amdgpu_ras_feature_enable(adev, head, 1);
+			/* With old ras TA, we might fail to enable ras.
+			 * Log it and just setup the object.
+			 * TODO need remove this WA in the future.
+			 */
+			if (ret == -EINVAL) {
+				ret = __amdgpu_ras_feature_enable(adev, head, 1);
+				if (!ret)
+					DRM_INFO("RAS INFO: %s setup object\n",
+						ras_block_str(head->block));
+			}
+		} else {
+			/* setup the object then issue a ras TA disable cmd.*/
+			ret = __amdgpu_ras_feature_enable(adev, head, 1);
+			if (ret)
+				return ret;
 
-		if (!enable)
 			ret = amdgpu_ras_feature_enable(adev, head, 0);
+		}
 	} else
 		ret = amdgpu_ras_feature_enable(adev, head, enable);
 
@@ -691,6 +734,77 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 
 /* sysfs begin */
 
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count);
+
+static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
+{
+	switch (flags) {
+	case 0:
+		return "R";
+	case 1:
+		return "P";
+	case 2:
+	default:
+		return "F";
+	};
+}
+
+/*
+ * DOC: ras sysfs gpu_vram_bad_pages interface
+ *
+ * It allows user to read the bad pages of vram on the gpu through
+ * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
+ *
+ * It outputs multiple lines, and each line stands for one gpu page.
+ *
+ * The format of one line is below,
+ * gpu pfn : gpu page size : flags
+ *
+ * gpu pfn and gpu page size are printed in hex format.
+ * flags can be one of below character,
+ * R: reserved, this gpu page is reserved and not able to use.
+ * P: pending for reserve, this gpu page is marked as bad, will be reserved
+ *    in next window of page_reserve.
+ * F: unable to reserve. this gpu page can't be reserved due to some reasons.
+ *
+ * examples:
+ * 0x00000001 : 0x00001000 : R
+ * 0x00000002 : 0x00001000 : P
+ */
+
+static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
+		struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t ppos, size_t count)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, badpages_attr);
+	struct amdgpu_device *adev = con->adev;
+	const unsigned int element_size =
+		sizeof("0xabcdabcd : 0x12345678 : R\n") - 1;
+	unsigned int start = div64_ul(ppos + element_size - 1, element_size);
+	unsigned int end = div64_ul(ppos + count - 1, element_size);
+	ssize_t s = 0;
+	struct ras_badpage *bps = NULL;
+	unsigned int bps_count = 0;
+
+	memset(buf, 0, count);
+
+	if (amdgpu_ras_badpages_read(adev, &bps, &bps_count))
+		return 0;
+
+	for (; start < end && start < bps_count; start++)
+		s += scnprintf(&buf[s], element_size + 1,
+				"0x%08x : 0x%08x : %1s\n",
+				bps[start].bp,
+				bps[start].size,
+				amdgpu_ras_badpage_flags_str(bps[start].flags));
+
+	kfree(bps);
+
+	return s;
+}
+
 static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -731,9 +845,14 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	con->features_attr = (struct device_attribute) {
@@ -743,7 +862,19 @@ static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev)
 		},
 			.show = amdgpu_ras_sysfs_features_read,
 	};
+
+	con->badpages_attr = (struct bin_attribute) {
+		.attr = {
+			.name = "gpu_vram_bad_pages",
+			.mode = S_IRUGO,
+		},
+		.size = 0,
+		.private = NULL,
+		.read = amdgpu_ras_sysfs_badpages_read,
+	};
+
 	sysfs_attr_init(attrs[0]);
+	sysfs_bin_attr_init(bin_attrs[0]);
 
 	return sysfs_create_group(&adev->dev->kobj, &group);
 }
@@ -755,9 +886,14 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		NULL
 	};
+	struct bin_attribute *bin_attrs[] = {
+		&con->badpages_attr,
+		NULL
+	};
 	struct attribute_group group = {
 		.name = "ras",
 		.attrs = attrs,
+		.bin_attrs = bin_attrs,
 	};
 
 	sysfs_remove_group(&adev->dev->kobj, &group);
@@ -1089,6 +1225,53 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev)
 /* ih end */
 
 /* recovery begin */
+
+/* return 0 on success.
+ * caller need free bps.
+ */
+static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
+		struct ras_badpage **bps, unsigned int *count)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	struct ras_err_handler_data *data;
+	int i = 0;
+	int ret = 0;
+
+	if (!con || !con->eh_data || !bps || !count)
+		return -EINVAL;
+
+	mutex_lock(&con->recovery_lock);
+	data = con->eh_data;
+	if (!data || data->count == 0) {
+		*bps = NULL;
+		goto out;
+	}
+
+	*bps = kmalloc(sizeof(struct ras_badpage) * data->count, GFP_KERNEL);
+	if (!*bps) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (; i < data->count; i++) {
+		(*bps)[i] = (struct ras_badpage){
+			.bp = data->bps[i].bp,
+			.size = AMDGPU_GPU_PAGE_SIZE,
+			.flags = 0,
+		};
+
+		if (data->last_reserved <= i)
+			(*bps)[i].flags = 1;
+		else if (data->bps[i].bo == NULL)
+			(*bps)[i].flags = 2;
+	}
+
+	*count = data->count;
+out:
+	mutex_unlock(&con->recovery_lock);
+	return ret;
+}
+
 static void amdgpu_ras_do_recovery(struct work_struct *work)
 {
 	struct amdgpu_ras *ras =
@@ -1340,6 +1523,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 }
 /* recovery end */
 
+/* return 0 if ras will reset gpu and repost.*/
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block)
+{
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (!ras)
+		return -EINVAL;
+
+	ras->flags |= AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+	return 0;
+}
+
 /*
  * check hardware's ras ability which will be saved in hw_supported.
  * if hardware does not support ras, we can skip some ras initializtion and
@@ -1415,8 +1611,10 @@ recovery_out:
 	return -EINVAL;
 }
 
-/* do some init work after IP late init as dependence */
-void amdgpu_ras_post_init(struct amdgpu_device *adev)
+/* do some init work after IP late init as dependence.
+ * and it runs in resume/gpu reset/booting up cases.
+ */
+void amdgpu_ras_resume(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	struct ras_manager *obj, *tmp;
@@ -1444,6 +1642,32 @@ void amdgpu_ras_post_init(struct amdgpu_device *adev)
 			}
 		}
 	}
+
+	if (con->flags & AMDGPU_RAS_FLAG_INIT_NEED_RESET) {
+		con->flags &= ~AMDGPU_RAS_FLAG_INIT_NEED_RESET;
+		/* setup ras obj state as disabled.
+		 * for init_by_vbios case.
+		 * if we want to enable ras, just enable it in a normal way.
+		 * If we want do disable it, need setup ras obj as enabled,
+		 * then issue another TA disable cmd.
+		 * See feature_enable_on_boot
+		 */
+		amdgpu_ras_disable_all_features(adev, 1);
+		amdgpu_ras_reset_gpu(adev, 0);
+	}
+}
+
+void amdgpu_ras_suspend(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+	if (!con)
+		return;
+
+	amdgpu_ras_disable_all_features(adev, 0);
+	/* Make sure all ras objects are disabled. */
+	if (con->features)
+		amdgpu_ras_disable_all_features(adev, 1);
 }
 
 /* do some fini work before IP fini as dependence */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index eaef5edefc34..94c652f5265a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -93,6 +93,7 @@ struct amdgpu_ras {
 	struct dentry *ent;
 	/* sysfs */
 	struct device_attribute features_attr;
+	struct bin_attribute badpages_attr;
 	/* block array */
 	struct ras_manager *objs;
 
@@ -172,9 +173,17 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
+	if (block >= AMDGPU_RAS_BLOCK_COUNT)
+		return 0;
 	return ras && (ras->supported & (1 << block));
 }
 
+int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
+		unsigned int block);
+
+void amdgpu_ras_resume(struct amdgpu_device *adev);
+void amdgpu_ras_suspend(struct amdgpu_device *adev);
+
 int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		bool is_ce);
 
@@ -187,13 +196,10 @@ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev);
 static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev,
 		bool is_baco)
 {
-	/* remove me when gpu reset works on vega20 A1. */
-#if 0
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
 		schedule_work(&ras->recovery_work);
-#endif
 	return 0;
 }
 
@@ -255,7 +261,6 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) {
 
 /* called in ip_init and ip_fini */
 int amdgpu_ras_init(struct amdgpu_device *adev);
-void amdgpu_ras_post_init(struct amdgpu_device *adev);
 int amdgpu_ras_fini(struct amdgpu_device *adev);
 int amdgpu_ras_pre_fini(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 8f5026c123ef..ee440fe29b91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -28,8 +28,9 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <linux/debugfs.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index d7fae2676269..cdddce938bf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -114,6 +114,7 @@ struct amdgpu_ring_funcs {
 	uint32_t		align_mask;
 	u32			nop;
 	bool			support_64bit_ptrs;
+	bool			no_user_fence;
 	unsigned		vmhub;
 	unsigned		extra_dw;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index bfaf5c6323be..0bd1d4ffc19e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -41,7 +41,7 @@
  * If we are asked to block we wait on all the oldest fence of all
  * rings. We just wait for any of those fence to complete.
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 
 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 639297250c21..c799691dfa84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -23,8 +23,11 @@
  */
 
 #include <linux/fdtable.h>
+#include <linux/file.h>
 #include <linux/pid.h>
+
 #include <drm/amdgpu_drm.h>
+
 #include "amdgpu.h"
 
 #include "amdgpu_vm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
index 2a1a0c734bdd..12299fd95691 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h
@@ -25,7 +25,10 @@
 #ifndef __AMDGPU_SCHED_H__
 #define __AMDGPU_SCHED_H__
 
-#include <drm/drmP.h>
+enum drm_sched_priority;
+
+struct drm_device;
+struct drm_file;
 
 enum drm_sched_priority amdgpu_to_sched_priority(int amdgpu_priority);
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 115bb0c99b0f..a425329d1897 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_sdma.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 2d6f5ec77a68..9828f3c7c655 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,7 +28,6 @@
  *    Christian König <christian.koenig@amd.com>
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 8904e62dca7a..b66d29d5ffa2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -22,7 +22,7 @@
  *
  * Authors: Michel Dänzer
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_uvd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d3ca2424b5fe..77674a7b9616 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -28,8 +28,6 @@
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
-#include <drm/drmP.h>
-
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
index f212402570a5..57c6c39ba064 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c
@@ -21,7 +21,7 @@
  *
  * Author : Dave Airlie <airlied@redhat.com>
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0c52d1f9fe0f..d81bebf76310 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -29,20 +29,26 @@
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Dave Airlie
  */
+
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/hmm.h>
+#include <linux/pagemap.h>
+#include <linux/sched/task.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
 #include <drm/amdgpu_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
-#include <linux/iommu.h>
+
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -703,143 +709,191 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
 	uint32_t		userflags;
-	spinlock_t              guptasklock;
-	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+	struct hmm_range	*ranges;
+	int			nr_ranges;
+#endif
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
  */
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+
+/* Support Userptr pages cross max 16 vmas */
+#define MAX_NR_VMAS	(16)
+
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long start = gtt->userptr;
+	unsigned long end = start + ttm->num_pages * PAGE_SIZE;
+	struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
+	struct hmm_range *ranges;
+	unsigned long nr_pages, i;
+	uint64_t *pfns, f;
+	int r = 0;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
-
 	down_read(&mm->mmap_sem);
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	/* user pages may cross multiple VMAs */
+	gtt->nr_ranges = 0;
+	do {
+		unsigned long vm_start;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
+		if (gtt->nr_ranges >= MAX_NR_VMAS) {
+			DRM_ERROR("Too many VMAs in userptr range\n");
+			r = -EFAULT;
+			goto out;
+		}
+
+		vm_start = vma ? vma->vm_end : start;
+		vma = find_vma(mm, vm_start);
+		if (unlikely(!vma || vm_start < vma->vm_start)) {
+			r = -EFAULT;
+			goto out;
 		}
+		vmas[gtt->nr_ranges++] = vma;
+	} while (end > vma->vm_end);
+
+	DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
+		start, gtt->nr_ranges, ttm->num_pages);
+
+	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+		vmas[0]->vm_file)) {
+		r = -EPERM;
+		goto out;
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
+	if (unlikely(!ranges)) {
+		r = -ENOMEM;
+		goto out;
+	}
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
+	if (unlikely(!pfns)) {
+		r = -ENOMEM;
+		goto out_free_ranges;
+	}
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	for (i = 0; i < gtt->nr_ranges; i++)
+		amdgpu_hmm_init_range(&ranges[i]);
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+	f = ranges[0].flags[HMM_PFN_VALID];
+	f |= amdgpu_ttm_tt_is_readonly(ttm) ?
+				0 : ranges[0].flags[HMM_PFN_WRITE];
+	memset64(pfns, f, ttm->num_pages);
 
-		if (r < 0)
-			goto release_pages;
+	for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
+		ranges[i].vma = vmas[i];
+		ranges[i].start = max(start, vmas[i]->vm_start);
+		ranges[i].end = min(end, vmas[i]->vm_end);
+		ranges[i].pfns = pfns + nr_pages;
+		nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
 
-		pinned += r;
+		r = hmm_vma_fault(&ranges[i], true);
+		if (unlikely(r))
+			break;
+	}
+	if (unlikely(r)) {
+		while (i--)
+			hmm_vma_range_done(&ranges[i]);
 
-	} while (pinned < ttm->num_pages);
+		goto out_free_pfns;
+	}
 
 	up_read(&mm->mmap_sem);
+
+	for (i = 0; i < ttm->num_pages; i++) {
+		pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
+		if (!pages[i]) {
+			pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
+			       i, pfns[i]);
+			goto out_invalid_pfn;
+		}
+	}
+	gtt->ranges = ranges;
+
 	return 0;
 
-release_pages:
-	release_pages(pages, pinned);
+out_free_pfns:
+	kvfree(pfns);
+out_free_ranges:
+	kvfree(ranges);
+out:
 	up_read(&mm->mmap_sem);
+
 	return r;
+
+out_invalid_pfn:
+	for (i = 0; i < gtt->nr_ranges; i++)
+		hmm_vma_range_done(&ranges[i]);
+	kvfree(pfns);
+	kvfree(ranges);
+	return -ENOMEM;
 }
 
 /**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
  *
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are still valid
  */
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned i;
+	bool r = false;
+	int i;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
+	if (!gtt || !gtt->userptr)
+		return false;
 
-		ttm->pages[i] = pages ? pages[i] : NULL;
+	DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
+		gtt->userptr, gtt->nr_ranges, ttm->num_pages);
+
+	WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
+		"No user pages to check\n");
+
+	if (gtt->ranges) {
+		for (i = 0; i < gtt->nr_ranges; i++)
+			r |= hmm_vma_range_done(&gtt->ranges[i]);
+		kvfree(gtt->ranges[0].pfns);
+		kvfree(gtt->ranges);
+		gtt->ranges = NULL;
 	}
+
+	return r;
 }
+#endif
 
 /**
- * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
  *
- * Called while unpinning userptr pages
+ * Called by amdgpu_cs_list_validate(). This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
  */
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
+void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned i;
-
-	for (i = 0; i < ttm->num_pages; ++i) {
-		struct page *page = ttm->pages[i];
+	unsigned long i;
 
-		if (!page)
-			continue;
-
-		if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-			set_page_dirty(page);
-
-		mark_page_accessed(page);
-	}
+	for (i = 0; i < ttm->num_pages; ++i)
+		ttm->pages[i] = pages ? pages[i] : NULL;
 }
 
 /**
@@ -901,10 +955,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
+
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+	if (gtt->ranges &&
+	    ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
+					     gtt->ranges[0].pfns[0]))
+		WARN_ONCE(1, "Missing get_user_page_done\n");
+#endif
 }
 
 int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1254,11 +1312,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->usertask = current->group_leader;
 	get_task_struct(gtt->usertask);
 
-	spin_lock_init(&gtt->guptasklock);
-	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
-
 	return 0;
 }
 
@@ -1287,7 +1340,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1300,48 +1352,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
  */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
@@ -1753,44 +1777,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 
 	/* Initialize various on-chip memory pools */
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS,
-			   adev->gds.mem.total_size);
+			   adev->gds.gds_size);
 	if (r) {
 		DRM_ERROR("Failed initializing GDS heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
-				    4, AMDGPU_GEM_DOMAIN_GDS,
-				    &adev->gds.gds_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS,
-			   adev->gds.gws.total_size);
+			   adev->gds.gws_size);
 	if (r) {
 		DRM_ERROR("Failed initializing gws heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
-				    1, AMDGPU_GEM_DOMAIN_GWS,
-				    &adev->gds.gws_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA,
-			   adev->gds.oa.total_size);
+			   adev->gds.oa_size);
 	if (r) {
 		DRM_ERROR("Failed initializing oa heap.\n");
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
-				    1, AMDGPU_GEM_DOMAIN_OA,
-				    &adev->gds.oa_gfx_bo, NULL, NULL);
-	if (r)
-		return r;
-
 	/* Register debugfs entries for amdgpu_ttm */
 	r = amdgpu_ttm_debugfs_init(adev);
 	if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..c2b7669004ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -101,9 +101,21 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma);
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
+#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+#else
+static inline int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+{
+	return -EPERM;
+}
+static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+	return false;
+}
+#endif
+
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
-void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@@ -112,7 +124,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 7b33867036e7..524f70f2b52f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 
@@ -313,6 +313,69 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
 	return AMDGPU_FW_LOAD_DIRECT;
 }
 
+#define FW_VERSION_ATTR(name, mode, field)				\
+static ssize_t show_##name(struct device *dev,				\
+			  struct device_attribute *attr,		\
+			  char *buf)					\
+{									\
+	struct drm_device *ddev = dev_get_drvdata(dev);			\
+	struct amdgpu_device *adev = ddev->dev_private;			\
+									\
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n", adev->field);	\
+}									\
+static DEVICE_ATTR(name, mode, show_##name, NULL)
+
+FW_VERSION_ATTR(vce_fw_version, 0444, vce.fw_version);
+FW_VERSION_ATTR(uvd_fw_version, 0444, uvd.fw_version);
+FW_VERSION_ATTR(mc_fw_version, 0444, gmc.fw_version);
+FW_VERSION_ATTR(me_fw_version, 0444, gfx.me_fw_version);
+FW_VERSION_ATTR(pfp_fw_version, 0444, gfx.pfp_fw_version);
+FW_VERSION_ATTR(ce_fw_version, 0444, gfx.ce_fw_version);
+FW_VERSION_ATTR(rlc_fw_version, 0444, gfx.rlc_fw_version);
+FW_VERSION_ATTR(rlc_srlc_fw_version, 0444, gfx.rlc_srlc_fw_version);
+FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
+FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
+FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
+FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos_fw_version);
+FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_fw_version);
+FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(ta_xgmi_fw_version, 0444, psp.ta_fw_version);
+FW_VERSION_ATTR(smc_fw_version, 0444, pm.fw_version);
+FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version);
+FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version);
+FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version);
+FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version);
+
+static struct attribute *fw_attrs[] = {
+	&dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr,
+	&dev_attr_mc_fw_version.attr, &dev_attr_me_fw_version.attr,
+	&dev_attr_pfp_fw_version.attr, &dev_attr_ce_fw_version.attr,
+	&dev_attr_rlc_fw_version.attr, &dev_attr_rlc_srlc_fw_version.attr,
+	&dev_attr_rlc_srlg_fw_version.attr, &dev_attr_rlc_srls_fw_version.attr,
+	&dev_attr_mec_fw_version.attr, &dev_attr_mec2_fw_version.attr,
+	&dev_attr_sos_fw_version.attr, &dev_attr_asd_fw_version.attr,
+	&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
+	&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
+	&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
+	&dev_attr_dmcu_fw_version.attr, NULL
+};
+
+static const struct attribute_group fw_attr_group = {
+	.name = "fw_version",
+	.attrs = fw_attrs
+};
+
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev)
+{
+	return sysfs_create_group(&adev->dev->kobj, &fw_attr_group);
+}
+
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev)
+{
+	sysfs_remove_group(&adev->dev->kobj, &fw_attr_group);
+}
+
 static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
 				       struct amdgpu_firmware_info *ucode,
 				       uint64_t mc_addr, void *kptr)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 7ac25a1c7853..ec4c2ea1f05a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -291,7 +291,9 @@ bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr,
 
 int amdgpu_ucode_init_bo(struct amdgpu_device *adev);
 int amdgpu_ucode_create_bo(struct amdgpu_device *adev);
+int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev);
 void amdgpu_ucode_free_bo(struct amdgpu_device *adev);
+void amdgpu_ucode_sysfs_fini(struct amdgpu_device *adev);
 
 enum amdgpu_firmware_load_type
 amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 4e5d13e41f6a..5b2fea3b4a2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -30,7 +30,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index f7189e22f6b7..b70b3c45bb29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -27,7 +27,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "amdgpu.h"
@@ -1092,7 +1092,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 	for (i = 0; i < timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index ecf6f96df2ad..9f32bf862d94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -26,7 +26,8 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include <drm/drm.h>
 
 #include "amdgpu.h"
@@ -212,132 +213,6 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
-				     struct dpg_pause_state *new_state)
-{
-	int ret_code;
-	uint32_t reg_data = 0;
-	uint32_t reg_data2 = 0;
-	struct amdgpu_ring *ring;
-
-	/* pause/unpause if state is changed */
-	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
-		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
-			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
-			new_state->fw_based, new_state->jpeg);
-
-		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
-			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
-
-		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
-			ret_code = 0;
-
-			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
-			if (!ret_code) {
-				/* pause DPG non-jpeg */
-				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
-				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
-						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
-						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
-
-				/* Restore */
-				ring = &adev->vcn.ring_enc[0];
-				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
-				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
-				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
-				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-
-				ring = &adev->vcn.ring_enc[1];
-				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
-				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
-				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
-				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-
-				ring = &adev->vcn.ring_dec;
-				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
-						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-			}
-		} else {
-			/* unpause dpg non-jpeg, no need to wait */
-			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
-			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
-		}
-		adev->vcn.pause_state.fw_based = new_state->fw_based;
-	}
-
-	/* pause/unpause if state is changed */
-	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
-		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
-			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
-			new_state->fw_based, new_state->jpeg);
-
-		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
-			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
-
-		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
-			ret_code = 0;
-
-			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-
-			if (!ret_code) {
-				/* Make sure JPRG Snoop is disabled before sending the pause */
-				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
-				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
-				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
-
-				/* pause DPG jpeg */
-				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
-				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
-							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
-							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
-
-				/* Restore */
-				ring = &adev->vcn.ring_jpeg;
-				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
-				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
-							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
-							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
-							lower_32_bits(ring->gpu_addr));
-				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
-							upper_32_bits(ring->gpu_addr));
-				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
-				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
-				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
-							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
-
-				ring = &adev->vcn.ring_dec;
-				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
-				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
-						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
-						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-			}
-		} else {
-			/* unpause dpg jpeg, no need to wait */
-			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
-			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
-		}
-		adev->vcn.pause_state.jpeg = new_state->jpeg;
-	}
-
-	return 0;
-}
-
 static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
@@ -362,7 +237,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 		else
 			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
 
-		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, &new_state);
 	}
 
 	fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
@@ -417,7 +292,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
 
-		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
+		adev->vcn.pause_dpg_mode(adev, &new_state);
 	}
 }
 
@@ -446,7 +321,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -594,7 +469,7 @@ error:
 int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	uint32_t rptr = amdgpu_ring_get_rptr(ring);
+	uint32_t rptr;
 	unsigned i;
 	int r;
 
@@ -602,13 +477,15 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
 	if (r)
 		return r;
 
+	rptr = amdgpu_ring_get_rptr(ring);
+
 	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -767,7 +644,7 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -841,7 +718,7 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9));
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a0ad19af9080..a1ee19251aae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,27 @@
 #define VCN_ENC_CMD_REG_WRITE		0x0000000b
 #define VCN_ENC_CMD_REG_WAIT		0x0000000c
 
+#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) 				\
+	({	WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); 			\
+		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, 				\
+			UVD_DPG_LMA_CTL__MASK_EN_MASK | 				\
+			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) 	\
+			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | 			\
+			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); 		\
+		RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); 				\
+	})
+
+#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) 			\
+	do { 										\
+		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); 			\
+		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); 			\
+		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, 				\
+			UVD_DPG_LMA_CTL__READ_WRITE_MASK | 				\
+			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) 	\
+			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | 			\
+			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); 		\
+	} while (0)
+
 enum engine_status_constants {
 	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
 	UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
@@ -81,6 +102,8 @@ struct amdgpu_vcn {
 	unsigned		num_enc_rings;
 	enum amd_powergating_state cur_state;
 	struct dpg_pause_state pause_state;
+	int (*pause_dpg_mode)(struct amdgpu_device *adev,
+		struct dpg_pause_state *new_state);
 };
 
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7d484fad3909..07a7e3820b7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -21,6 +21,10 @@
  *
  */
 
+#include <linux/module.h>
+
+#include <drm/drm_drv.h>
+
 #include "amdgpu.h"
 
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
@@ -426,3 +430,47 @@ uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest)
 	return clk;
 }
 
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (virt->ops && virt->ops->init_reg_access_mode)
+		virt->ops->init_reg_access_mode(adev);
+}
+
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev)
+{
+	bool ret = false;
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (amdgpu_sriov_vf(adev)
+		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH))
+		ret = true;
+
+	return ret;
+}
+
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev)
+{
+	bool ret = false;
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (amdgpu_sriov_vf(adev)
+		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC)
+		&& !(amdgpu_sriov_runtime(adev)))
+		ret = true;
+
+	return ret;
+}
+
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev)
+{
+	bool ret = false;
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (amdgpu_sriov_vf(adev)
+		&& (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING))
+		ret = true;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 584947b7ccf3..dca25deee75c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -48,6 +48,12 @@ struct amdgpu_vf_error_buffer {
 	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
 };
 
+/* According to the fw feature, some new reg access modes are supported */
+#define AMDGPU_VIRT_REG_ACCESS_LEGACY          (1 << 0) /* directly mmio */
+#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH      (1 << 1) /* by PSP */
+#define AMDGPU_VIRT_REG_ACCESS_RLC             (1 << 2) /* by RLC */
+#define AMDGPU_VIRT_REG_SKIP_SEETING           (1 << 3) /* Skip setting reg */
+
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -59,6 +65,7 @@ struct amdgpu_virt_ops {
 	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
 	int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf);
 	int (*force_dpm_level)(struct amdgpu_device *adev, u32 level);
+	void (*init_reg_access_mode)(struct amdgpu_device *adev);
 };
 
 /*
@@ -258,6 +265,7 @@ struct amdgpu_virt {
 	uint32_t gim_feature;
 	/* protect DPM events to GIM */
 	struct mutex                    dpm_mutex;
+	uint32_t reg_access_mode;
 };
 
 #define amdgpu_sriov_enabled(adev) \
@@ -307,4 +315,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
 uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest);
 uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest);
 
+void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev);
+bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev);
+bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4f10f5aba00b..e44f9dd202e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -28,7 +28,7 @@
 #include <linux/dma-fence-array.h>
 #include <linux/interval_tree_generic.h>
 #include <linux/idr.h>
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index ec9ea3fdbb4a..8abc9b6892ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -22,7 +22,6 @@
  * Authors: Christian König
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 
 struct amdgpu_vram_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a48c84c51775..d11eba09eadd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,34 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
 	return &hive->device_list;
 }
 
+/**
+ * DOC: AMDGPU XGMI Support
+ *
+ * XGMI is a high speed interconnect that joins multiple GPU cards
+ * into a homogeneous memory space that is organized by a collective
+ * hive ID and individual node IDs, both of which are 64-bit numbers.
+ *
+ * The file xgmi_device_id contains the unique per GPU device ID and
+ * is stored in the /sys/class/drm/card${cardno}/device/ directory.
+ *
+ * Inside the device directory a sub-directory 'xgmi_hive_info' is
+ * created which contains the hive ID and the list of nodes.
+ *
+ * The hive ID is stored in:
+ *   /sys/class/drm/card${cardno}/device/xgmi_hive_info/xgmi_hive_id
+ *
+ * The node information is stored in numbered directories:
+ *   /sys/class/drm/card${cardno}/device/xgmi_hive_info/node${nodeno}/xgmi_device_id
+ *
+ * Each device has their own xgmi_hive_info direction with a mirror
+ * set of node sub-directories.
+ *
+ * The XGMI memory space is built by contiguously adding the power of
+ * two padded VRAM space from each node to each other.
+ *
+ */
+
+
 static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -238,7 +266,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	/* Each psp need to set the latest topology */
 	ret = psp_xgmi_set_topology_info(&adev->psp,
 					 hive->number_devices,
-					 &hive->topology_info);
+					 &adev->psp.xgmi_context.top_info);
 	if (ret)
 		dev_err(adev->dev,
 			"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +276,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 	return ret;
 }
 
+
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+		struct amdgpu_device *peer_adev)
+{
+	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+	int i;
+
+	for (i = 0 ; i < top->num_nodes; ++i)
+		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+			return top->nodes[i].num_hops;
+	return	-EINVAL;
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
-	struct psp_xgmi_topology_info *hive_topology;
+	struct psp_xgmi_topology_info *top_info;
 	struct amdgpu_hive_info *hive;
 	struct amdgpu_xgmi	*entry;
 	struct amdgpu_device *tmp_adev = NULL;
@@ -283,35 +324,46 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 		goto exit;
 	}
 
-	hive_topology = &hive->topology_info;
+	top_info = &adev->psp.xgmi_context.top_info;
 
 	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
 	list_for_each_entry(entry, &hive->device_list, head)
-		hive_topology->nodes[count++].node_id = entry->node_id;
+		top_info->nodes[count++].node_id = entry->node_id;
+	top_info->num_nodes = count;
 	hive->number_devices = count;
 
-	/* Each psp need to get the latest topology */
 	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+		/* update node list for other device in the hive */
+		if (tmp_adev != adev) {
+			top_info = &tmp_adev->psp.xgmi_context.top_info;
+			top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id;
+			top_info->num_nodes = count;
+		}
+		ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
+		if (ret)
+			goto exit;
+	}
+
+	/* get latest topology info for each device from psp */
+	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+				&tmp_adev->psp.xgmi_context.top_info);
 		if (ret) {
 			dev_err(tmp_adev->dev,
 				"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
 				tmp_adev->gmc.xgmi.node_id,
 				tmp_adev->gmc.xgmi.hive_id, ret);
 			/* To do : continue with some node failed or disable the whole hive */
-			break;
+			goto exit;
 		}
 	}
 
-	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-		ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
-		if (ret)
-			break;
-	}
-
 	if (!ret)
 		ret = amdgpu_xgmi_sysfs_add_dev_info(adev, hive);
 
+
+	mutex_unlock(&hive->hive_lock);
+exit:
 	if (!ret)
 		dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
 			 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
@@ -320,9 +372,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 			adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
 			ret);
 
-
-	mutex_unlock(&hive->hive_lock);
-exit:
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 3e9c91e9a4bf..fbcee31788c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
 struct amdgpu_hive_info {
 	uint64_t		hive_id;
 	struct list_head	device_list;
-	struct psp_xgmi_topology_info	topology_info;
 	int number_devices;
 	struct mutex hive_lock, reset_lock;
 	struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+		struct amdgpu_device *peer_adev);
 
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h
index a39170991afe..4205bbe5d8d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.h
+++ b/drivers/gpu/drm/amd/amdgpu/atom.h
@@ -26,7 +26,8 @@
 #define ATOM_H
 
 #include <linux/types.h>
-#include <drm/drmP.h>
+
+struct drm_device;
 
 #define ATOM_BIOS_MAGIC		0xAA55
 #define ATOM_ATI_MAGIC_PTR	0x30
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
index 8a0818b23ea4..213e62a28ba0 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_fixed.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index f81068ba4cc6..6858cde9fc5d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -24,7 +24,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 60e2447e12c5..1e94a9b652f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -23,7 +23,9 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index f9b2ce9a98f3..980c363b1a0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -22,7 +22,7 @@
  * Authors: Alex Deucher
  *
  */
-#include <drm/drmP.h>
+
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07c1f239e9c3..1ffbc0d3d7a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -24,7 +24,8 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -1804,6 +1805,18 @@ static bool cik_need_reset_on_init(struct amdgpu_device *adev)
 	return false;
 }
 
+static uint64_t cik_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+	uint64_t nak_r, nak_g;
+
+	/* Get the number of NAKs received and generated */
+	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+	/* Add the total number of NAKs, i.e the number of replays */
+	return (nak_r + nak_g);
+}
+
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -1821,6 +1834,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.init_doorbell_index = &legacy_doorbell_index_init,
 	.get_pcie_usage = &cik_get_pcie_usage,
 	.need_reset_on_init = &cik_need_reset_on_init,
+	.get_pcie_replay_count = &cik_get_pcie_replay_count,
 };
 
 static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index 721c757156e8..401c99f0b2d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index d42808b05971..c45304f1047c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -21,8 +21,10 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -640,7 +642,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index 61024b9c7a4b..1dca0cabc326 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 1f0426d2fc2a..1ffd1963e765 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -20,7 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 2280b971d758..9e0782b54066 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -20,7 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index bea32f076b91..4bf453e07dca 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -20,7 +20,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 13da915991dd..b23418ca8f6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -20,7 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index e4cc1d48eaab..3026298da7eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_vblank.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_i2c.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index d5ebe566809b..8c09bf994acd 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -105,6 +105,431 @@ static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
 		*flags |= AMD_CG_SUPPORT_DF_MGCG;
 }
 
+/* hold counter assignment per gpu struct */
+struct df_v3_6_event_mask {
+		struct amdgpu_device gpu;
+		uint64_t config_assign_mask[AMDGPU_DF_MAX_COUNTERS];
+};
+
+/* get assigned df perfmon ctr as int */
+static void df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
+				      uint64_t config,
+				      int *counter)
+{
+	struct df_v3_6_event_mask *mask;
+	int i;
+
+	mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
+		if ((config & 0x0FFFFFFUL) == mask->config_assign_mask[i]) {
+			*counter = i;
+			return;
+		}
+	}
+}
+
+/* get address based on counter assignment */
+static void df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
+				 uint64_t config,
+				 int is_ctrl,
+				 uint32_t *lo_base_addr,
+				 uint32_t *hi_base_addr)
+{
+
+	int target_cntr = -1;
+
+	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+	if (target_cntr < 0)
+		return;
+
+	switch (target_cntr) {
+
+	case 0:
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo0 : smnPerfMonCtrLo0;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi0 : smnPerfMonCtrHi0;
+		break;
+	case 1:
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo1 : smnPerfMonCtrLo1;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi1 : smnPerfMonCtrHi1;
+		break;
+	case 2:
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo2 : smnPerfMonCtrLo2;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi2 : smnPerfMonCtrHi2;
+		break;
+	case 3:
+		*lo_base_addr = is_ctrl ? smnPerfMonCtlLo3 : smnPerfMonCtrLo3;
+		*hi_base_addr = is_ctrl ? smnPerfMonCtlHi3 : smnPerfMonCtrHi3;
+		break;
+
+	}
+
+}
+
+/* get read counter address */
+static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
+					  uint64_t config,
+					  uint32_t *lo_base_addr,
+					  uint32_t *hi_base_addr)
+{
+	df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr, hi_base_addr);
+}
+
+/* get control counter settings i.e. address and values to set */
+static void df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
+					  uint64_t config,
+					  uint32_t *lo_base_addr,
+					  uint32_t *hi_base_addr,
+					  uint32_t *lo_val,
+					  uint32_t *hi_val)
+{
+
+	uint32_t eventsel, instance, unitmask;
+	uint32_t es_5_0, es_13_0, es_13_6, es_13_12, es_11_8, es_7_0;
+
+	df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr);
+
+	if (lo_val == NULL || hi_val == NULL)
+		return;
+
+	if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
+		DRM_ERROR("DF PMC addressing not retrieved! Lo: %x, Hi: %x",
+				*lo_base_addr, *hi_base_addr);
+		return;
+	}
+
+	eventsel = GET_EVENT(config);
+	instance = GET_INSTANCE(config);
+	unitmask = GET_UNITMASK(config);
+
+	es_5_0 = eventsel & 0x3FUL;
+	es_13_6 = instance;
+	es_13_0 = (es_13_6 << 6) + es_5_0;
+	es_13_12 = (es_13_0 & 0x03000UL) >> 12;
+	es_11_8 = (es_13_0 & 0x0F00UL) >> 8;
+	es_7_0 = es_13_0 & 0x0FFUL;
+	*lo_val = (es_7_0 & 0xFFUL) | ((unitmask & 0x0FUL) << 8);
+	*hi_val = (es_11_8 | ((es_13_12)<<(29)));
+}
+
+/* assign df performance counters for read */
+static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
+				   uint64_t config,
+				   int *is_assigned)
+{
+
+	struct df_v3_6_event_mask *mask;
+	int i, target_cntr;
+
+	target_cntr = -1;
+
+	*is_assigned = 0;
+
+	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+	if (target_cntr >= 0) {
+		*is_assigned = 1;
+		return 0;
+	}
+
+	mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+	for (i = 0; i < AMDGPU_DF_MAX_COUNTERS; i++) {
+		if (mask->config_assign_mask[i] == 0ULL) {
+			mask->config_assign_mask[i] = config & 0x0FFFFFFUL;
+			return 0;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/* release performance counter */
+static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
+				     uint64_t config)
+{
+
+	struct df_v3_6_event_mask *mask;
+	int target_cntr;
+
+	target_cntr = -1;
+
+	df_v3_6_pmc_config_2_cntr(adev, config, &target_cntr);
+
+	mask = container_of(adev, struct df_v3_6_event_mask, gpu);
+
+	if (target_cntr >= 0)
+		mask->config_assign_mask[target_cntr] = 0ULL;
+
+}
+
+/*
+ * get xgmi link counters via programmable data fabric (df) counters (max 4)
+ * using cake tx event.
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @count -> counters to pass
+ *
+ */
+
+static void df_v3_6_get_xgmi_link_cntr(struct amdgpu_device *adev,
+				       int instance,
+				       uint64_t *count)
+{
+	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+	uint64_t config;
+
+	config = GET_INSTANCE_CONFIG(instance);
+
+	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+				      &hi_base_addr);
+
+	if ((lo_base_addr == 0) || (hi_base_addr == 0))
+		return;
+
+	lo_val = RREG32_PCIE(lo_base_addr);
+	hi_val = RREG32_PCIE(hi_base_addr);
+
+	*count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
+}
+
+/*
+ * reset xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ *
+ */
+static void df_v3_6_reset_xgmi_link_cntr(struct amdgpu_device *adev,
+					 int instance)
+{
+	uint32_t lo_base_addr, hi_base_addr;
+	uint64_t config;
+
+	config = 0ULL | (0x7ULL) | ((0x46ULL + instance) << 8) | (0x2 << 16);
+
+	df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
+				      &hi_base_addr);
+
+	if ((lo_base_addr == 0) || (hi_base_addr == 0))
+		return;
+
+	WREG32_PCIE(lo_base_addr, 0UL);
+	WREG32_PCIE(hi_base_addr, 0UL);
+}
+
+/*
+ * add xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ *
+ */
+
+static int df_v3_6_add_xgmi_link_cntr(struct amdgpu_device *adev,
+				      int instance)
+{
+	uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
+	uint64_t config;
+	int ret, is_assigned;
+
+	if (instance < 0 || instance > 1)
+		return -EINVAL;
+
+	config = GET_INSTANCE_CONFIG(instance);
+
+	ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
+
+	if (ret || is_assigned)
+		return ret;
+
+	df_v3_6_pmc_get_ctrl_settings(adev,
+			config,
+			&lo_base_addr,
+			&hi_base_addr,
+			&lo_val,
+			&hi_val);
+
+	WREG32_PCIE(lo_base_addr, lo_val);
+	WREG32_PCIE(hi_base_addr, hi_val);
+
+	return ret;
+}
+
+
+/*
+ * start xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @is_enable -> either resume or assign event via df perfmon
+ *
+ */
+
+static int df_v3_6_start_xgmi_link_cntr(struct amdgpu_device *adev,
+					int instance,
+					int is_enable)
+{
+	uint32_t lo_base_addr, hi_base_addr, lo_val;
+	uint64_t config;
+	int ret;
+
+	if (instance < 0 || instance > 1)
+		return -EINVAL;
+
+	if (is_enable) {
+
+		ret = df_v3_6_add_xgmi_link_cntr(adev, instance);
+
+		if (ret)
+			return ret;
+
+	} else {
+
+		config = GET_INSTANCE_CONFIG(instance);
+
+		df_v3_6_pmc_get_ctrl_settings(adev,
+				config,
+				&lo_base_addr,
+				&hi_base_addr,
+				NULL,
+				NULL);
+
+		if (lo_base_addr == 0)
+			return -EINVAL;
+
+		lo_val = RREG32_PCIE(lo_base_addr);
+
+		WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
+
+		ret = 0;
+	}
+
+	return ret;
+
+}
+
+/*
+ * start xgmi link counters
+ *
+ * @adev -> amdgpu device
+ * @instance-> currently cake has 2 links to poll on vega20
+ * @is_enable -> either pause or unassign event via df perfmon
+ *
+ */
+
+static int df_v3_6_stop_xgmi_link_cntr(struct amdgpu_device *adev,
+				       int instance,
+				       int is_disable)
+{
+
+	uint32_t lo_base_addr, hi_base_addr, lo_val;
+	uint64_t config;
+
+	config = GET_INSTANCE_CONFIG(instance);
+
+	if (is_disable) {
+		df_v3_6_reset_xgmi_link_cntr(adev, instance);
+		df_v3_6_pmc_release_cntr(adev, config);
+	} else {
+
+		df_v3_6_pmc_get_ctrl_settings(adev,
+				config,
+				&lo_base_addr,
+				&hi_base_addr,
+				NULL,
+				NULL);
+
+		if ((lo_base_addr == 0) || (hi_base_addr == 0))
+			return -EINVAL;
+
+		lo_val = RREG32_PCIE(lo_base_addr);
+
+		WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
+	}
+
+	return 0;
+}
+
+static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
+			     int is_enable)
+{
+	int xgmi_tx_link, ret = 0;
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+					: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+		if (xgmi_tx_link >= 0)
+			ret = df_v3_6_start_xgmi_link_cntr(adev, xgmi_tx_link,
+						      is_enable);
+
+		if (ret)
+			return ret;
+
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
+			    int is_disable)
+{
+	int xgmi_tx_link, ret = 0;
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+			xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+				: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+			if (xgmi_tx_link >= 0) {
+				ret = df_v3_6_stop_xgmi_link_cntr(adev,
+								  xgmi_tx_link,
+								  is_disable);
+				if (ret)
+					return ret;
+			}
+
+			ret = 0;
+			break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
+				  uint64_t config,
+				  uint64_t *count)
+{
+
+	int xgmi_tx_link;
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		xgmi_tx_link = IS_DF_XGMI_0_TX(config) ? 0
+					: (IS_DF_XGMI_1_TX(config) ? 1 : -1);
+
+		if (xgmi_tx_link >= 0) {
+			df_v3_6_reset_xgmi_link_cntr(adev, xgmi_tx_link);
+			df_v3_6_get_xgmi_link_cntr(adev, xgmi_tx_link, count);
+		}
+
+		break;
+	default:
+		break;
+	}
+
+}
+
 const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.init = df_v3_6_init,
 	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
@@ -113,4 +538,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
 	.update_medium_grain_clock_gating =
 			df_v3_6_update_medium_grain_clock_gating,
 	.get_clockgating_state = df_v3_6_get_clockgating_state,
+	.pmc_start = df_v3_6_pmc_start,
+	.pmc_stop = df_v3_6_pmc_stop,
+	.pmc_get_count = df_v3_6_pmc_get_count
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
index e79c58e5efcb..fcffd807764d 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -35,6 +35,23 @@ enum DF_V3_6_MGCG {
 	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
 };
 
+/* Defined in global_features.h as FTI_PERFMON_VISIBLE */
+#define AMDGPU_DF_MAX_COUNTERS		4
+
+/* get flags from df perfmon config */
+#define GET_EVENT(x)			(x & 0xFFUL)
+#define GET_INSTANCE(x)			((x >> 8) & 0xFFUL)
+#define GET_UNITMASK(x)			((x >> 16) & 0xFFUL)
+#define GET_INSTANCE_CONFIG(x)		(0ULL | (0x07ULL) \
+					| ((0x046ULL + x) << 8) \
+					| (0x02 << 16))
+
+/* df event conf macros */
+#define IS_DF_XGMI_0_TX(x) (GET_EVENT(x) == 0x7 \
+		&& GET_INSTANCE(x) == 0x46 && GET_UNITMASK(x) == 0x2)
+#define IS_DF_XGMI_1_TX(x) (GET_EVENT(x) == 0x7 \
+		&& GET_INSTANCE(x) == 0x47 && GET_UNITMASK(x) == 0x2)
+
 extern const struct amdgpu_df_funcs df_v3_6_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index c0cb244f58cd..91f10995249b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -21,6 +21,8 @@
  *
  */
 #include <linux/firmware.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
@@ -1812,7 +1814,7 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a59e0fdf5a97..003bb5769183 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -20,8 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
@@ -2080,7 +2082,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
@@ -4493,12 +4495,8 @@ static int gfx_v7_0_sw_init(void *handle)
 
 static int gfx_v7_0_sw_fini(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+	int i;
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -5070,30 +5068,10 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
-	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
-	adev->gds.gws.total_size = 64;
-	adev->gds.oa.total_size = 16;
+	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+	adev->gds.gws_size = 64;
+	adev->gds.oa_size = 16;
 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
-	if (adev->gds.mem.total_size == 64 * 1024) {
-		adev->gds.mem.gfx_partition_size = 4096;
-		adev->gds.mem.cs_partition_size = 4096;
-
-		adev->gds.gws.gfx_partition_size = 4;
-		adev->gds.gws.cs_partition_size = 4;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 1;
-	} else {
-		adev->gds.mem.gfx_partition_size = 1024;
-		adev->gds.mem.cs_partition_size = 1024;
-
-		adev->gds.gws.gfx_partition_size = 16;
-		adev->gds.gws.cs_partition_size = 16;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 4;
-	}
 }
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 02955e6e9dd9..b7a2df46dc22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -20,9 +20,13 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "vi.h"
@@ -855,7 +859,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -2057,12 +2061,8 @@ static int gfx_v8_0_sw_init(void *handle)
 
 static int gfx_v8_0_sw_fini(void *handle)
 {
-	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+	int i;
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
@@ -7010,30 +7010,10 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 {
 	/* init asci gds info */
-	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
-	adev->gds.gws.total_size = 64;
-	adev->gds.oa.total_size = 16;
+	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
+	adev->gds.gws_size = 64;
+	adev->gds.oa_size = 16;
 	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
-
-	if (adev->gds.mem.total_size == 64 * 1024) {
-		adev->gds.mem.gfx_partition_size = 4096;
-		adev->gds.mem.cs_partition_size = 4096;
-
-		adev->gds.gws.gfx_partition_size = 4;
-		adev->gds.gws.cs_partition_size = 4;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 1;
-	} else {
-		adev->gds.mem.gfx_partition_size = 1024;
-		adev->gds.mem.cs_partition_size = 1024;
-
-		adev->gds.gws.gfx_partition_size = 16;
-		adev->gds.gws.cs_partition_size = 16;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 4;
-	}
 }
 
 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index b610e3b30d95..b4b85e550bc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -20,9 +20,13 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
@@ -35,6 +39,7 @@
 #include "vega10_enum.h"
 #include "hdp/hdp_4_0_offset.h"
 
+#include "soc15.h"
 #include "soc15_common.h"
 #include "clearstate_gfx9.h"
 #include "v9_structs.h"
@@ -309,12 +314,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		soc15_program_register_sequence(adev,
-						 golden_settings_gc_9_0,
-						 ARRAY_SIZE(golden_settings_gc_9_0));
-		soc15_program_register_sequence(adev,
-						 golden_settings_gc_9_0_vg10,
-						 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+		if (!amdgpu_virt_support_skip_setting(adev)) {
+			soc15_program_register_sequence(adev,
+							 golden_settings_gc_9_0,
+							 ARRAY_SIZE(golden_settings_gc_9_0));
+			soc15_program_register_sequence(adev,
+							 golden_settings_gc_9_0_vg10,
+							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
+		}
 		break;
 	case CHIP_VEGA12:
 		soc15_program_register_sequence(adev,
@@ -419,7 +426,7 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -1468,8 +1475,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
 
 	/* GDS reserve memory: 64 bytes alignment */
 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
-	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
-	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
+	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
 
@@ -1577,7 +1583,7 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
-			           (adev->gds.mem.total_size +
+			           (adev->gds.gds_size +
 				    adev->gfx.ngg.gds_reserve_size));
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
@@ -1791,10 +1797,6 @@ static int gfx_v9_0_sw_fini(void *handle)
 		kfree(ras_if);
 	}
 
-	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
-	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
-
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -1806,9 +1808,7 @@ static int gfx_v9_0_sw_fini(void *handle)
 
 	gfx_v9_0_mec_fini(adev);
 	gfx_v9_0_ngg_fini(adev);
-	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
-				&adev->gfx.rlc.clear_state_gpu_addr,
-				(void **)&adev->gfx.rlc.cs_ptr);
+	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
 	if (adev->asic_type == CHIP_RAVEN) {
 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
 				&adev->gfx.rlc.cp_table_gpu_addr,
@@ -1844,7 +1844,7 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh
 	else
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
 
-	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
+	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
 }
 
 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
@@ -1912,8 +1912,8 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
 		soc15_grbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
-		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
-		WREG32_SOC15(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
+		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
+		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -1924,7 +1924,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 	u32 tmp;
 	int i;
 
-	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
 
 	gfx_v9_0_tiling_mode_table_init(adev);
 
@@ -1941,17 +1941,17 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 		if (i == 0) {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
-			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, 0);
+			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
+			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
 		} else {
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
-			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
+			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
 				(adev->gmc.private_aperture_start >> 48));
 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
 				(adev->gmc.shared_aperture_start >> 48));
-			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
+			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
 		}
 	}
 	soc15_grbm_select(adev, 0, 0, 0, 0);
@@ -1967,7 +1967,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
 	 */
 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
-	WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
+	WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
@@ -2034,11 +2034,11 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
 {
 	/* csib */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
+	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
 			adev->gfx.rlc.clear_state_size);
 }
 
@@ -2508,7 +2508,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
-	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
 	udelay(50);
 }
 
@@ -2706,9 +2706,9 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	int i;
 
 	if (enable) {
-		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, 0);
+		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
 	} else {
-		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
+		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
 			adev->gfx.compute_ring[i].sched.ready = false;
@@ -2769,9 +2769,9 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
-	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
-	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
@@ -2989,67 +2989,67 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	/* disable wptr polling */
 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
 	       mqd->cp_hqd_eop_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
 	       mqd->cp_hqd_eop_base_addr_hi);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
 	       mqd->cp_hqd_eop_control);
 
 	/* enable doorbell? */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* disable the queue if it's active */
 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
 		       mqd->cp_hqd_dequeue_request);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR,
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
 		       mqd->cp_hqd_pq_rptr);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 		       mqd->cp_hqd_pq_wptr_lo);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 		       mqd->cp_hqd_pq_wptr_hi);
 	}
 
 	/* set the pointer to the MQD */
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR,
+	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
 	       mqd->cp_mqd_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
 	       mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
-	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL,
+	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
 	       mqd->cp_mqd_control);
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
 	       mqd->cp_hqd_pq_base_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
 	       mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
 	       mqd->cp_hqd_pq_control);
 
 	/* set the wb address whether it's enabled or not */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 				mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 				mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
@@ -3060,23 +3060,23 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 					(adev->doorbell_index.userqueue_end * 2) << 2);
 	}
 
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
 	       mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
 	       mqd->cp_hqd_pq_wptr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
 	       mqd->cp_hqd_pq_wptr_hi);
 
 	/* set the vmid for the queue */
-	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
-	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
 	       mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
-	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE,
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
 	       mqd->cp_hqd_active);
 
 	if (ring->use_doorbell)
@@ -3093,7 +3093,7 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
 	/* disable the queue if it's active */
 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
 
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
 
 		for (j = 0; j < adev->usec_timeout; j++) {
 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
@@ -3105,21 +3105,21 @@ static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
 			DRM_DEBUG("KIQ dequeue request failed.\n");
 
 			/* Manual disable if dequeue request times out */
-			WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
 		}
 
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
+		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
 		      0);
 	}
 
-	WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
 
 	return 0;
 }
@@ -3539,6 +3539,241 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const u32 vgpr_init_compute_shader[] =
+{
+	0xb07c0000, 0xbe8000ff,
+	0x000000f8, 0xbf110800,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0x7e060280,
+	0x7e080280, 0x7e0a0280,
+	0x7e0c0280, 0x7e0e0280,
+	0x80808800, 0xbe803200,
+	0xbf84fff5, 0xbf9c0000,
+	0xd28c0001, 0x0001007f,
+	0xd28d0001, 0x0002027e,
+	0x10020288, 0xb8810904,
+	0xb7814000, 0xd1196a01,
+	0x00000301, 0xbe800087,
+	0xbefc00c1, 0xd89c4000,
+	0x00020201, 0xd89cc080,
+	0x00040401, 0x320202ff,
+	0x00000800, 0x80808100,
+	0xbf84fff8, 0x7e020280,
+	0xbf810000, 0x00000000,
+};
+
+static const u32 sgpr_init_compute_shader[] =
+{
+	0xb07c0000, 0xbe8000ff,
+	0x0000005f, 0xbee50080,
+	0xbe812c65, 0xbe822c65,
+	0xbe832c65, 0xbe842c65,
+	0xbe852c65, 0xb77c0005,
+	0x80808500, 0xbf84fff8,
+	0xbe800080, 0xbf810000,
+};
+
+static const struct soc15_reg_entry vgpr_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
+};
+
+static const struct soc15_reg_entry sgpr_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+};
+
+static const struct soc15_reg_entry sec_ded_counter_registers[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
+   { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
+   { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
+   { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
+};
+
+static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
+	struct amdgpu_ib ib;
+	struct dma_fence *f = NULL;
+	int r, i, j;
+	unsigned total_size, vgpr_offset, sgpr_offset;
+	u64 gpu_addr;
+
+	/* only support when RAS is enabled */
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+		return 0;
+
+	/* bail if the compute ring is not ready */
+	if (!ring->sched.ready)
+		return 0;
+
+	total_size =
+		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+	total_size +=
+		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+	total_size = ALIGN(total_size, 256);
+	vgpr_offset = total_size;
+	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
+	sgpr_offset = total_size;
+	total_size += sizeof(sgpr_init_compute_shader);
+
+	/* allocate an indirect buffer to put the commands in */
+	memset(&ib, 0, sizeof(ib));
+	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	/* load the compute shaders */
+	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
+		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
+
+	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
+		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
+
+	/* init the ib length to 0 */
+	ib.length_dw = 0;
+
+	/* VGPR */
+	/* write the register state for the compute dispatch */
+	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
+		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
+								- PACKET3_SET_SH_REG_START;
+		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
+	}
+	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+							- PACKET3_SET_SH_REG_START;
+	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+	/* write dispatch packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = 1; /* y */
+	ib.ptr[ib.length_dw++] = 1; /* z */
+	ib.ptr[ib.length_dw++] =
+		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* write CS partial flush packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+	/* SGPR */
+	/* write the register state for the compute dispatch */
+	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
+		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
+								- PACKET3_SET_SH_REG_START;
+		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
+	}
+	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
+							- PACKET3_SET_SH_REG_START;
+	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
+	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
+
+	/* write dispatch packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+	ib.ptr[ib.length_dw++] = 128; /* x */
+	ib.ptr[ib.length_dw++] = 1; /* y */
+	ib.ptr[ib.length_dw++] = 1; /* z */
+	ib.ptr[ib.length_dw++] =
+		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* write CS partial flush packet */
+	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
+	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
+
+	/* shedule the ib on the ring */
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	if (r) {
+		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
+		goto fail;
+	}
+
+	/* wait for the GPU to finish processing the IB */
+	r = dma_fence_wait(f, false);
+	if (r) {
+		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+		goto fail;
+	}
+
+	/* read back registers to clear the counters */
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (j = 0; j < 16; j++) {
+		gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+		gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+		gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+		gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
+		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
+			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
+	}
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+fail:
+	amdgpu_ib_free(adev, &ib, NULL);
+	dma_fence_put(f);
+
+	return r;
+}
+
 static int gfx_v9_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3580,8 +3815,31 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 		return 0;
 	}
 
-	if (*ras_if)
+	/* requires IBs so do in late init after IB pool is initialized */
+	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+	if (r)
+		return r;
+
+	/* handle resume path. */
+	if (*ras_if) {
+		/* resend ras TA enable cmd during resume.
+		 * prepare to handle failure.
+		 */
+		ih_info.head = **ras_if;
+		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+		if (r) {
+			if (r == -EAGAIN) {
+				/* request a gpu reset. will run again. */
+				amdgpu_ras_request_reset_on_boot(adev,
+						AMDGPU_RAS_BLOCK__GFX);
+				return 0;
+			}
+			/* fail to enable ras, cleanup all. */
+			goto irq;
+		}
+		/* enable successfully. continue. */
 		goto resume;
+	}
 
 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
 	if (!*ras_if)
@@ -3590,8 +3848,14 @@ static int gfx_v9_0_ecc_late_init(void *handle)
 	**ras_if = ras_block;
 
 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r)
+	if (r) {
+		if (r == -EAGAIN) {
+			amdgpu_ras_request_reset_on_boot(adev,
+					AMDGPU_RAS_BLOCK__GFX);
+			r = 0;
+		}
 		goto feature;
+	}
 
 	ih_info.head = **ras_if;
 	fs_info.head = **ras_if;
@@ -3624,7 +3888,7 @@ interrupt:
 feature:
 	kfree(*ras_if);
 	*ras_if = NULL;
-	return -EINVAL;
+	return r;
 }
 
 static int gfx_v9_0_late_init(void *handle)
@@ -4329,8 +4593,8 @@ static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
 	mutex_lock(&adev->srbm_mutex);
 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-	WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
-	WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
 
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -5066,13 +5330,13 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
-		adev->gds.mem.total_size = 0x10000;
+		adev->gds.gds_size = 0x10000;
 		break;
 	case CHIP_RAVEN:
-		adev->gds.mem.total_size = 0x1000;
+		adev->gds.gds_size = 0x1000;
 		break;
 	default:
-		adev->gds.mem.total_size = 0x10000;
+		adev->gds.gds_size = 0x10000;
 		break;
 	}
 
@@ -5096,28 +5360,8 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
 		break;
 	}
 
-	adev->gds.gws.total_size = 64;
-	adev->gds.oa.total_size = 16;
-
-	if (adev->gds.mem.total_size == 64 * 1024) {
-		adev->gds.mem.gfx_partition_size = 4096;
-		adev->gds.mem.cs_partition_size = 4096;
-
-		adev->gds.gws.gfx_partition_size = 4;
-		adev->gds.gws.cs_partition_size = 4;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 1;
-	} else {
-		adev->gds.mem.gfx_partition_size = 1024;
-		adev->gds.mem.cs_partition_size = 1024;
-
-		adev->gds.gws.gfx_partition_size = 16;
-		adev->gds.gws.cs_partition_size = 16;
-
-		adev->gds.oa.gfx_partition_size = 4;
-		adev->gds.oa.cs_partition_size = 4;
-	}
+	adev->gds.gws_size = 64;
+	adev->gds.oa_size = 16;
 }
 
 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 7bb5359d0bbd..9f0f189fc111 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -71,12 +71,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 	uint64_t value;
 
 	/* Program the AGP BAR */
-	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0);
-	WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
-	WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0);
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
 
 	/* Program the system aperture low logical page number. */
-	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
 		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
 	if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
@@ -86,11 +86,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		 * workaround that increase system aperture high address (add 1)
 		 * to get rid of the VM fault and hardware hang.
 		 */
-		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 			     max((adev->gmc.fb_end >> 18) + 0x1,
 				 adev->gmc.agp_end >> 18));
 	else
-		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
+		WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
 	/* Set default page address. */
@@ -129,7 +129,7 @@ static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev)
 			    MTYPE, MTYPE_UC);/* XXX for emulation. */
 	tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1);
 
-	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 }
 
 static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
@@ -146,12 +146,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL, tmp);
 
 	tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
-	WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL2, tmp);
 
 	tmp = mmVM_L2_CNTL3_DEFAULT;
 	if (adev->gmc.translate_further) {
@@ -163,12 +163,12 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
 				    L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
 	}
-	WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL3, tmp);
 
 	tmp = mmVM_L2_CNTL4_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
-	WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmVM_L2_CNTL4, tmp);
 }
 
 static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
@@ -267,9 +267,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 		 * VF copy registers so vbios post doesn't program them, for
 		 * SRIOV driver need to program them
 		 */
-		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
+		WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_BASE,
 			     adev->gmc.vram_start >> 24);
-		WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
+		WREG32_SOC15_RLC(GC, 0, mmMC_VM_FB_LOCATION_TOP,
 			     adev->gmc.vram_end >> 24);
 	}
 
@@ -303,7 +303,7 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev)
 				MC_VM_MX_L1_TLB_CNTL,
 				ENABLE_ADVANCED_DRIVER_MODEL,
 				0);
-	WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
+	WREG32_SOC15_RLC(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 
 	/* Setup L2 cache */
 	WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index b06d876da2d9..ca8dbe91cc8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -20,8 +20,11 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "gmc_v6_0.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 75aa3332aee2..57f80065d57a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -20,8 +20,11 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8a3b5e6fc6c9..9238280d1ff7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -20,8 +20,11 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include <drm/drm_cache.h>
 #include "amdgpu.h"
 #include "gmc_v8_0.h"
@@ -289,7 +292,7 @@ out:
  *
  * @adev: amdgpu_device pointer
  *
- * Load the GDDR MC ucode into the hw (CIK).
+ * Load the GDDR MC ucode into the hw (VI).
  * Returns 0 on success, error on failure.
  */
 static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
@@ -443,7 +446,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  *
  * Set the location of vram, gart, and AGP in the GPU's
- * physical address space (CIK).
+ * physical address space (VI).
  */
 static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 {
@@ -515,7 +518,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
  * @adev: amdgpu_device pointer
  *
  * Look up the amount of vram, vram width, and decide how to place
- * vram and gart within the GPU's physical address space (CIK).
+ * vram and gart within the GPU's physical address space (VI).
  * Returns 0 for success.
  */
 static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
@@ -630,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
  * @adev: amdgpu_device pointer
  * @vmid: vm instance to flush
  *
- * Flush the TLB for the requested page table (CIK).
+ * Flush the TLB for the requested page table (VI).
  */
 static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
 				uint32_t vmid, uint32_t flush_type)
@@ -800,7 +803,7 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
  * This sets up the TLBs, programs the page tables for VMID0,
  * sets up the hw for VMIDs 1-15 which are allocated on
  * demand, and sets up the global locations for the LDS, GDS,
- * and GPUVM for FSA64 clients (CIK).
+ * and GPUVM for FSA64 clients (VI).
  * Returns 0 for success, errors for failure.
  */
 static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
@@ -948,7 +951,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev)
  *
  * @adev: amdgpu_device pointer
  *
- * This disables all VM page table (CIK).
+ * This disables all VM page table (VI).
  */
 static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
 {
@@ -978,7 +981,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
  *
- * Print human readable fault information (CIK).
+ * Print human readable fault information (VI).
  */
 static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
 				     u32 addr, u32 mc_client, unsigned pasid)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 72837b8c7031..8e3f5990e278 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -20,8 +20,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
 #include <linux/firmware.h>
+#include <linux/pci.h>
+
 #include <drm/drm_cache.h>
+
 #include "amdgpu.h"
 #include "gmc_v9_0.h"
 #include "amdgpu_atomfirmware.h"
@@ -686,8 +690,25 @@ static int gmc_v9_0_ecc_late_init(void *handle)
 		return 0;
 	}
 	/* handle resume path. */
-	if (*ras_if)
+	if (*ras_if) {
+		/* resend ras TA enable cmd during resume.
+		 * prepare to handle failure.
+		 */
+		ih_info.head = **ras_if;
+		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+		if (r) {
+			if (r == -EAGAIN) {
+				/* request a gpu reset. will run again. */
+				amdgpu_ras_request_reset_on_boot(adev,
+						AMDGPU_RAS_BLOCK__UMC);
+				return 0;
+			}
+			/* fail to enable ras, cleanup all. */
+			goto irq;
+		}
+		/* enable successfully. continue. */
 		goto resume;
+	}
 
 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
 	if (!*ras_if)
@@ -696,8 +717,14 @@ static int gmc_v9_0_ecc_late_init(void *handle)
 	**ras_if = ras_block;
 
 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r)
+	if (r) {
+		if (r == -EAGAIN) {
+			amdgpu_ras_request_reset_on_boot(adev,
+					AMDGPU_RAS_BLOCK__UMC);
+			r = 0;
+		}
 		goto feature;
+	}
 
 	ih_info.head = **ras_if;
 	fs_info.head = **ras_if;
@@ -730,7 +757,7 @@ interrupt:
 feature:
 	kfree(*ras_if);
 	*ras_if = NULL;
-	return -EINVAL;
+	return r;
 }
 
 
@@ -1099,6 +1126,9 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+		if (amdgpu_virt_support_skip_setting(adev))
+			break;
+		/* fall through */
 	case CHIP_VEGA20:
 		soc15_program_register_sequence(adev,
 						golden_settings_mmhub_1_0_0,
@@ -1163,6 +1193,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
 	tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
 	WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
 
+	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
+	WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
+
 	/* After HDP is initialized, flush HDP.*/
 	adev->nbio_funcs->hdp_flush(adev, NULL);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index b1626e1d2f5d..a13dd9a51149 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index f2e6b148ccad..4b3faaccecb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -21,7 +21,6 @@
  *
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "cikd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_smc.c b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
index b82e33c01571..2d9ab6b8be66 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_smc.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "cikd.h"
 #include "kv_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 41a9a5779623..05d1d448c8f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -111,6 +111,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
 			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
+	if (amdgpu_virt_support_skip_setting(adev))
+		return;
+
 	/* Set default page address. */
 	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
 		adev->vm_manager.vram_base_offset;
@@ -156,6 +159,9 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
 {
 	uint32_t tmp;
 
+	if (amdgpu_virt_support_skip_setting(adev))
+		return;
+
 	/* Setup L2 cache */
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1);
@@ -202,6 +208,9 @@ static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev)
 
 static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev)
 {
+	if (amdgpu_virt_support_skip_setting(adev))
+		return;
+
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
 		     0XFFFFFFFF);
 	WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
@@ -338,11 +347,13 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
 				0);
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
 
-	/* Setup L2 cache */
-	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
-	tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
-	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
-	WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+	if (!amdgpu_virt_support_skip_setting(adev)) {
+		/* Setup L2 cache */
+		tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
+		tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
+		WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+		WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
+	}
 }
 
 /**
@@ -354,6 +365,10 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
 void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
 {
 	u32 tmp;
+
+	if (amdgpu_virt_support_skip_setting(adev))
+		return;
+
 	tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 	tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
 			RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 2471e7cf75ea..31030f86be86 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -26,6 +26,7 @@
 #include "nbio/nbio_6_1_sh_mask.h"
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
+#include "mp/mp_9_0_offset.h"
 #include "soc15.h"
 #include "vega10_ih.h"
 #include "soc15_common.h"
@@ -343,7 +344,7 @@ flr_done:
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_device_should_recover_gpu(adev)
-		&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
+		&& adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)
 		amdgpu_device_gpu_recover(adev, NULL);
 }
 
@@ -448,6 +449,23 @@ void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
 	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 }
 
+static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev)
+{
+	uint32_t rlc_fw_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
+	uint32_t sos_fw_ver = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+
+	adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY;
+
+	if (rlc_fw_ver >= 0x5d)
+		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC;
+
+	if (sos_fw_ver >= 0x80455)
+		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH;
+
+	if (sos_fw_ver >= 0x8045b)
+		adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING;
+}
+
 const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
 	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
@@ -456,4 +474,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.trans_msg = xgpu_ai_mailbox_trans_msg,
 	.get_pp_clk = xgpu_ai_get_pp_clk,
 	.force_dpm_level = xgpu_ai_force_dpm_level,
+	.init_reg_access_mode = xgpu_ai_init_reg_access_mode,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 1cdb98ad2db3..73419fa38159 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -29,9 +29,18 @@
 #include "nbio/nbio_7_0_sh_mask.h"
 #include "nbio/nbio_7_0_smn.h"
 #include "vega10_enum.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK	0x1013a05c
 
+static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
 				struct amdgpu_ring *ring)
 {
 	if (!ring || !ring->funcs->emit_wreg)
-		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 	else
-		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
 	.ih_control = nbio_v7_0_ih_control,
 	.init_registers = nbio_v7_0_init_registers,
 	.detect_hw_virt = nbio_v7_0_detect_hw_virt,
+	.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c69d51598cfe..bfaaa327ae3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -27,9 +27,18 @@
 #include "nbio/nbio_7_4_offset.h"
 #include "nbio/nbio_7_4_sh_mask.h"
 #include "nbio/nbio_7_4_0_smn.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c
 
+static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
+{
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL);
+	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
+		adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL);
+}
+
 static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
@@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
 				struct amdgpu_ring *ring)
 {
 	if (!ring || !ring->funcs->emit_wreg)
-		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+		WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 	else
-		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
-			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
+		amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
 }
 
 static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
@@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
 	.ih_control = nbio_v7_4_ih_control,
 	.init_registers = nbio_v7_4_init_registers,
 	.detect_hw_virt = nbio_v7_4_detect_hw_virt,
+	.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 2f79765b4bdb..7f8edc66ddff 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -94,6 +94,7 @@ enum psp_gfx_cmd_id
     GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */
     GFX_CMD_ID_SETUP_VMR    = 0x00000009,   /* setup VMR region */
     GFX_CMD_ID_DESTROY_VMR  = 0x0000000A,   /* destroy VMR region */
+    GFX_CMD_ID_PROG_REG     = 0x0000000B,   /* program regs */
 };
 
 
@@ -217,6 +218,12 @@ struct psp_gfx_cmd_save_restore_ip_fw
     enum psp_gfx_fw_type    fw_type;              /* FW type */
 };
 
+/* Command to setup register program */
+struct psp_gfx_cmd_reg_prog {
+	uint32_t	reg_value;
+	uint32_t	reg_id;
+};
+
 /* All GFX ring buffer commands. */
 union psp_gfx_commands
 {
@@ -226,6 +233,7 @@ union psp_gfx_commands
     struct psp_gfx_cmd_setup_tmr        cmd_setup_tmr;
     struct psp_gfx_cmd_load_ip_fw       cmd_load_ip_fw;
     struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
+    struct psp_gfx_cmd_reg_prog       cmd_setup_reg_prog;
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 77c2bc344dfc..ce1ea31feee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -24,6 +24,9 @@
  */
 
 #include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_ucode.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index b91df7bd1d98..b1e7aca72578 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -21,6 +21,8 @@
  */
 
 #include <linux/firmware.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_ucode.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 143f0fae69d5..2ea772692037 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -24,7 +24,9 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_ucode.h"
@@ -50,6 +52,10 @@ MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
 
 static uint32_t sos_old_versions[] = {1517616, 1510592, 1448594, 1446554};
 
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp);
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+			      enum psp_ring_type ring_type);
+
 static int psp_v3_1_init_microcode(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
@@ -296,27 +302,57 @@ static int psp_v3_1_ring_create(struct psp_context *psp,
 
 	psp_v3_1_reroute_ih(psp);
 
-	/* Write low address of the ring to C2PMSG_69 */
-	psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
-	/* Write high address of the ring to C2PMSG_70 */
-	psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
-	/* Write size of ring to C2PMSG_71 */
-	psp_ring_reg = ring->ring_size;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
-	/* Write the ring initialization command to C2PMSG_64 */
-	psp_ring_reg = ring_type;
-	psp_ring_reg = psp_ring_reg << 16;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
-	/* there might be handshake issue with hardware which needs delay */
-	mdelay(20);
-
-	/* Wait for response flag (bit 31) in C2PMSG_64 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-			   0x80000000, 0x8000FFFF, false);
+	if (psp_v3_1_support_vmr_ring(psp)) {
+		ret = psp_v3_1_ring_stop(psp, ring_type);
+		if (ret) {
+			DRM_ERROR("psp_v3_1_ring_stop_sriov failed!\n");
+			return ret;
+		}
+
+		/* Write low address of the ring to C2PMSG_102 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_103 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+		/* No size initialization for sriov  */
+		/* Write the ring initialization command to C2PMSG_101 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+		/* there might be hardware handshake issue which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+					mmMP0_SMN_C2PMSG_101), 0x80000000,
+					0x8000FFFF, false);
+	} else {
+
+		/* Write low address of the ring to C2PMSG_69 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_70 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+		/* Write size of ring to C2PMSG_71 */
+		psp_ring_reg = ring->ring_size;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+		/* Write the ring initialization command to C2PMSG_64 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be hardware handshake issue which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0,
+					mmMP0_SMN_C2PMSG_64), 0x80000000,
+					0x8000FFFF, false);
 
+	}
 	return ret;
 }
 
@@ -327,16 +363,31 @@ static int psp_v3_1_ring_stop(struct psp_context *psp,
 	unsigned int psp_ring_reg = 0;
 	struct amdgpu_device *adev = psp->adev;
 
-	/* Write the ring destroy command to C2PMSG_64 */
-	psp_ring_reg = 3 << 16;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
-	/* there might be handshake issue with hardware which needs delay */
-	mdelay(20);
-
-	/* Wait for response flag (bit 31) in C2PMSG_64 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-			   0x80000000, 0x80000000, false);
+	if (psp_v3_1_support_vmr_ring(psp)) {
+		/* Write the Destroy GPCOM ring command to C2PMSG_101 */
+		psp_ring_reg = GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, psp_ring_reg);
+
+		/* there might be handshake issue which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(psp,
+				SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+				0x80000000, 0x80000000, false);
+	} else {
+		/* Write the ring destroy command to C2PMSG_64 */
+		psp_ring_reg = 3 << 16;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be handshake issue which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(psp,
+				SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+				0x80000000, 0x80000000, false);
+	}
 
 	return ret;
 }
@@ -375,7 +426,10 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
 	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
 
 	/* KM (GPCOM) prepare write pointer */
-	psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+	if (psp_v3_1_support_vmr_ring(psp))
+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+	else
+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 
 	/* Update KM RB frame pointer to new frame */
 	/* write_frame ptr increments by size of rb_frame in bytes */
@@ -404,7 +458,13 @@ static int psp_v3_1_cmd_submit(struct psp_context *psp,
 
 	/* Update the write Pointer in DWORDs */
 	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
+	if (psp_v3_1_support_vmr_ring(psp)) {
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
+		/* send interrupt to PSP for SRIOV ring write pointer update */
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+					GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
 
 	return 0;
 }
@@ -574,6 +634,14 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
 	return 0;
 }
 
+static bool psp_v3_1_support_vmr_ring(struct psp_context *psp)
+{
+	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455)
+		return true;
+
+	return false;
+}
+
 static const struct psp_funcs psp_v3_1_funcs = {
 	.init_microcode = psp_v3_1_init_microcode,
 	.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
@@ -586,6 +654,7 @@ static const struct psp_funcs psp_v3_1_funcs = {
 	.compare_sram_data = psp_v3_1_compare_sram_data,
 	.smu_reload_quirk = psp_v3_1_smu_reload_quirk,
 	.mode1_reset = psp_v3_1_mode1_reset,
+	.support_vmr_ring = psp_v3_1_support_vmr_ring,
 };
 
 void psp_v3_1_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 36196372e8db..a10175838013 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -574,7 +577,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 6d39544e7829..5f4e2c616241 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -21,8 +21,11 @@
  *
  * Authors: Alex Deucher
  */
+
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -846,7 +849,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 9c88ce513d78..bc3087599523 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -21,8 +21,11 @@
  *
  */
 
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_trace.h"
@@ -210,12 +213,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
-		soc15_program_register_sequence(adev,
-						 golden_settings_sdma_4,
-						 ARRAY_SIZE(golden_settings_sdma_4));
-		soc15_program_register_sequence(adev,
-						 golden_settings_sdma_vg10,
-						 ARRAY_SIZE(golden_settings_sdma_vg10));
+		if (!amdgpu_virt_support_skip_setting(adev)) {
+			soc15_program_register_sequence(adev,
+							 golden_settings_sdma_4,
+							 ARRAY_SIZE(golden_settings_sdma_4));
+			soc15_program_register_sequence(adev,
+							 golden_settings_sdma_vg10,
+							 ARRAY_SIZE(golden_settings_sdma_vg10));
+		}
 		break;
 	case CHIP_VEGA12:
 		soc15_program_register_sequence(adev,
@@ -1207,7 +1212,7 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -1521,8 +1526,25 @@ static int sdma_v4_0_late_init(void *handle)
 	}
 
 	/* handle resume path. */
-	if (*ras_if)
+	if (*ras_if) {
+		/* resend ras TA enable cmd during resume.
+		 * prepare to handle failure.
+		 */
+		ih_info.head = **ras_if;
+		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
+		if (r) {
+			if (r == -EAGAIN) {
+				/* request a gpu reset. will run again. */
+				amdgpu_ras_request_reset_on_boot(adev,
+						AMDGPU_RAS_BLOCK__SDMA);
+				return 0;
+			}
+			/* fail to enable ras, cleanup all. */
+			goto irq;
+		}
+		/* enable successfully. continue. */
 		goto resume;
+	}
 
 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
 	if (!*ras_if)
@@ -1531,8 +1553,14 @@ static int sdma_v4_0_late_init(void *handle)
 	**ras_if = ras_block;
 
 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
-	if (r)
+	if (r) {
+		if (r == -EAGAIN) {
+			amdgpu_ras_request_reset_on_boot(adev,
+					AMDGPU_RAS_BLOCK__SDMA);
+			r = 0;
+		}
 		goto feature;
+	}
 
 	ih_info.head = **ras_if;
 	fs_info.head = **ras_if;
@@ -1571,7 +1599,7 @@ interrupt:
 feature:
 	kfree(*ras_if);
 	*ras_if = NULL;
-	return -EINVAL;
+	return r;
 }
 
 static int sdma_v4_0_sw_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 9d8df68893b9..5e1a2528df7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -24,7 +24,8 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -1375,6 +1376,18 @@ static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
 	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
 }
 
+static uint64_t si_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+	uint64_t nak_r, nak_g;
+
+	/* Get the number of NAKs received and generated */
+	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+	/* Add the total number of NAKs, i.e the number of replays */
+	return (nak_r + nak_g);
+}
+
 static const struct amdgpu_asic_funcs si_asic_funcs =
 {
 	.read_disabled_bios = &si_read_disabled_bios,
@@ -1393,6 +1406,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
 	.need_full_reset = &si_need_full_reset,
 	.get_pcie_usage = &si_get_pcie_usage,
 	.need_reset_on_init = &si_need_reset_on_init,
+	.get_pcie_replay_count = &si_get_pcie_replay_count,
 };
 
 static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 3eeefd40dae0..bdda8b4e03f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "si.h"
@@ -230,7 +230,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index d57e75e5c71f..4cb4c891120b 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -21,7 +21,9 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_pm.h"
 #include "amdgpu_dpm.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index 8c50c9cab455..57bb5f9e08b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "sid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/si_smc.c b/drivers/gpu/drm/amd/amdgpu/si_smc.c
index 4a2fd8b61940..8f994ffa9cd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_smc.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "sid.h"
 #include "ppsmc.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index b7e594c2bfb4..b769995c3029 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -23,7 +23,8 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -44,6 +45,7 @@
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
 #include "nbio/nbio_7_0_default.h"
+#include "nbio/nbio_7_0_offset.h"
 #include "nbio/nbio_7_0_sh_mask.h"
 #include "nbio/nbio_7_0_smn.h"
 #include "mp/mp_9_0_offset.h"
@@ -64,6 +66,9 @@
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
 #include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+#include <uapi/linux/kfd_ioctl.h>
 
 #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9
 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                                          0
@@ -230,7 +235,7 @@ void soc15_grbm_select(struct amdgpu_device *adev,
 	grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
 	grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
 
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL), grbm_gfx_cntl);
+	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_CNTL, grbm_gfx_cntl);
 }
 
 static void soc15_vga_set_state(struct amdgpu_device *adev, bool state)
@@ -385,7 +390,15 @@ void soc15_program_register_sequence(struct amdgpu_device *adev,
 			tmp &= ~(entry->and_mask);
 			tmp |= entry->or_mask;
 		}
-		WREG32(reg, tmp);
+
+		if (reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3) ||
+			reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE) ||
+			reg == SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1) ||
+			reg == SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG))
+			WREG32_RLC(reg, tmp);
+		else
+			WREG32(reg, tmp);
+
 	}
 
 }
@@ -475,6 +488,13 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
 			soc15_asic_get_baco_capability(adev, &baco_reset);
 		else
 			baco_reset = false;
+		if (baco_reset) {
+			struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+			struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+			if (hive || (ras && ras->supported))
+				baco_reset = false;
+		}
 		break;
 	default:
 		baco_reset = false;
@@ -606,12 +626,24 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 	case CHIP_VEGA20:
 		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
-		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
-		if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
-			if (adev->asic_type == CHIP_VEGA20)
-				amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
-			else
-				amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+
+		/* For Vega10 SR-IOV, PSP need to be initialized before IH */
+		if (amdgpu_sriov_vf(adev)) {
+			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+				if (adev->asic_type == CHIP_VEGA20)
+					amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+				else
+					amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+			}
+			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+		} else {
+			amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
+			if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
+				if (adev->asic_type == CHIP_VEGA20)
+					amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block);
+				else
+					amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+			}
 		}
 		amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
@@ -733,7 +765,8 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
 	/* Just return false for soc15 GPUs.  Reset does not seem to
 	 * be necessary.
 	 */
-	return false;
+	if (!amdgpu_passthrough(adev))
+		return false;
 
 	if (adev->flags & AMD_IS_APU)
 		return false;
@@ -748,6 +781,18 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev)
 	return false;
 }
 
+static uint64_t soc15_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+	uint64_t nak_r, nak_g;
+
+	/* Get the number of NAKs received and generated */
+	nak_r = RREG32_PCIE(smnPCIE_RX_NUM_NAK);
+	nak_g = RREG32_PCIE(smnPCIE_RX_NUM_NAK_GENERATED);
+
+	/* Add the total number of NAKs, i.e the number of replays */
+	return (nak_r + nak_g);
+}
+
 static const struct amdgpu_asic_funcs soc15_asic_funcs =
 {
 	.read_disabled_bios = &soc15_read_disabled_bios,
@@ -765,6 +810,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
 	.init_doorbell_index = &vega10_doorbell_index_init,
 	.get_pcie_usage = &soc15_get_pcie_usage,
 	.need_reset_on_init = &soc15_need_reset_on_init,
+	.get_pcie_replay_count = &soc15_get_pcie_replay_count,
 };
 
 static const struct amdgpu_asic_funcs vega20_asic_funcs =
@@ -784,12 +830,16 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
 	.init_doorbell_index = &vega20_doorbell_index_init,
 	.get_pcie_usage = &soc15_get_pcie_usage,
 	.need_reset_on_init = &soc15_need_reset_on_init,
+	.get_pcie_replay_count = &soc15_get_pcie_replay_count,
 };
 
 static int soc15_common_early_init(void *handle)
 {
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
 	adev->smc_rreg = NULL;
 	adev->smc_wreg = NULL;
 	adev->pcie_rreg = &soc15_pcie_rreg;
@@ -998,11 +1048,17 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev)
 	int i;
 	struct amdgpu_ring *ring;
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		ring = &adev->sdma.instance[i].ring;
-		adev->nbio_funcs->sdma_doorbell_range(adev, i,
-			ring->use_doorbell, ring->doorbell_index,
-			adev->doorbell_index.sdma_doorbell_range);
+	/*  Two reasons to skip
+	*		1, Host driver already programmed them
+	*		2, To avoid registers program violations in SR-IOV
+	*/
+	if (!amdgpu_virt_support_skip_setting(adev)) {
+		for (i = 0; i < adev->sdma.num_instances; i++) {
+			ring = &adev->sdma.instance[i].ring;
+			adev->nbio_funcs->sdma_doorbell_range(adev, i,
+				ring->use_doorbell, ring->doorbell_index,
+				adev->doorbell_index.sdma_doorbell_range);
+		}
 	}
 
 	adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell,
@@ -1019,6 +1075,12 @@ static int soc15_common_hw_init(void *handle)
 	soc15_program_aspm(adev);
 	/* setup nbio registers */
 	adev->nbio_funcs->init_registers(adev);
+	/* remap HDP registers to a hole in mmio space,
+	 * for the purpose of expose those registers
+	 * to process space
+	 */
+	if (adev->nbio_funcs->remap_hdp_registers)
+		adev->nbio_funcs->remap_hdp_registers(adev);
 	/* enable the doorbell aperture */
 	soc15_enable_doorbell_aperture(adev, true);
 	/* HW doorbell routing policy: doorbell writing not
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index a66c8bfbbaa6..06f39f5bbf76 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -42,8 +42,18 @@ struct soc15_reg_golden {
 	u32	or_mask;
 };
 
+struct soc15_reg_entry {
+	uint32_t hwip;
+	uint32_t inst;
+	uint32_t seg;
+	uint32_t reg_offset;
+	uint32_t reg_value;
+};
+
 #define SOC15_REG_ENTRY(ip, inst, reg)	ip##_HWIP, inst, reg##_BASE_IDX, reg
 
+#define SOC15_REG_ENTRY_OFFSET(entry)	(adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
+
 #define SOC15_REG_GOLDEN_VALUE(ip, inst, reg, and_mask, or_mask) \
 	{ ip##_HWIP, inst, reg##_BASE_IDX, reg, and_mask, or_mask }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 49c262540940..47f74dab365d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -69,26 +69,60 @@
 		}						\
 	} while (0)
 
-#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) 	\
-		({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \
-			WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,	\
-				UVD_DPG_LMA_CTL__MASK_EN_MASK |				\
-				((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
-				<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \
-				(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT));	\
-			RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); })
+#define WREG32_RLC(reg, value) \
+	do {							\
+		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \
+			uint32_t i = 0;	\
+			uint32_t retries = 50000;	\
+			uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0;	\
+			uint32_t r1 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1;	\
+			uint32_t spare_int = adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT;	\
+			WREG32(r0, value);	\
+			WREG32(r1, (reg | 0x80000000));	\
+			WREG32(spare_int, 0x1);	\
+			for (i = 0; i < retries; i++) {	\
+				u32 tmp = RREG32(r1);	\
+				if (!(tmp & 0x80000000))	\
+					break;	\
+				udelay(10);	\
+			}	\
+			if (i >= retries)	\
+				pr_err("timeout: rlcg program reg:0x%05x failed !\n", reg);	\
+		} else {	\
+			WREG32(reg, value); \
+		}	\
+	} while (0)
 
-#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel)	\
+#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
 	do {							\
-		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value);	\
-		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask);		\
-		WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL,	\
-			UVD_DPG_LMA_CTL__READ_WRITE_MASK |	\
-			((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \
-			<< UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) |	\
-			(sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \
+		uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
+		if (amdgpu_virt_support_rlc_prg_reg(adev)) {    \
+			uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2;	\
+			uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3;	\
+			uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;   \
+			uint32_t grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;   \
+			if (target_reg == grbm_cntl) \
+				WREG32(r2, value);	\
+			else if (target_reg == grbm_idx) \
+				WREG32(r3, value);	\
+			WREG32(target_reg, value);	\
+		} else {	\
+			WREG32(target_reg, value); \
+		}	\
 	} while (0)
 
-#endif
+#define WREG32_SOC15_RLC(ip, inst, reg, value) \
+	do {							\
+			uint32_t target_reg = adev->reg_offset[GC_HWIP][0][reg##_BASE_IDX] + reg;\
+			WREG32_RLC(target_reg, value); \
+	} while (0)
+
+#define WREG32_FIELD15_RLC(ip, idx, reg, field, val)   \
+    WREG32_RLC((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \
+    (RREG32(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg) \
+    & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
+#define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \
+    WREG32_RLC(((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset), value)
 
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index a20b711a6756..e40140bf6699 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "vid.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index c4fb58667fd4..82abd8e728ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_uvd.h"
 #include "cikd.h"
@@ -491,7 +491,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -741,6 +741,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = uvd_v4_2_ring_get_rptr,
 	.get_wptr = uvd_v4_2_ring_get_wptr,
 	.set_wptr = uvd_v4_2_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 52bd8a654734..01e62fb8e6e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -22,8 +22,9 @@
  * Authors: Christian König <christian.koenig@amd.com>
  */
 
+#include <linux/delay.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_uvd.h"
 #include "vid.h"
@@ -506,7 +507,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -849,6 +850,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = uvd_v5_0_ring_get_rptr,
 	.get_wptr = uvd_v5_0_ring_get_wptr,
 	.set_wptr = uvd_v5_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9edddf9f88a..670784a78512 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_uvd.h"
 #include "vid.h"
@@ -170,20 +170,23 @@ static void uvd_v6_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
 static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	uint32_t rptr = amdgpu_ring_get_rptr(ring);
+	uint32_t rptr;
 	unsigned i;
 	int r;
 
 	r = amdgpu_ring_alloc(ring, 16);
 	if (r)
 		return r;
+
+	rptr = amdgpu_ring_get_rptr(ring);
+
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -957,7 +960,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32(mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -1502,6 +1505,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = uvd_v6_0_ring_get_rptr,
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1527,6 +1531,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = uvd_v6_0_ring_get_rptr,
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
@@ -1555,6 +1560,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = uvd_v6_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v6_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v6_0_enc_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 2191d3d0a219..a6bfe7651d07 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -22,7 +22,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_uvd.h"
 #include "soc15.h"
@@ -175,7 +175,7 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
 static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	uint32_t rptr = amdgpu_ring_get_rptr(ring);
+	uint32_t rptr;
 	unsigned i;
 	int r;
 
@@ -185,13 +185,16 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
 	r = amdgpu_ring_alloc(ring, 16);
 	if (r)
 		return r;
+
+	rptr = amdgpu_ring_get_rptr(ring);
+
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (amdgpu_ring_get_rptr(ring) != rptr)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -1227,7 +1230,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring)
 		tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i >= adev->usec_timeout)
@@ -1759,6 +1762,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
@@ -1791,6 +1795,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 40363ca6c5f1..b6837fcfdba7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -26,7 +26,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_vce.h"
 #include "cikd.h"
@@ -605,6 +605,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = vce_v2_0_ring_get_rptr,
 	.get_wptr = vce_v2_0_ring_get_wptr,
 	.set_wptr = vce_v2_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6ec65cf11112..475ae68f38f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -26,7 +26,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_vce.h"
 #include "vid.h"
@@ -894,6 +894,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
 	.align_mask = 0xf,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = vce_v3_0_ring_get_rptr,
 	.get_wptr = vce_v3_0_ring_get_wptr,
 	.set_wptr = vce_v3_0_ring_set_wptr,
@@ -917,6 +918,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
 	.align_mask = 0xf,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.get_rptr = vce_v3_0_ring_get_rptr,
 	.get_wptr = vce_v3_0_ring_get_wptr,
 	.set_wptr = vce_v3_0_ring_set_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index c0ec27991c22..eafbe8d8248d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -25,7 +25,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_vce.h"
 #include "soc15.h"
@@ -1069,6 +1069,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vce_v4_0_ring_get_rptr,
 	.get_wptr = vce_v4_0_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 3dbc51f9d3b9..d30ff256ff57 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -22,7 +22,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_vcn.h"
 #include "soc15.h"
@@ -49,6 +49,8 @@ static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
 static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+				struct dpg_pause_state *new_state);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -140,7 +142,9 @@ static int vcn_v1_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	return r;
+	adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode;
+
+	return 0;
 }
 
 /**
@@ -1204,6 +1208,132 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev)
 	return r;
 }
 
+static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev,
+				struct dpg_pause_state *new_state)
+{
+	int ret_code;
+	uint32_t reg_data = 0;
+	uint32_t reg_data2 = 0;
+	struct amdgpu_ring *ring;
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+			new_state->fw_based, new_state->jpeg);
+
+		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+
+			if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK))
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* pause DPG non-jpeg */
+				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+						   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.ring_enc[0];
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.ring_enc[1];
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+				WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+				WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+				ring = &adev->vcn.ring_dec;
+				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg non-jpeg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.fw_based = new_state->fw_based;
+	}
+
+	/* pause/unpause if state is changed */
+	if (adev->vcn.pause_state.jpeg != new_state->jpeg) {
+		DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d",
+			adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg,
+			new_state->fw_based, new_state->jpeg);
+
+		reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) &
+			(~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK);
+
+		if (new_state->jpeg == VCN_DPG_STATE__PAUSE) {
+			ret_code = 0;
+
+			if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK))
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+			if (!ret_code) {
+				/* Make sure JPRG Snoop is disabled before sending the pause */
+				reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS);
+				reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2);
+
+				/* pause DPG jpeg */
+				reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+				WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE,
+							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK,
+							UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code);
+
+				/* Restore */
+				ring = &adev->vcn.ring_jpeg;
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+							UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK |
+							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
+							lower_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
+							upper_32_bits(ring->gpu_addr));
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr);
+				WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL,
+							UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK);
+
+				ring = &adev->vcn.ring_dec;
+				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
+						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+			}
+		} else {
+			/* unpause dpg jpeg, no need to wait */
+			reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK;
+			WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data);
+		}
+		adev->vcn.pause_state.jpeg = new_state->jpeg;
+	}
+
+	return 0;
+}
+
 static bool vcn_v1_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2054,6 +2184,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_DEC,
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vcn_v1_0_dec_ring_get_rptr,
 	.get_wptr = vcn_v1_0_dec_ring_get_wptr,
@@ -2087,6 +2218,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
 	.align_mask = 0x3f,
 	.nop = VCN_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vcn_v1_0_enc_ring_get_rptr,
 	.get_wptr = vcn_v1_0_enc_ring_get_wptr,
@@ -2118,6 +2250,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = {
 	.align_mask = 0xf,
 	.nop = PACKET0(0x81ff, 0),
 	.support_64bit_ptrs = false,
+	.no_user_fence = true,
 	.vmhub = AMDGPU_MMHUB,
 	.extra_dw = 64,
 	.get_rptr = vcn_v1_0_jpeg_ring_get_rptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 8d89ab7f0ae8..22260e6963b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -20,7 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
-#include <drm/drmP.h>
+
+#include <linux/pci.h>
+
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "soc15.h"
@@ -48,14 +50,29 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1);
-	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+			return;
+		}
+	} else {
+		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+	}
 	adev->irq.ih.enabled = true;
 
 	if (adev->irq.ih1.ring_size) {
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
 					   RB_ENABLE, 1);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+				return;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		}
 		adev->irq.ih1.enabled = true;
 	}
 
@@ -63,7 +80,15 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 					   RB_ENABLE, 1);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+				return;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+		}
 		adev->irq.ih2.enabled = true;
 	}
 }
@@ -81,7 +106,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
 
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0);
-	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+			return;
+		}
+	} else {
+		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+	}
+
 	/* set rptr, wptr to 0 */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0);
@@ -92,7 +125,15 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1,
 					   RB_ENABLE, 0);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+				return;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		}
 		/* set rptr, wptr to 0 */
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0);
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -104,7 +145,16 @@ static void vega10_ih_disable_interrupts(struct amdgpu_device *adev)
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2,
 					   RB_ENABLE, 0);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+				return;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+		}
+
 		/* set rptr, wptr to 0 */
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0);
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
@@ -187,7 +237,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
 	ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM,
 				   !!adev->irq.msi_enabled);
-	WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+
+	if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+		if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) {
+			DRM_ERROR("PSP program IH_RB_CNTL failed!\n");
+			return -ETIMEDOUT;
+		}
+	} else {
+		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl);
+	}
 
 	/* set the writeback address whether it's enabled or not */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO,
@@ -214,7 +272,15 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 					   WPTR_OVERFLOW_ENABLE, 0);
 		ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL,
 					   RB_FULL_DRAIN_ENABLE, 1);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n");
+				return -ETIMEDOUT;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl);
+		}
 
 		/* set rptr, wptr to 0 */
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0);
@@ -232,7 +298,16 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 
 		ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2);
 		ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl);
-		WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+
+		if (amdgpu_virt_support_psp_prg_ih_reg(adev)) {
+			if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2,
+						ih_rb_cntl)) {
+				DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n");
+				return -ETIMEDOUT;
+			}
+		} else {
+			WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl);
+		}
 
 		/* set rptr, wptr to 0 */
 		WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 5e5b42a0744a..d40ed1a828dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -20,8 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
+#include <linux/pci.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include "amdgpu.h"
 #include "amdgpu_atombios.h"
 #include "amdgpu_ih.h"
@@ -987,6 +989,18 @@ static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
 	*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
 }
 
+static uint64_t vi_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+	uint64_t nak_r, nak_g;
+
+	/* Get the number of NAKs received and generated */
+	nak_r = RREG32_PCIE(ixPCIE_RX_NUM_NAK);
+	nak_g = RREG32_PCIE(ixPCIE_RX_NUM_NAK_GENERATED);
+
+	/* Add the total number of NAKs, i.e the number of replays */
+	return (nak_r + nak_g);
+}
+
 static bool vi_need_reset_on_init(struct amdgpu_device *adev)
 {
 	u32 clock_cntl, pc;
@@ -1021,6 +1035,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.init_doorbell_index = &legacy_doorbell_index_init,
 	.get_pcie_usage = &vi_get_pcie_usage,
 	.need_reset_on_init = &vi_need_reset_on_init,
+	.get_pcie_replay_count = &vi_get_pcie_replay_count,
 };
 
 #define CZ_REV_BRISTOL(rev)	 \
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 3621efbd5759..e413d4a71fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -21,7 +21,7 @@
  */
 
 static const uint32_t cwsr_trap_gfx8_hex[] = {
-	0xbf820001, 0xbf82012b,
+	0xbf820001, 0xbf820121,
 	0xb8f4f802, 0x89748674,
 	0xb8f5f803, 0x8675ff75,
 	0x00000400, 0xbf850017,
@@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 	0x8671ff71, 0x0000ffff,
 	0x8f728374, 0xb972e0c2,
 	0xbf800002, 0xb9740002,
-	0xbe801f70, 0xb8f5f803,
-	0x8675ff75, 0x00000100,
-	0xbf840006, 0xbefa0080,
-	0xb97a0203, 0x8671ff71,
-	0x0000ffff, 0x80f08870,
-	0x82f18071, 0xbefa0080,
+	0xbe801f70, 0xbefa0080,
 	0xb97a0283, 0xbef60068,
 	0xbef70069, 0xb8fa1c07,
 	0x8e7a9c7a, 0x87717a71,
@@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
 
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-	0xbf820001, 0xbf82015d,
+	0xbf820001, 0xbf82015e,
 	0xb8f8f802, 0x89788678,
-	0xb8f1f803, 0x866eff71,
-	0x00000400, 0xbf850037,
-	0x866eff71, 0x00000800,
-	0xbf850003, 0x866eff71,
-	0x00000100, 0xbf840008,
+	0xb8fbf803, 0x866eff7b,
+	0x00000400, 0xbf85003b,
+	0x866eff7b, 0x00000800,
+	0xbf850003, 0x866eff7b,
+	0x00000100, 0xbf84000c,
 	0x866eff78, 0x00002000,
-	0xbf840001, 0xbf810000,
+	0xbf840005, 0xbf8e0010,
+	0xb8eef803, 0x866eff6e,
+	0x00000400, 0xbf84fffb,
 	0x8778ff78, 0x00002000,
 	0x80ec886c, 0x82ed806d,
 	0xb8eef807, 0x866fff6e,
@@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0x8977ff77, 0xfc000000,
 	0x87776f77, 0x896eff6e,
 	0x001f8000, 0xb96ef807,
-	0xb8f0f812, 0xb8f1f813,
-	0x8ef08870, 0xc0071bb8,
+	0xb8faf812, 0xb8fbf813,
+	0x8efa887a, 0xc0071bbd,
 	0x00000000, 0xbf8cc07f,
-	0xc0071c38, 0x00000008,
+	0xc0071ebd, 0x00000008,
 	0xbf8cc07f, 0x86ee6e6e,
 	0xbf840001, 0xbe801d6e,
-	0xb8f1f803, 0x8671ff71,
+	0xb8fbf803, 0x867bff7b,
 	0x000001ff, 0xbf850002,
 	0x806c846c, 0x826d806d,
 	0x866dff6d, 0x0000ffff,
@@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0x8f6e8378, 0xb96ee0c2,
 	0xbf800002, 0xb9780002,
 	0xbe801f6c, 0x866dff6d,
-	0x0000ffff, 0xbef00080,
-	0xb9700283, 0xb8f02407,
-	0x8e709c70, 0x876d706d,
-	0xb8f003c7, 0x8e709b70,
-	0x876d706d, 0xb8f0f807,
-	0x8670ff70, 0x00007fff,
-	0xb970f807, 0xbeee007e,
+	0x0000ffff, 0xbefa0080,
+	0xb97a0283, 0xb8fa2407,
+	0x8e7a9b7a, 0x876d7a6d,
+	0xb8fa03c7, 0x8e7a9a7a,
+	0x876d7a6d, 0xb8faf807,
+	0x867aff7a, 0x00007fff,
+	0xb97af807, 0xbeee007e,
 	0xbeef007f, 0xbefe0180,
-	0xbf900004, 0x87708478,
-	0xb970f802, 0xbf8e0002,
-	0xbf88fffe, 0xb8f02a05,
+	0xbf900004, 0x877a8478,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b867b, 0x807a7b7a,
+	0x807a7e7a, 0x827b807f,
+	0x867bff7b, 0x0000ffff,
+	0xc04b1c3d, 0x00000050,
+	0xbf8cc07f, 0xc04b1d3d,
+	0x00000060, 0xbf8cc07f,
+	0xc0431e7d, 0x00000074,
+	0xbf8cc07f, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0x867aff7f,
+	0x08000000, 0x8f7a837a,
+	0x87777a77, 0x867aff7f,
+	0x70000000, 0x8f7a817a,
+	0x87777a77, 0xbef1007c,
+	0xbef00080, 0xb8f02a05,
 	0x80708170, 0x8e708a70,
-	0xb8f11605, 0x80718171,
-	0x8e718671, 0x80707170,
-	0x80707e70, 0x8271807f,
-	0x8671ff71, 0x0000ffff,
-	0xc0471cb8, 0x00000040,
-	0xbf8cc07f, 0xc04b1d38,
-	0x00000048, 0xbf8cc07f,
-	0xc0431e78, 0x00000058,
-	0xbf8cc07f, 0xc0471eb8,
-	0x0000005c, 0xbf8cc07f,
-	0xbef4007e, 0x8675ff7f,
-	0x0000ffff, 0x8775ff75,
-	0x00040000, 0xbef60080,
-	0xbef700ff, 0x00807fac,
-	0x8670ff7f, 0x08000000,
-	0x8f708370, 0x87777077,
-	0x8670ff7f, 0x70000000,
-	0x8f708170, 0x87777077,
-	0xbefb007c, 0xbefa0080,
-	0xb8fa2a05, 0x807a817a,
-	0x8e7a8a7a, 0xb8f01605,
-	0x80708170, 0x8e708670,
-	0x807a707a, 0xbef60084,
-	0xbef600ff, 0x01000000,
-	0xbefe007c, 0xbefc007a,
-	0xc0611efa, 0x0000007c,
-	0xbf8cc07f, 0x807a847a,
-	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611b3a,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x80707a70,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc0070, 0xc0611c7a,
 	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0xbefe007c, 0xbefc007a,
-	0xc0611b7a, 0x0000007c,
-	0xbf8cc07f, 0x807a847a,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611bba,
+	0xbefc0070, 0xc0611b7a,
 	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0xbefe007c, 0xbefc007a,
-	0xc0611bfa, 0x0000007c,
-	0xbf8cc07f, 0x807a847a,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611e3a,
+	0xbefc0070, 0xc0611bfa,
 	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0xb8f1f803, 0xbefe007c,
-	0xbefc007a, 0xc0611c7a,
-	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0xbefe007c, 0xbefc007a,
-	0xc0611a3a, 0x0000007c,
-	0xbf8cc07f, 0x807a847a,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8fbf803,
+	0xbefe007c, 0xbefc0070,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
 	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611a7a,
-	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0xb8fbf801, 0xbefe007c,
-	0xbefc007a, 0xc0611efa,
+	0xbefc0070, 0xc0611a3a,
 	0x0000007c, 0xbf8cc07f,
-	0x807a847a, 0xbefc007e,
-	0x8670ff7f, 0x04000000,
-	0xbeef0080, 0x876f6f70,
-	0xb8fa2a05, 0x807a817a,
-	0x8e7a8a7a, 0xb8f11605,
-	0x80718171, 0x8e718471,
-	0x8e768271, 0xbef600ff,
-	0x01000000, 0xbef20174,
-	0x80747a74, 0x82758075,
-	0xbefc0080, 0xbf800000,
-	0xbe802b00, 0xbe822b02,
-	0xbe842b04, 0xbe862b06,
-	0xbe882b08, 0xbe8a2b0a,
-	0xbe8c2b0c, 0xbe8e2b0e,
-	0xc06b003a, 0x00000000,
-	0xbf8cc07f, 0xc06b013a,
-	0x00000010, 0xbf8cc07f,
-	0xc06b023a, 0x00000020,
-	0xbf8cc07f, 0xc06b033a,
-	0x00000030, 0xbf8cc07f,
-	0x8074c074, 0x82758075,
-	0x807c907c, 0xbf0a717c,
-	0xbf85ffe7, 0xbef40172,
-	0xbefa0080, 0xbefe00c1,
-	0xbeff00c1, 0xbee80080,
-	0xbee90080, 0xbef600ff,
-	0x01000000, 0xe0724000,
-	0x7a1d0000, 0xe0724100,
-	0x7a1d0100, 0xe0724200,
-	0x7a1d0200, 0xe0724300,
-	0x7a1d0300, 0xbefe00c1,
-	0xbeff00c1, 0xb8f14306,
-	0x8671c171, 0xbf84002c,
-	0xbf8a0000, 0x8670ff6f,
-	0x04000000, 0xbf840028,
-	0x8e718671, 0x8e718271,
-	0xbef60071, 0xb8fa2a05,
-	0x807a817a, 0x8e7a8a7a,
-	0xb8f01605, 0x80708170,
-	0x8e708670, 0x807a707a,
-	0x807aff7a, 0x00000080,
+	0x80708470, 0xbefc007e,
+	0xbefe007c, 0xbefc0070,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0xb8f1f801,
+	0xbefe007c, 0xbefc0070,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x80708470,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f7a, 0xb8f02a05,
+	0x80708170, 0x8e708a70,
+	0xb8fb1605, 0x807b817b,
+	0x8e7b847b, 0x8e76827b,
 	0xbef600ff, 0x01000000,
-	0xbefc0080, 0xd28c0002,
-	0x000100c1, 0xd28d0003,
-	0x000204c1, 0xd1060002,
-	0x00011103, 0x7e0602ff,
-	0x00000200, 0xbefc00ff,
-	0x00010000, 0xbe800077,
-	0x8677ff77, 0xff7fffff,
-	0x8777ff77, 0x00058000,
-	0xd8ec0000, 0x00000002,
-	0xbf8cc07f, 0xe0765000,
-	0x7a1d0002, 0x68040702,
-	0xd0c9006a, 0x0000e302,
-	0xbf87fff7, 0xbef70000,
-	0xbefa00ff, 0x00000400,
+	0xbef20174, 0x80747074,
+	0x82758075, 0xbefc0080,
+	0xbf800000, 0xbe802b00,
+	0xbe822b02, 0xbe842b04,
+	0xbe862b06, 0xbe882b08,
+	0xbe8a2b0a, 0xbe8c2b0c,
+	0xbe8e2b0e, 0xc06b003a,
+	0x00000000, 0xbf8cc07f,
+	0xc06b013a, 0x00000010,
+	0xbf8cc07f, 0xc06b023a,
+	0x00000020, 0xbf8cc07f,
+	0xc06b033a, 0x00000030,
+	0xbf8cc07f, 0x8074c074,
+	0x82758075, 0x807c907c,
+	0xbf0a7b7c, 0xbf85ffe7,
+	0xbef40172, 0xbef00080,
 	0xbefe00c1, 0xbeff00c1,
-	0xb8f12a05, 0x80718171,
-	0x8e718271, 0x8e768871,
+	0xbee80080, 0xbee90080,
 	0xbef600ff, 0x01000000,
-	0xbefc0084, 0xbf0a717c,
-	0xbf840015, 0xbf11017c,
-	0x8071ff71, 0x00001000,
-	0x7e000300, 0x7e020301,
-	0x7e040302, 0x7e060303,
-	0xe0724000, 0x7a1d0000,
-	0xe0724100, 0x7a1d0100,
-	0xe0724200, 0x7a1d0200,
-	0xe0724300, 0x7a1d0300,
-	0x807c847c, 0x807aff7a,
-	0x00000400, 0xbf0a717c,
-	0xbf85ffef, 0xbf9c0000,
-	0xbf8200dc, 0xbef4007e,
-	0x8675ff7f, 0x0000ffff,
-	0x8775ff75, 0x00040000,
-	0xbef60080, 0xbef700ff,
-	0x00807fac, 0x866eff7f,
-	0x08000000, 0x8f6e836e,
-	0x87776e77, 0x866eff7f,
-	0x70000000, 0x8f6e816e,
-	0x87776e77, 0x866eff7f,
-	0x04000000, 0xbf84001e,
+	0xe0724000, 0x701d0000,
+	0xe0724100, 0x701d0100,
+	0xe0724200, 0x701d0200,
+	0xe0724300, 0x701d0300,
 	0xbefe00c1, 0xbeff00c1,
-	0xb8ef4306, 0x866fc16f,
-	0xbf840019, 0x8e6f866f,
-	0x8e6f826f, 0xbef6006f,
-	0xb8f82a05, 0x80788178,
-	0x8e788a78, 0xb8ee1605,
-	0x806e816e, 0x8e6e866e,
-	0x80786e78, 0x8078ff78,
+	0xb8fb4306, 0x867bc17b,
+	0xbf84002c, 0xbf8a0000,
+	0x867aff6f, 0x04000000,
+	0xbf840028, 0x8e7b867b,
+	0x8e7b827b, 0xbef6007b,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0xb8fa1605,
+	0x807a817a, 0x8e7a867a,
+	0x80707a70, 0x8070ff70,
 	0x00000080, 0xbef600ff,
 	0x01000000, 0xbefc0080,
-	0xe0510000, 0x781d0000,
-	0xe0510100, 0x781d0000,
-	0x807cff7c, 0x00000200,
-	0x8078ff78, 0x00000200,
-	0xbf0a6f7c, 0xbf85fff6,
-	0xbef80080, 0xbefe00c1,
-	0xbeff00c1, 0xb8ef2a05,
-	0x806f816f, 0x8e6f826f,
-	0x8e76886f, 0xbef600ff,
-	0x01000000, 0xbeee0078,
-	0x8078ff78, 0x00000400,
-	0xbefc0084, 0xbf11087c,
-	0x806fff6f, 0x00008000,
-	0xe0524000, 0x781d0000,
-	0xe0524100, 0x781d0100,
-	0xe0524200, 0x781d0200,
-	0xe0524300, 0x781d0300,
-	0xbf8c0f70, 0x7e000300,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0xd1060002, 0x00011103,
+	0x7e0602ff, 0x00000200,
+	0xbefc00ff, 0x00010000,
+	0xbe800077, 0x8677ff77,
+	0xff7fffff, 0x8777ff77,
+	0x00058000, 0xd8ec0000,
+	0x00000002, 0xbf8cc07f,
+	0xe0765000, 0x701d0002,
+	0x68040702, 0xd0c9006a,
+	0x0000f702, 0xbf87fff7,
+	0xbef70000, 0xbef000ff,
+	0x00000400, 0xbefe00c1,
+	0xbeff00c1, 0xb8fb2a05,
+	0x807b817b, 0x8e7b827b,
+	0x8e76887b, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a7b7c, 0xbf840015,
+	0xbf11017c, 0x807bff7b,
+	0x00001000, 0x7e000300,
 	0x7e020301, 0x7e040302,
-	0x7e060303, 0x807c847c,
-	0x8078ff78, 0x00000400,
-	0xbf0a6f7c, 0xbf85ffee,
-	0xbf9c0000, 0xe0524000,
-	0x6e1d0000, 0xe0524100,
-	0x6e1d0100, 0xe0524200,
-	0x6e1d0200, 0xe0524300,
-	0x6e1d0300, 0xb8f82a05,
+	0x7e060303, 0xe0724000,
+	0x701d0000, 0xe0724100,
+	0x701d0100, 0xe0724200,
+	0x701d0200, 0xe0724300,
+	0x701d0300, 0x807c847c,
+	0x8070ff70, 0x00000400,
+	0xbf0a7b7c, 0xbf85ffef,
+	0xbf9c0000, 0xbf8200da,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0x866eff7f, 0x08000000,
+	0x8f6e836e, 0x87776e77,
+	0x866eff7f, 0x70000000,
+	0x8f6e816e, 0x87776e77,
+	0x866eff7f, 0x04000000,
+	0xbf84001e, 0xbefe00c1,
+	0xbeff00c1, 0xb8ef4306,
+	0x866fc16f, 0xbf840019,
+	0x8e6f866f, 0x8e6f826f,
+	0xbef6006f, 0xb8f82a05,
 	0x80788178, 0x8e788a78,
 	0xb8ee1605, 0x806e816e,
 	0x8e6e866e, 0x80786e78,
-	0x80f8c078, 0xb8ef1605,
-	0x806f816f, 0x8e6f846f,
-	0x8e76826f, 0xbef600ff,
-	0x01000000, 0xbefc006f,
-	0xc031003a, 0x00000078,
-	0x80f8c078, 0xbf8cc07f,
-	0x80fc907c, 0xbf800000,
-	0xbe802d00, 0xbe822d02,
-	0xbe842d04, 0xbe862d06,
-	0xbe882d08, 0xbe8a2d0a,
-	0xbe8c2d0c, 0xbe8e2d0e,
-	0xbf06807c, 0xbf84fff0,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbef80080,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8ef2a05, 0x806f816f,
+	0x8e6f826f, 0x8e76886f,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefc0084,
+	0xbf11087c, 0x806fff6f,
+	0x00008000, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffee, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
 	0xb8f82a05, 0x80788178,
 	0x8e788a78, 0xb8ee1605,
 	0x806e816e, 0x8e6e866e,
-	0x80786e78, 0xbef60084,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
 	0xbef600ff, 0x01000000,
-	0xc0211bfa, 0x00000078,
-	0x80788478, 0xc0211b3a,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xc0211bfa,
 	0x00000078, 0x80788478,
-	0xc0211b7a, 0x00000078,
-	0x80788478, 0xc0211eba,
+	0xc0211b3a, 0x00000078,
+	0x80788478, 0xc0211b7a,
 	0x00000078, 0x80788478,
-	0xc0211efa, 0x00000078,
-	0x80788478, 0xc0211c3a,
+	0xc0211c3a, 0x00000078,
+	0x80788478, 0xc0211c7a,
 	0x00000078, 0x80788478,
-	0xc0211c7a, 0x00000078,
-	0x80788478, 0xc0211a3a,
+	0xc0211eba, 0x00000078,
+	0x80788478, 0xc0211efa,
 	0x00000078, 0x80788478,
-	0xc0211a7a, 0x00000078,
-	0x80788478, 0xc0211cfa,
+	0xc0211a3a, 0x00000078,
+	0x80788478, 0xc0211a7a,
 	0x00000078, 0x80788478,
-	0xbf8cc07f, 0xbefc006f,
-	0xbefe007a, 0xbeff007b,
-	0x866f71ff, 0x000003ff,
-	0xb96f4803, 0x866f71ff,
-	0xfffff800, 0x8f6f8b6f,
-	0xb96fa2c3, 0xb973f801,
-	0xb8ee2a05, 0x806e816e,
-	0x8e6e8a6e, 0xb8ef1605,
-	0x806f816f, 0x8e6f866f,
-	0x806e6f6e, 0x806e746e,
-	0x826f8075, 0x866fff6f,
-	0x0000ffff, 0xc0071cb7,
-	0x00000040, 0xc00b1d37,
-	0x00000048, 0xc0031e77,
-	0x00000058, 0xc0071eb7,
-	0x0000005c, 0xbf8cc07f,
-	0x866fff6d, 0xf0000000,
-	0x8f6f9c6f, 0x8e6f906f,
-	0xbeee0080, 0x876e6f6e,
-	0x866fff6d, 0x08000000,
-	0x8f6f9b6f, 0x8e6f8f6f,
-	0x876e6f6e, 0x866fff70,
-	0x00800000, 0x8f6f976f,
-	0xb96ef807, 0x866dff6d,
-	0x0000ffff, 0x86fe7e7e,
-	0x86ea6a6a, 0x8f6e8370,
-	0xb96ee0c2, 0xbf800002,
-	0xb9700002, 0xbf8a0000,
-	0x95806f6c, 0xbf810000,
+	0xc0211cfa, 0x00000078,
+	0x80788478, 0xbf8cc07f,
+	0xbefc006f, 0xbefe0070,
+	0xbeff0071, 0x866f7bff,
+	0x000003ff, 0xb96f4803,
+	0x866f7bff, 0xfffff800,
+	0x8f6f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc00b1c37, 0x00000050,
+	0xc00b1d37, 0x00000060,
+	0xc0031e77, 0x00000074,
+	0xbf8cc07f, 0x866fff6d,
+	0xf8000000, 0x8f6f9b6f,
+	0x8e6f906f, 0xbeee0080,
+	0x876e6f6e, 0x866fff6d,
+	0x04000000, 0x8f6f9a6f,
+	0x8e6f8f6f, 0x876e6f6e,
+	0x866fff7a, 0x00800000,
+	0x8f6f976f, 0xb96ef807,
+	0x866dff6d, 0x0000ffff,
+	0x86fe7e7e, 0x86ea6a6a,
+	0x8f6e837a, 0xb96ee0c2,
+	0xbf800002, 0xb97a0002,
+	0xbf8a0000, 0x95806f6c,
+	0xbf810000, 0x00000000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index abe1a5da29fb..a47f5b933120 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP
         s_waitcnt lgkmcnt(0)         //FIXME, will cause xnack??
 end
 
-    //check whether there is mem_viol
-    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
-    s_and_b32   s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
-    s_cbranch_scc0  L_NO_PC_REWIND
-
-    //if so, need rewind PC assuming GDS operation gets NACKed
-    s_mov_b32       s_save_tmp, 0                                                           //clear mem_viol bit
-    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp    //clear mem_viol bit
-    s_and_b32       s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
-    s_sub_u32       s_save_pc_lo, s_save_pc_lo, 8             //pc[31:0]-8
-    s_subb_u32      s_save_pc_hi, s_save_pc_hi, 0x0           // -scc
-
-L_NO_PC_REWIND:
     s_mov_b32       s_save_tmp, 0                                                           //clear saveCtx bit
     s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp     //clear saveCtx bit
 
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 0bb9c577b3a2..6bae2e022c6e 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT		=   28
 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	=   0x04000000		//bit[26]: FirstWaveInTG
 var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	=   26
 
-var S_SAVE_PC_HI_RCNT_SHIFT		=   28			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used
-var S_SAVE_PC_HI_RCNT_MASK		=   0xF0000000		//FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   27			//FIXME
-var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x08000000		//FIXME
+var S_SAVE_PC_HI_RCNT_SHIFT		=   27			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK		=   0xF8000000		//FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   26			//FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x04000000		//FIXME
 
 var s_save_spi_init_lo		    =	exec_lo
 var s_save_spi_init_hi		    =	exec_hi
@@ -162,8 +162,8 @@ var s_save_pc_lo	    =	ttmp0		//{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra
 var s_save_pc_hi	    =	ttmp1
 var s_save_exec_lo	    =	ttmp2
 var s_save_exec_hi	    =	ttmp3
-var s_save_tmp		    =	ttmp4
-var s_save_trapsts	    =	ttmp5		//not really used until the end of the SAVE routine
+var s_save_tmp		    =	ttmp14
+var s_save_trapsts	    =	ttmp15		//not really used until the end of the SAVE routine
 var s_save_xnack_mask_lo    =	ttmp6
 var s_save_xnack_mask_hi    =	ttmp7
 var s_save_buf_rsrc0	    =	ttmp8
@@ -171,9 +171,9 @@ var s_save_buf_rsrc1	    =	ttmp9
 var s_save_buf_rsrc2	    =	ttmp10
 var s_save_buf_rsrc3	    =	ttmp11
 var s_save_status	    =	ttmp12
-var s_save_mem_offset	    =	ttmp14
+var s_save_mem_offset	    =	ttmp4
 var s_save_alloc_size	    =	s_save_trapsts		//conflict
-var s_save_m0		    =	ttmp15
+var s_save_m0		    =	ttmp5
 var s_save_ttmps_lo	    =	s_save_tmp		//no conflict
 var s_save_ttmps_hi	    =	s_save_trapsts		//no conflict
 
@@ -207,10 +207,10 @@ var s_restore_mode	    =	ttmp7
 
 var s_restore_pc_lo	    =	ttmp0
 var s_restore_pc_hi	    =	ttmp1
-var s_restore_exec_lo	    =	ttmp14
-var s_restore_exec_hi	    = 	ttmp15
-var s_restore_status	    =	ttmp4
-var s_restore_trapsts	    =	ttmp5
+var s_restore_exec_lo	    =	ttmp4
+var s_restore_exec_hi	    = 	ttmp5
+var s_restore_status	    =	ttmp14
+var s_restore_trapsts	    =	ttmp15
 var s_restore_xnack_mask_lo =	xnack_mask_lo
 var s_restore_xnack_mask_hi =	xnack_mask_hi
 var s_restore_buf_rsrc0	    =	ttmp8
@@ -266,10 +266,16 @@ if (!EMU_RUN_HACK)
 
 L_HALT_WAVE:
     // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
-    // We cannot prevent further faults so just terminate the wavefront.
+    // We cannot prevent further faults. Spin wait until context saved.
     s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
     s_cbranch_scc0  L_NOT_ALREADY_HALTED
-    s_endpgm
+
+L_WAIT_CTX_SAVE:
+    s_sleep         0x10
+    s_getreg_b32    ttmp2, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK
+    s_cbranch_scc0  L_WAIT_CTX_SAVE
+
 L_NOT_ALREADY_HALTED:
     s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
 
@@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP:
     // Read second-level TBA/TMA from first-level TMA and jump if available.
     // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
     // ttmp12 holds SQ_WAVE_STATUS
-    s_getreg_b32    ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
-    s_getreg_b32    ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
-    s_lshl_b64      [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
-    s_load_dwordx2  [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+    s_getreg_b32    ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+    s_getreg_b32    ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+    s_lshl_b64      [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
+    s_load_dwordx2  [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA
     s_waitcnt       lgkmcnt(0)
-    s_load_dwordx2  [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+    s_load_dwordx2  [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA
     s_waitcnt       lgkmcnt(0)
     s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
     s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler not been set
@@ -405,7 +411,7 @@ end
     else
     end
 
-    // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+    // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
     // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
     get_vgpr_size_bytes(s_save_ttmps_lo)
     get_sgpr_size_bytes(s_save_ttmps_hi)
@@ -413,13 +419,11 @@ end
     s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
     s_addc_u32	    s_save_ttmps_hi, s_save_spi_init_hi, 0x0
     s_and_b32	    s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
-    s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
-    ack_sqc_store_workaround()
-    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+    s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1
     ack_sqc_store_workaround()
-    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1
     ack_sqc_store_workaround()
-    s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1
     ack_sqc_store_workaround()
 
     /*	    setup Resource Contants    */
@@ -1093,7 +1097,7 @@ end
     //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts	   //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
     s_setreg_b32    hwreg(HW_REG_MODE),	    s_restore_mode
 
-    // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+    // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
     // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
     get_vgpr_size_bytes(s_restore_ttmps_lo)
     get_sgpr_size_bytes(s_restore_ttmps_hi)
@@ -1101,10 +1105,9 @@ end
     s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
     s_addc_u32	    s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
     s_and_b32	    s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
-    s_load_dwordx2  [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
-    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
-    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
-    s_load_dwordx2  [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+    s_load_dwordx4  [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1
+    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1
+    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1
     s_waitcnt	    lgkmcnt(0)
 
     //reuse s_restore_m0 as a temp register
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 083bd8114db1..f91126f5f1be 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 		q_properties->type = KFD_QUEUE_TYPE_SDMA;
+	else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
+		q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
 	else
 		return -ENOTSUPP;
 
@@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
 	struct kfd_process_device *pdd;
 
 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;
 
 	mutex_lock(&p->mutex);
@@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 		if (args->size != kfd_doorbell_process_slice(dev))
 			return -EINVAL;
 		offset = kfd_get_process_doorbells(dev, p);
+	} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		if (args->size != PAGE_SIZE)
+			return -EINVAL;
+		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+		if (!offset)
+			return -ENOMEM;
 	}
 
 	mutex_lock(&p->mutex);
@@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
 	args->mmap_offset = offset;
 
+	/* MMIO is mapped through kfd device
+	 * Generate a kfd mmap offset
+	 */
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+		args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id);
+		args->mmap_offset <<= PAGE_SHIFT;
+	}
+
 	return 0;
 
 err_free:
@@ -1551,6 +1567,32 @@ copy_from_user_failed:
 	return err;
 }
 
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	int retval;
+	struct kfd_ioctl_alloc_queue_gws_args *args = data;
+	struct kfd_dev *dev;
+
+	if (!hws_gws_support)
+		return -ENODEV;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev) {
+		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
+		return -ENODEV;
+	}
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return -ENODEV;
+
+	mutex_lock(&p->mutex);
+	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+	mutex_unlock(&p->mutex);
+
+	args->first_gws = 0;
+	return retval;
+}
+
 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
@@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
 				kfd_ioctl_import_dmabuf, 0),
 
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+			kfd_ioctl_alloc_queue_gws, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1845,6 +1889,39 @@ err_i1:
 	return retcode;
 }
 
+static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma)
+{
+	phys_addr_t address;
+	int ret;
+
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+
+	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+				VM_DONTDUMP | VM_PFNMAP;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	pr_debug("Process %d mapping mmio page\n"
+		 "     target user address == 0x%08llX\n"
+		 "     physical address    == 0x%08llX\n"
+		 "     vm_flags            == 0x%04lX\n"
+		 "     size                == 0x%04lX\n",
+		 process->pasid, (unsigned long long) vma->vm_start,
+		 address, vma->vm_flags, PAGE_SIZE);
+
+	ret = io_remap_pfn_range(vma,
+				vma->vm_start,
+				address >> PAGE_SHIFT,
+				PAGE_SIZE,
+				vma->vm_page_prot);
+	return ret;
+}
+
+
 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
@@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 		if (!dev)
 			return -ENODEV;
 		return kfd_reserved_mem_mmap(dev, process, vma);
+	case KFD_MMAP_TYPE_MMIO:
+		if (!dev)
+			return -ENODEV;
+		return kfd_mmio_mmap(dev, process, vma);
 	}
 
 	return -EFAULT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 2e7c44955f43..59f8ca4297db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
 #define polaris10_cache_info carrizo_cache_info
 #define polaris11_cache_info carrizo_cache_info
 #define polaris12_cache_info carrizo_cache_info
+#define vegam_cache_info carrizo_cache_info
 /* TODO - check & update Vega10 cache details */
 #define vega10_cache_info carrizo_cache_info
 #define raven_cache_info carrizo_cache_info
@@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
 			if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS)
 				props->weight = 20;
 			else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI)
-				props->weight = 15;
+				props->weight = 15 * iolink->num_hops_xgmi;
 			else
 				props->weight = node_distance(id_from, id_to);
 
@@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
 		pcache_info = polaris12_cache_info;
 		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
 		break;
+	case CHIP_VEGAM:
+		pcache_info = vegam_cache_info;
+		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
+		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
@@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 
 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
 			struct kfd_dev *kdev,
+			struct kfd_dev *peer_kdev,
 			struct crat_subtype_iolink *sub_type_hdr,
 			uint32_t proximity_domain_from,
 			uint32_t proximity_domain_to)
@@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
 	sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;
 	sub_type_hdr->proximity_domain_from = proximity_domain_from;
 	sub_type_hdr->proximity_domain_to = proximity_domain_to;
+	sub_type_hdr->num_hops_xgmi =
+		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd);
 	return 0;
 }
 
@@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 				(char *)sub_type_hdr +
 				sizeof(struct crat_subtype_iolink));
 			ret = kfd_fill_gpu_xgmi_link_to_gpu(
-				&avail_size, kdev,
+				&avail_size, kdev, peer_dev->gpu,
 				(struct crat_subtype_iolink *)sub_type_hdr,
 				proximity_domain, nid);
 			if (ret < 0)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 7c3f192fe25f..d54ceebd346b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -274,7 +274,8 @@ struct crat_subtype_iolink {
 	uint32_t	minimum_bandwidth_mbs;
 	uint32_t	maximum_bandwidth_mbs;
 	uint32_t	recommended_transfer_size;
-	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH];
+	uint8_t		reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1];
+	uint8_t		num_hops_xgmi;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 765b58a17dc7..9d1b026e29e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = {
 	.needs_iommu_device = true,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = {
 	.needs_iommu_device = true,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = {
 	.needs_iommu_device = true,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 1,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 #endif
@@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = true,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
+	.num_sdma_queues_per_engine = 2,
+};
+
+static const struct kfd_device_info vegam_device_info = {
+	.asic_family = CHIP_VEGAM,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
+	.ih_ring_entry_size = 4 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = true,
+	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 2,
 };
 
@@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = {
 	.needs_iommu_device = false,
 	.needs_pci_atomics = false,
 	.num_sdma_engines = 2,
+	.num_xgmi_sdma_engines = 0,
 	.num_sdma_queues_per_engine = 8,
 };
 
@@ -373,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x6995, &polaris12_device_info },	/* Polaris12 */
 	{ 0x6997, &polaris12_device_info },	/* Polaris12 */
 	{ 0x699F, &polaris12_device_info },	/* Polaris12 */
+	{ 0x694C, &vegam_device_info },		/* VegaM */
+	{ 0x694E, &vegam_device_info },		/* VegaM */
+	{ 0x694F, &vegam_device_info },		/* VegaM */
 	{ 0x6860, &vega10_device_info },	/* Vega10 */
 	{ 0x6861, &vega10_device_info },	/* Vega10 */
 	{ 0x6862, &vega10_device_info },	/* Vega10 */
@@ -518,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	} else
 		kfd->max_proc_per_quantum = hws_max_conc_proc;
 
+	/* Allocate global GWS that is shared by all KFD processes */
+	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
+			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			amdgpu_amdkfd_get_num_gws(kfd->kgd));
+		goto out;
+	}
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -541,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
 			false)) {
 		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
-		goto out;
+		goto alloc_gtt_mem_failure;
 	}
 
 	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
@@ -611,6 +653,9 @@ kfd_doorbell_error:
 	kfd_gtt_sa_fini(kfd);
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+alloc_gtt_mem_failure:
+	if (hws_gws_support)
+		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
 		kfd->pdev->vendor, kfd->pdev->device);
@@ -628,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 		kfd_doorbell_fini(kfd);
 		kfd_gtt_sa_fini(kfd);
 		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
+		if (hws_gws_support)
+			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	}
 
 	kfree(kfd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ae381450601c..ece35c7a77b5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
-				unsigned int sdma_queue_id);
+				struct queue *q);
 
 static void kfd_process_hw_exception(struct work_struct *work);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
-	if (type == KFD_QUEUE_TYPE_SDMA)
+	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
 		return KFD_MQD_TYPE_SDMA;
 	return KFD_MQD_TYPE_CP;
 }
@@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
 	return dqm->dev->device_info->num_sdma_engines;
 }
 
+static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
+{
+	return dqm->dev->device_info->num_xgmi_sdma_engines;
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
 	return dqm->dev->device_info->num_sdma_engines
 			* dqm->dev->device_info->num_sdma_queues_per_engine;
 }
 
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
+{
+	return dqm->dev->device_info->num_xgmi_sdma_engines
+			* dqm->dev->device_info->num_sdma_queues_per_engine;
+}
+
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
@@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 		 * preserve the user mode ABI.
 		 */
 		q->doorbell_id = q->properties.queue_id;
-	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
 		 * doorbell assignments based on the engine and queue id.
 		 * The doobell index distance between RLC (2*i) and (2*i+1)
@@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
 	struct kfd_dev *dev = qpd->dqm->dev;
 
 	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
-	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
+	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 		return;
 
 	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
@@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 		retval = create_compute_queue_nocpsch(dqm, q, qpd);
-	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
 	else
 		retval = -EINVAL;
@@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		dqm->sdma_queue_count++;
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		dqm->xgmi_sdma_queue_count++;
 
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
@@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 	struct mqd_manager *mqd_mgr;
 	int retval;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-	if (!mqd_mgr)
-		return -ENOMEM;
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
 
 	retval = allocate_hqd(dqm, q);
 	if (retval)
@@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 	int retval;
 	struct mqd_manager *mqd_mgr;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-		get_mqd_type_from_queue_type(q->properties.type));
-	if (!mqd_mgr)
-		return -ENOMEM;
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
 
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
 		deallocate_hqd(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q->sdma_id);
+		deallocate_sdma_queue(dqm, q);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		dqm->xgmi_sdma_queue_count--;
+		deallocate_sdma_queue(dqm, q);
 	} else {
 		pr_debug("q->properties.type %d is invalid\n",
 				q->properties.type);
@@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		retval = -ENODEV;
 		goto out_unlock;
 	}
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_unlock;
-	}
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
 	/*
 	 * Eviction state logic: we only mark active queues as evicted
 	 * to avoid the overhead of restoring inactive queues later
@@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		}
 	} else if (prev_active &&
 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
 				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
@@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 		retval = map_queues_cpsch(dqm);
 	else if (q->properties.is_active &&
 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
-		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
 		if (WARN(q->process->mm != current->mm,
 			 "should only run in user thread"))
 			retval = -EFAULT;
@@ -571,27 +584,6 @@ out_unlock:
 	return retval;
 }
 
-static struct mqd_manager *get_mqd_manager(
-		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
-{
-	struct mqd_manager *mqd_mgr;
-
-	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
-		return NULL;
-
-	pr_debug("mqd type %d\n", type);
-
-	mqd_mgr = dqm->mqd_mgrs[type];
-	if (!mqd_mgr) {
-		mqd_mgr = mqd_manager_init(type, dqm->dev);
-		if (!mqd_mgr)
-			pr_err("mqd manager is NULL");
-		dqm->mqd_mgrs[type] = mqd_mgr;
-	}
-
-	return mqd_mgr;
-}
-
 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
@@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 	list_for_each_entry(q, &qpd->queues_list, list) {
 		if (!q->properties.is_active)
 			continue;
-		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-		if (!mqd_mgr) { /* should not be here */
-			pr_err("Cannot evict queue, mqd mgr is NULL\n");
-			retval = -ENOMEM;
-			goto out;
-		}
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
 		q->properties.is_evicted = true;
 		q->properties.is_active = false;
 		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
@@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 	list_for_each_entry(q, &qpd->queues_list, list) {
 		if (!q->properties.is_evicted)
 			continue;
-		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-		if (!mqd_mgr) { /* should not be here */
-			pr_err("Cannot restore queue, mqd mgr is NULL\n");
-			retval = -ENOMEM;
-			goto out;
-		}
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
 		q->properties.is_evicted = false;
 		q->properties.is_active = true;
 		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
@@ -812,10 +794,14 @@ static int register_process(struct device_queue_manager *dqm,
 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
 	dqm->processes_count++;
-	kfd_inc_compute_active(dqm->dev);
 
 	dqm_unlock(dqm);
 
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	kfd_inc_compute_active(dqm->dev);
+
 	return retval;
 }
 
@@ -836,7 +822,6 @@ static int unregister_process(struct device_queue_manager *dqm,
 			list_del(&cur->list);
 			kfree(cur);
 			dqm->processes_count--;
-			kfd_dec_compute_active(dqm->dev);
 			goto out;
 		}
 	}
@@ -844,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm,
 	retval = 1;
 out:
 	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (!retval)
+		kfd_dec_compute_active(dqm->dev);
+
 	return retval;
 }
 
@@ -879,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->sdma_queue_count = 0;
+	dqm->xgmi_sdma_queue_count = 0;
 
 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -890,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	}
 
 	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
-	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
+	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+	dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
 
 	return 0;
 }
@@ -921,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
 }
 
 static int allocate_sdma_queue(struct device_queue_manager *dqm,
-				unsigned int *sdma_queue_id)
+				struct queue *q)
 {
 	int bit;
 
-	if (dqm->sdma_bitmap == 0)
-		return -ENOMEM;
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		if (dqm->sdma_bitmap == 0)
+			return -ENOMEM;
+		bit = __ffs64(dqm->sdma_bitmap);
+		dqm->sdma_bitmap &= ~(1ULL << bit);
+		q->sdma_id = bit;
+		q->properties.sdma_engine_id = q->sdma_id %
+				get_num_sdma_engines(dqm);
+		q->properties.sdma_queue_id = q->sdma_id /
+				get_num_sdma_engines(dqm);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		if (dqm->xgmi_sdma_bitmap == 0)
+			return -ENOMEM;
+		bit = __ffs64(dqm->xgmi_sdma_bitmap);
+		dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+		q->sdma_id = bit;
+		/* sdma_engine_id is sdma id including
+		 * both PCIe-optimized SDMAs and XGMI-
+		 * optimized SDMAs. The calculation below
+		 * assumes the first N engines are always
+		 * PCIe-optimized ones
+		 */
+		q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
+				q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+		q->properties.sdma_queue_id = q->sdma_id /
+				get_num_xgmi_sdma_engines(dqm);
+	}
 
-	bit = ffs(dqm->sdma_bitmap) - 1;
-	dqm->sdma_bitmap &= ~(1 << bit);
-	*sdma_queue_id = bit;
+	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
+	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
 
 	return 0;
 }
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
-				unsigned int sdma_queue_id)
+				struct queue *q)
 {
-	if (sdma_queue_id >= get_num_sdma_queues(dqm))
-		return;
-	dqm->sdma_bitmap |= (1 << sdma_queue_id);
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		if (q->sdma_id >= get_num_sdma_queues(dqm))
+			return;
+		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
+			return;
+		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+	}
 }
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
@@ -950,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 	struct mqd_manager *mqd_mgr;
 	int retval;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
-	if (!mqd_mgr)
-		return -ENOMEM;
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
 
-	retval = allocate_sdma_queue(dqm, &q->sdma_id);
+	retval = allocate_sdma_queue(dqm, q);
 	if (retval)
 		return retval;
 
-	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
-	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);
-
 	retval = allocate_doorbell(qpd, q);
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	pr_debug("SDMA id is:    %d\n", q->sdma_id);
-	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
-	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
-
 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
@@ -987,7 +1002,7 @@ out_uninit_mqd:
 out_deallocate_doorbell:
 	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
-	deallocate_sdma_queue(dqm, q->sdma_id);
+	deallocate_sdma_queue(dqm, q);
 
 	return retval;
 }
@@ -1045,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->processes_count = 0;
 	dqm->sdma_queue_count = 0;
+	dqm->xgmi_sdma_queue_count = 0;
 	dqm->active_runlist = false;
-	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;
+	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
+	dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
 
 	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
 
@@ -1161,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	int retval;
 	struct mqd_manager *mqd_mgr;
 
-	retval = 0;
-
-	dqm_lock(dqm);
-
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
 				dqm->total_queue_count);
 		retval = -EPERM;
-		goto out_unlock;
+		goto out;
 	}
 
-	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		retval = allocate_sdma_queue(dqm, &q->sdma_id);
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		retval = allocate_sdma_queue(dqm, q);
 		if (retval)
-			goto out_unlock;
-		q->properties.sdma_queue_id =
-			q->sdma_id / get_num_sdma_engines(dqm);
-		q->properties.sdma_engine_id =
-			q->sdma_id % get_num_sdma_engines(dqm);
+			goto out;
 	}
 
 	retval = allocate_doorbell(qpd, q);
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_deallocate_doorbell;
-	}
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
 	/*
 	 * Eviction state logic: we only mark active queues as evicted
 	 * to avoid the overhead of restoring inactive queues later
@@ -1201,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		q->properties.is_evicted = (q->properties.queue_size > 0 &&
 					    q->properties.queue_percent > 0 &&
 					    q->properties.queue_address != 0);
-
 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
 	q->properties.tba_addr = qpd->tba_addr;
 	q->properties.tma_addr = qpd->tma_addr;
 	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1211,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_doorbell;
 
+	dqm_lock(dqm);
+
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
 	if (q->properties.is_active) {
@@ -1221,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		dqm->sdma_queue_count++;
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		dqm->xgmi_sdma_queue_count++;
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
 	 * type or whether the queue is active.
@@ -1236,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 out_deallocate_doorbell:
 	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
-	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-		deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
-	dqm_unlock(dqm);
-
+	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+		deallocate_sdma_queue(dqm, q);
+out:
 	return retval;
 }
 
@@ -1268,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 	return 0;
 }
 
-static int unmap_sdma_queues(struct device_queue_manager *dqm,
-				unsigned int sdma_engine)
+static int unmap_sdma_queues(struct device_queue_manager *dqm)
 {
-	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
-			sdma_engine);
+	int i, retval = 0;
+
+	for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
+		dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
+		retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
+		if (retval)
+			return retval;
+	}
+	return retval;
 }
 
 /* dqm->lock mutex has to be locked before calling this function */
@@ -1309,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	if (!dqm->active_runlist)
 		return retval;
 
-	pr_debug("Before destroying queues, sdma queue count is : %u\n",
-		dqm->sdma_queue_count);
+	pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
+		dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
 
-	if (dqm->sdma_queue_count > 0) {
-		unmap_sdma_queues(dqm, 0);
-		unmap_sdma_queues(dqm, 1);
-	}
+	if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
+		unmap_sdma_queues(dqm);
 
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
 			filter, filter_param, false, 0);
@@ -1379,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	}
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto failed;
-	}
+	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+			q->properties.type)];
 
 	deallocate_doorbell(qpd, q);
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
-		deallocate_sdma_queue(dqm, q->sdma_id);
+		deallocate_sdma_queue(dqm, q);
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+		dqm->xgmi_sdma_queue_count--;
+		deallocate_sdma_queue(dqm, q);
 	}
 
 	list_del(&q->list);
@@ -1403,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 			qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
@@ -1415,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
-failed:
 failed_try_destroy_debugged_queue:
 
 	dqm_unlock(dqm);
@@ -1520,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
 	struct queue *q, *next;
 	struct device_process_node *cur, *next_dpn;
 	int retval = 0;
+	bool found = false;
 
 	dqm_lock(dqm);
 
@@ -1538,12 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
 			list_del(&cur->list);
 			kfree(cur);
 			dqm->processes_count--;
-			kfd_dec_compute_active(dqm->dev);
+			found = true;
 			break;
 		}
 	}
 
 	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (found)
+		kfd_dec_compute_active(dqm->dev);
+
 	return retval;
 }
 
@@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
 		goto dqm_unlock;
 	}
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-	if (!mqd_mgr) {
-		r = -ENOMEM;
-		goto dqm_unlock;
-	}
+	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
 
 	if (!mqd_mgr->get_wave_state) {
 		r = -EINVAL;
@@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	struct device_process_node *cur, *next_dpn;
 	enum kfd_unmap_queues_filter filter =
 		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+	bool found = false;
 
 	retval = 0;
 
@@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	list_for_each_entry(q, &qpd->queues_list, list) {
 		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 			dqm->sdma_queue_count--;
-			deallocate_sdma_queue(dqm, q->sdma_id);
+			deallocate_sdma_queue(dqm, q);
+		} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+			dqm->xgmi_sdma_queue_count--;
+			deallocate_sdma_queue(dqm, q);
 		}
 
 		if (q->properties.is_active)
@@ -1626,7 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 			list_del(&cur->list);
 			kfree(cur);
 			dqm->processes_count--;
-			kfd_dec_compute_active(dqm->dev);
+			found = true;
 			break;
 		}
 	}
@@ -1638,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Outside the DQM lock because under the DQM lock we can't do
+	 * reclaim or take other locks that others hold while reclaiming.
+	 */
+	if (found)
+		kfd_dec_compute_active(dqm->dev);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
-		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-		if (!mqd_mgr) {
-			retval = -ENOMEM;
-			goto out;
-		}
+		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+				q->properties.type)];
 		list_del(&q->list);
 		qpd->queue_count--;
 		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 	}
 
-out:
-	dqm_unlock(dqm);
+	return retval;
+}
+
+static int init_mqd_managers(struct device_queue_manager *dqm)
+{
+	int i, j;
+	struct mqd_manager *mqd_mgr;
+
+	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
+		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
+		if (!mqd_mgr) {
+			pr_err("mqd manager [%d] initialization failed\n", i);
+			goto out_free;
+		}
+		dqm->mqd_mgrs[i] = mqd_mgr;
+	}
+
+	return 0;
+
+out_free:
+	for (j = 0; j < i; j++) {
+		kfree(dqm->mqd_mgrs[j]);
+		dqm->mqd_mgrs[j] = NULL;
+	}
+
+	return -ENOMEM;
+}
+
+/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
+static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
+{
+	int retval;
+	struct kfd_dev *dev = dqm->dev;
+	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
+	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
+		dev->device_info->num_sdma_engines *
+		dev->device_info->num_sdma_queues_per_engine +
+		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
+		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
+		(void *)&(mem_obj->cpu_ptr), true);
+
 	return retval;
 }
 
@@ -1693,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.stop = stop_cpsch;
 		dqm->ops.destroy_queue = destroy_queue_cpsch;
 		dqm->ops.update_queue = update_queue;
-		dqm->ops.get_mqd_manager = get_mqd_manager;
 		dqm->ops.register_process = register_process;
 		dqm->ops.unregister_process = unregister_process;
 		dqm->ops.uninitialize = uninitialize;
@@ -1713,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.create_queue = create_queue_nocpsch;
 		dqm->ops.destroy_queue = destroy_queue_nocpsch;
 		dqm->ops.update_queue = update_queue;
-		dqm->ops.get_mqd_manager = get_mqd_manager;
 		dqm->ops.register_process = register_process;
 		dqm->ops.unregister_process = unregister_process;
 		dqm->ops.initialize = initialize_nocpsch;
@@ -1749,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
 		break;
 
@@ -1764,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		goto out_free;
 	}
 
+	if (init_mqd_managers(dqm))
+		goto out_free;
+
+	if (allocate_hiq_sdma_mqd(dqm)) {
+		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
+		goto out_free;
+	}
+
 	if (!dqm->ops.initialize(dqm))
 		return dqm;
 
@@ -1772,9 +1843,17 @@ out_free:
 	return NULL;
 }
 
+void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
+{
+	WARN(!mqd, "No hiq sdma mqd trunk to free");
+
+	amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
+}
+
 void device_queue_manager_uninit(struct device_queue_manager *dqm)
 {
 	dqm->ops.uninitialize(dqm);
+	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
 	kfree(dqm);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 70e38a2e23b9..88b4c007696e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -48,8 +48,6 @@ struct device_process_node {
  *
  * @update_queue: Queue update routine.
  *
- * @get_mqd_manager: Returns the mqd manager according to the mqd type.
- *
  * @exeute_queues: Dispatches the queues list to the H/W.
  *
  * @register_process: This routine associates a specific process with device.
@@ -97,10 +95,6 @@ struct device_queue_manager_ops {
 	int	(*update_queue)(struct device_queue_manager *dqm,
 				struct queue *q);
 
-	struct mqd_manager * (*get_mqd_manager)
-					(struct device_queue_manager *dqm,
-					enum KFD_MQD_TYPE type);
-
 	int	(*register_process)(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
@@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops {
 	void	(*init_sdma_vm)(struct device_queue_manager *dqm,
 				struct queue *q,
 				struct qcm_process_device *qpd);
+	struct mqd_manager *	(*mqd_manager_init)(enum KFD_MQD_TYPE type,
+				 struct kfd_dev *dev);
 };
 
 /**
@@ -185,10 +181,12 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		queue_count;
 	unsigned int		sdma_queue_count;
+	unsigned int		xgmi_sdma_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
-	unsigned int		sdma_bitmap;
+	uint64_t		sdma_bitmap;
+	uint64_t		xgmi_sdma_bitmap;
 	unsigned int		vmid_bitmap;
 	uint64_t		pipelines_addr;
 	struct kfd_mem_obj	*pipeline_mem;
@@ -201,6 +199,7 @@ struct device_queue_manager {
 	/* hw exception  */
 	bool			is_hws_hang;
 	struct work_struct	hw_exception_work;
+	struct kfd_mem_obj	hiq_sdma_mqd;
 };
 
 void device_queue_manager_init_cik(
@@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm);
 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm);
+unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm);
 
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index aed4c21417bf..0d26506798cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -48,6 +48,7 @@ void device_queue_manager_init_cik(
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
 	asic_ops->update_qpd = update_qpd_cik;
 	asic_ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->mqd_manager_init = mqd_manager_init_cik;
 }
 
 void device_queue_manager_init_cik_hawaii(
@@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii(
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik;
 	asic_ops->update_qpd = update_qpd_cik_hawaii;
 	asic_ops->init_sdma_vm = init_sdma_vm_hawaii;
+	asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii;
 }
 
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index 417515332c35..e9fe39382371 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -37,6 +37,7 @@ void device_queue_manager_init_v9(
 {
 	asic_ops->update_qpd = update_qpd_v9;
 	asic_ops->init_sdma_vm = init_sdma_vm_v9;
+	asic_ops->mqd_manager_init = mqd_manager_init_v9;
 }
 
 static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index c3a5dcfe877a..3a7cb2f88366 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -54,6 +54,7 @@ void device_queue_manager_init_vi(
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi;
 	asic_ops->update_qpd = update_qpd_vi;
 	asic_ops->init_sdma_vm = init_sdma_vm;
+	asic_ops->mqd_manager_init = mqd_manager_init_vi;
 }
 
 void device_queue_manager_init_vi_tonga(
@@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga(
 	asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga;
 	asic_ops->update_qpd = update_qpd_vi_tonga;
 	asic_ops->init_sdma_vm = init_sdma_vm_tonga;
+	asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga;
 }
 
 static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6e1d41c5bf86..d674d4b3340f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
 		return; /* Presumably process exited. */
 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
 	memory_exception_data.gpu_id = dev->id;
-	memory_exception_data.failure.imprecise = 1;
+	memory_exception_data.failure.imprecise = true;
 	/* Set failure reason */
 	if (info) {
 		memory_exception_data.va = (info->page_addr) << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 213ea5454d11..22a8e88b6a67 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process)
 			case CHIP_POLARIS10:
 			case CHIP_POLARIS11:
 			case CHIP_POLARIS12:
+			case CHIP_VEGAM:
 				kfd_init_apertures_vi(pdd, id);
 				break;
 			case CHIP_VEGA10:
@@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process)
 
 	return 0;
 }
-
-
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index f1596881f20a..1cc03b3ddbb9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	kq->nop_packet = nop.u32all;
 	switch (type) {
 	case KFD_QUEUE_TYPE_DIQ:
+		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
+		break;
 	case KFD_QUEUE_TYPE_HIQ:
-		kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm,
-						KFD_MQD_TYPE_HIQ);
+		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
 		break;
 	default:
 		pr_err("Invalid queue type %d\n", type);
@@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		kernel_queue_init_vi(&kq->ops_asic_specific);
 		break;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
index 33830b1a5a54..07f02f8e4fe4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 
 	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
 					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.alloc_format =
-		alloc_format__mes_map_queues__one_per_pipe_vi;
 	packet->bitfields2.num_queues = 1;
 	packet->bitfields2.queue_sel =
 		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
 
 	packet->bitfields2.engine_sel =
 		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
 	packet->bitfields2.queue_type =
 		queue_type__mes_map_queues__normal_compute_vi;
 
@@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 			queue_type__mes_map_queues__debug_interface_queue_vi;
 		break;
 	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
 		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
 				engine_sel__mes_map_queues__sdma0_vi;
 		use_static = false; /* no static queues under SDMA */
@@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 			engine_sel__mes_unmap_queues__compute;
 		break;
 	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
 		packet->bitfields2.engine_sel =
 			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index bf20c6d32ef3..2adaf40027eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
 
 	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
 					sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.alloc_format =
-		alloc_format__mes_map_queues__one_per_pipe_vi;
 	packet->bitfields2.num_queues = 1;
 	packet->bitfields2.queue_sel =
 		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
@@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
 			queue_type__mes_map_queues__debug_interface_queue_vi;
 		break;
 	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
 		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
 				engine_sel__mes_map_queues__sdma0_vi;
 		use_static = false; /* no static queues under SDMA */
@@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
 			engine_sel__mes_unmap_queues__compute;
 		break;
 	case KFD_QUEUE_TYPE_SDMA:
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
 		packet->bitfields2.engine_sel =
 			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
 		break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index aed9b9b82213..9307811bc427 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -23,34 +23,54 @@
 
 #include "kfd_mqd_manager.h"
 #include "amdgpu_amdkfd.h"
+#include "kfd_device_queue_manager.h"
 
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
-					struct kfd_dev *dev)
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev)
 {
-	switch (dev->device_info->asic_family) {
-	case CHIP_KAVERI:
-		return mqd_manager_init_cik(type, dev);
-	case CHIP_HAWAII:
-		return mqd_manager_init_cik_hawaii(type, dev);
-	case CHIP_CARRIZO:
-		return mqd_manager_init_vi(type, dev);
-	case CHIP_TONGA:
-	case CHIP_FIJI:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-		return mqd_manager_init_vi_tonga(type, dev);
-	case CHIP_VEGA10:
-	case CHIP_VEGA12:
-	case CHIP_VEGA20:
-	case CHIP_RAVEN:
-		return mqd_manager_init_v9(type, dev);
-	default:
-		WARN(1, "Unexpected ASIC family %u",
-		     dev->device_info->asic_family);
-	}
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr;
+	mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr;
+
+	return mqd_mem_obj;
+}
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+	uint64_t offset;
 
-	return NULL;
+	mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	if (!mqd_mem_obj)
+		return NULL;
+
+	offset = (q->sdma_engine_id *
+		dev->device_info->num_sdma_queues_per_engine +
+		q->sdma_queue_id) *
+		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;
+
+	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
+
+	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+				+ offset);
+	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;
+	mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t)
+				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
+
+	return mqd_mem_obj;
+}
+
+void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	WARN_ON(!mqd_mem_obj->gtt_mem);
+	kfree(mqd_mem_obj);
 }
 
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index f8261313ae7b..56af256a191b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -99,8 +99,16 @@ struct mqd_manager {
 
 	struct mutex	mqd_mutex;
 	struct kfd_dev	*dev;
+	uint32_t mqd_size;
 };
 
+struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev);
+
+struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,
+					struct queue_properties *q);
+void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd,
+				struct kfd_mem_obj *mqd_mem_obj);
+
 void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
 		const uint32_t *cu_mask, uint32_t cu_mask_count,
 		uint32_t *se_mask);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index ae90a99909ef..6e8509ec29d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 		m->compute_static_thread_mgmt_se3);
 }
 
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	if (q->type == KFD_QUEUE_TYPE_HIQ)
+		return allocate_hiq_mqd(kfd);
+
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd),
+			&mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 		struct queue_properties *q)
@@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	uint64_t addr;
 	struct cik_mqd *m;
 	int retval;
+	struct kfd_dev *kfd = mm->dev;
 
-	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
-					mqd_mem_obj);
-
-	if (retval != 0)
+	*mqd_mem_obj = allocate_mqd(kfd, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 {
 	int retval;
 	struct cik_sdma_rlc_registers *m;
+	struct kfd_dev *dev = mm->dev;
 
-	retval = kfd_gtt_sa_allocate(mm->dev,
-					sizeof(struct cik_sdma_rlc_registers),
-					mqd_mem_obj);
-
-	if (retval != 0)
+	*mqd_mem_obj = allocate_sdma_mqd(dev, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr;
@@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd,
 	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }
 
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
-				struct kfd_mem_obj *mqd_mem_obj)
-{
-	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
 
 static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
 		    uint32_t queue_id, struct queue_properties *p,
@@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		mqd->update_mqd = update_mqd;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct cik_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct cik_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->init_mqd = init_mqd_hiq;
 		mqd->uninit_mqd = uninit_mqd;
 		mqd->load_mqd = load_mqd;
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct cik_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
 		mqd->init_mqd = init_mqd_sdma;
-		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
 		mqd->load_mqd = load_mqd_sdma;
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 9dbba609450e..4750338199b6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 		m->compute_static_thread_mgmt_se3);
 }
 
-static int init_mqd(struct mqd_manager *mm, void **mqd,
-			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
-			struct queue_properties *q)
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+		struct queue_properties *q)
 {
 	int retval;
-	uint64_t addr;
-	struct v9_mqd *m;
-	struct kfd_dev *kfd = mm->dev;
+	struct kfd_mem_obj *mqd_mem_obj = NULL;
+
+	if (q->type == KFD_QUEUE_TYPE_HIQ)
+		return allocate_hiq_mqd(kfd);
 
 	/* From V9,  for CWSR, the control stack is located on the next page
 	 * boundary after the mqd, we will use the gtt allocation function
 	 * instead of sub-allocation function.
 	 */
 	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
-		if (!*mqd_mem_obj)
-			return -ENOMEM;
+		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+		if (!mqd_mem_obj)
+			return NULL;
 		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
 			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
 				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
-			&((*mqd_mem_obj)->gtt_mem),
-			&((*mqd_mem_obj)->gpu_addr),
-			(void *)&((*mqd_mem_obj)->cpu_ptr), true);
-	} else
-		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
-				mqd_mem_obj);
-	if (retval != 0)
+			&(mqd_mem_obj->gtt_mem),
+			&(mqd_mem_obj->gpu_addr),
+			(void *)&(mqd_mem_obj->cpu_ptr), true);
+	} else {
+		retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd),
+				&mqd_mem_obj);
+	}
+
+	if (retval) {
+		kfree(mqd_mem_obj);
+		return NULL;
+	}
+
+	return mqd_mem_obj;
+
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	int retval;
+	uint64_t addr;
+	struct v9_mqd *m;
+	struct kfd_dev *kfd = mm->dev;
+
+	*mqd_mem_obj = allocate_mqd(kfd, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 {
 	int retval;
 	struct v9_sdma_mqd *m;
+	struct kfd_dev *dev = mm->dev;
 
-
-	retval = kfd_gtt_sa_allocate(mm->dev,
-			sizeof(struct v9_sdma_mqd),
-			mqd_mem_obj);
-
-	if (retval != 0)
+	*mqd_mem_obj = allocate_sdma_mqd(dev, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 	return retval;
 }
 
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
-		struct kfd_mem_obj *mqd_mem_obj)
-{
-	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 		uint32_t pipe_id, uint32_t queue_id,
 		struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_size = sizeof(struct v9_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v9_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->init_mqd = init_mqd_hiq;
 		mqd->uninit_mqd = uninit_mqd;
 		mqd->load_mqd = load_mqd;
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct v9_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
 		mqd->init_mqd = init_mqd_sdma;
-		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
 		mqd->load_mqd = load_mqd_sdma;
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct v9_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 6469b3456f00..b550dea9b10a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 		m->compute_static_thread_mgmt_se3);
 }
 
+static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
+					struct queue_properties *q)
+{
+	struct kfd_mem_obj *mqd_mem_obj;
+
+	if (q->type == KFD_QUEUE_TYPE_HIQ)
+		return allocate_hiq_mqd(kfd);
+
+	if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd),
+			&mqd_mem_obj))
+		return NULL;
+
+	return mqd_mem_obj;
+}
+
 static int init_mqd(struct mqd_manager *mm, void **mqd,
 			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
 			struct queue_properties *q)
@@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	int retval;
 	uint64_t addr;
 	struct vi_mqd *m;
+	struct kfd_dev *kfd = mm->dev;
 
-	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
-			mqd_mem_obj);
-	if (retval != 0)
+	*mqd_mem_obj = allocate_mqd(kfd, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 {
 	int retval;
 	struct vi_sdma_mqd *m;
+	struct kfd_dev *dev = mm->dev;
 
-
-	retval = kfd_gtt_sa_allocate(mm->dev,
-			sizeof(struct vi_sdma_mqd),
-			mqd_mem_obj);
-
-	if (retval != 0)
+	*mqd_mem_obj = allocate_sdma_mqd(dev, q);
+	if (!*mqd_mem_obj)
 		return -ENOMEM;
 
 	m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
@@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 	memset(m, 0, sizeof(struct vi_sdma_mqd));
 
 	*mqd = m;
-	if (gart_addr != NULL)
+	if (gart_addr)
 		*gart_addr = (*mqd_mem_obj)->gpu_addr;
 
 	retval = mm->update_mqd(mm, m, q);
@@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 	return retval;
 }
 
-static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
-		struct kfd_mem_obj *mqd_mem_obj)
-{
-	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
-}
-
 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
 		uint32_t pipe_id, uint32_t queue_id,
 		struct queue_properties *p, struct mm_struct *mms)
@@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
 		mqd->get_wave_state = get_wave_state;
+		mqd->mqd_size = sizeof(struct vi_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_HIQ:
 		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct vi_mqd);
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_DIQ:
+		mqd->init_mqd = init_mqd_hiq;
 		mqd->uninit_mqd = uninit_mqd;
 		mqd->load_mqd = load_mqd;
 		mqd->update_mqd = update_mqd_hiq;
 		mqd->destroy_mqd = destroy_mqd;
 		mqd->is_occupied = is_occupied;
+		mqd->mqd_size = sizeof(struct vi_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
 		break;
 	case KFD_MQD_TYPE_SDMA:
 		mqd->init_mqd = init_mqd_sdma;
-		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_hiq_sdma;
 		mqd->load_mqd = load_mqd_sdma;
 		mqd->update_mqd = update_mqd_sdma;
 		mqd->destroy_mqd = destroy_mqd_sdma;
 		mqd->is_occupied = is_occupied_sdma;
+		mqd->mqd_size = sizeof(struct vi_sdma_mqd);
 #if defined(CONFIG_DEBUG_FS)
 		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 045a229436a0..808194663a7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;
-	compute_queue_count = queue_count - pm->dqm->sdma_queue_count;
+	compute_queue_count = queue_count - pm->dqm->sdma_queue_count -
+				pm->dqm->xgmi_sdma_queue_count;
 
 	/* check if there is over subscription
 	 * Note: the arbitration between the number of VMIDs and
@@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		pm->pmf = &kfd_vi_pm_funcs;
 		break;
 	case CHIP_VEGA10:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
index f2bcf5c092ea..49ab66b703fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -176,8 +176,7 @@ struct pm4_mes_map_process {
 
 	union {
 		struct {
-			uint32_t num_gws:6;
-			uint32_t reserved7:1;
+			uint32_t num_gws:7;
 			uint32_t sdma_enable:1;
 			uint32_t num_oac:4;
 			uint32_t reserved8:4;
@@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum {
 queue_type__mes_map_queues__low_latency_static_queue_vi = 3
 };
 
-enum mes_map_queues_alloc_format_enum {
-	alloc_format__mes_map_queues__one_per_pipe_vi = 0,
-alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
-};
-
 enum mes_map_queues_engine_sel_enum {
 	engine_sel__mes_map_queues__compute_vi = 0,
 	engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -277,9 +271,11 @@ struct pm4_mes_map_queues {
 		struct {
 			uint32_t reserved1:4;
 			enum mes_map_queues_queue_sel_enum queue_sel:2;
-			uint32_t reserved2:15;
+			uint32_t reserved5:6;
+			uint32_t gws_control_queue:1;
+			uint32_t reserved2:8;
 			enum mes_map_queues_queue_type_enum queue_type:3;
-			enum mes_map_queues_alloc_format_enum alloc_format:2;
+			uint32_t reserved3:2;
 			enum mes_map_queues_engine_sel_enum engine_sel:3;
 			uint32_t num_queues:3;
 		} bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 7c8d9b357749..5466cfe1c3cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum {
 queue_type__mes_map_queues__low_latency_static_queue_vi = 3
 };
 
-enum mes_map_queues_alloc_format_vi_enum {
-	alloc_format__mes_map_queues__one_per_pipe_vi = 0,
-alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
-};
-
 enum mes_map_queues_engine_sel_vi_enum {
 	engine_sel__mes_map_queues__compute_vi = 0,
 	engine_sel__mes_map_queues__sdma0_vi = 2,
@@ -240,7 +235,7 @@ struct pm4_mes_map_queues {
 			enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
 			uint32_t reserved2:15;
 			enum mes_map_queues_queue_type_vi_enum queue_type:3;
-			enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
+			uint32_t reserved3:2;
 			enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
 			uint32_t num_queues:3;
 		} bitfields2;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 487d5da337c1..b61dc53f42d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -59,6 +59,7 @@
 #define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
 #define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)
 
 #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
 #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
@@ -160,6 +161,11 @@ extern int noretry;
  */
 extern int halt_if_hws_hang;
 
+/*
+ * Whether MEC FW support GWS barriers
+ */
+extern bool hws_gws_support;
+
 enum cache_policy {
 	cache_policy_coherent,
 	cache_policy_noncoherent
@@ -188,6 +194,7 @@ struct kfd_device_info {
 	bool needs_iommu_device;
 	bool needs_pci_atomics;
 	unsigned int num_sdma_engines;
+	unsigned int num_xgmi_sdma_engines;
 	unsigned int num_sdma_queues_per_engine;
 };
 
@@ -258,7 +265,7 @@ struct kfd_dev {
 	bool interrupts_active;
 
 	/* Debug manager */
-	struct kfd_dbgmgr           *dbgmgr;
+	struct kfd_dbgmgr *dbgmgr;
 
 	/* Firmware versions */
 	uint16_t mec_fw_version;
@@ -282,6 +289,9 @@ struct kfd_dev {
 
 	/* Compute Profile ref. count */
 	atomic_t compute_profile;
+
+	/* Global GWS resource shared b/t processes*/
+	void *gws;
 };
 
 enum kfd_mempool {
@@ -329,7 +339,8 @@ enum kfd_queue_type  {
 	KFD_QUEUE_TYPE_COMPUTE,
 	KFD_QUEUE_TYPE_SDMA,
 	KFD_QUEUE_TYPE_HIQ,
-	KFD_QUEUE_TYPE_DIQ
+	KFD_QUEUE_TYPE_DIQ,
+	KFD_QUEUE_TYPE_SDMA_XGMI
 };
 
 enum kfd_queue_format {
@@ -444,6 +455,9 @@ struct queue_properties {
  *
  * @device: The kfd device that created this queue.
  *
+ * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
+ * otherwise.
+ *
  * This structure represents user mode compute queues.
  * It contains all the necessary data to handle such queues.
  *
@@ -465,6 +479,7 @@ struct queue {
 
 	struct kfd_process	*process;
 	struct kfd_dev		*device;
+	void *gws;
 };
 
 /*
@@ -475,6 +490,7 @@ enum KFD_MQD_TYPE {
 	KFD_MQD_TYPE_HIQ,		/* for hiq */
 	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
 	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
+	KFD_MQD_TYPE_DIQ,		/* for diq */
 	KFD_MQD_TYPE_MAX
 };
 
@@ -819,8 +835,6 @@ void uninit_queue(struct queue *q);
 void print_queue_properties(struct queue_properties *q);
 void print_queue(struct queue *q);
 
-struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
-					struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
@@ -859,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
 int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
 			struct queue_properties *p);
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 						unsigned int qid);
 int pqm_get_wave_state(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index fcaaf93681ac..da0958625861 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -26,6 +26,7 @@
 #include "kfd_device_queue_manager.h"
 #include "kfd_priv.h"
 #include "kfd_kernel_queue.h"
+#include "amdgpu_amdkfd.h"
 
 static inline struct process_queue_node *get_queue_by_qid(
 			struct process_queue_manager *pqm, unsigned int qid)
@@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 	pdd->already_dequeued = true;
 }
 
+int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
+			void *gws)
+{
+	struct kfd_dev *dev = NULL;
+	struct process_queue_node *pqn;
+	struct kfd_process_device *pdd;
+	struct kgd_mem *mem = NULL;
+	int ret;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	if (!pqn) {
+		pr_err("Queue id does not match any known queue\n");
+		return -EINVAL;
+	}
+
+	if (pqn->q)
+		dev = pqn->q->device;
+	if (WARN_ON(!dev))
+		return -ENODEV;
+
+	pdd = kfd_get_process_device_data(dev, pqm->process);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		return -EINVAL;
+	}
+
+	/* Only allow one queue per process can have GWS assigned */
+	if (gws && pdd->qpd.num_gws)
+		return -EBUSY;
+
+	if (!gws && pdd->qpd.num_gws == 0)
+		return -EINVAL;
+
+	if (gws)
+		ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
+			gws, &mem);
+	else
+		ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
+			pqn->q->gws);
+	if (unlikely(ret))
+		return ret;
+
+	pqn->q->gws = mem;
+	pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
+
+	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
+							pqn->q);
+}
+
 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
@@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 
 	switch (type) {
 	case KFD_QUEUE_TYPE_SDMA:
-		if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) {
-			pr_err("Over-subscription is not allowed for SDMA.\n");
+	case KFD_QUEUE_TYPE_SDMA_XGMI:
+		if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count
+			>= get_num_sdma_queues(dev->dqm)) ||
+			(type == KFD_QUEUE_TYPE_SDMA_XGMI &&
+			dev->dqm->xgmi_sdma_queue_count
+			>= get_num_xgmi_sdma_queues(dev->dqm))) {
+			pr_debug("Over-subscription is not allowed for SDMA.\n");
 			retval = -EPERM;
 			goto err_create_queue;
 		}
@@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 			if (retval != -ETIME)
 				goto err_destroy_queue;
 		}
+
+		if (pqn->q->gws) {
+			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
+				pqn->q->gws);
+			pdd->qpd.num_gws = 0;
+		}
+
 		kfree(pqn->q->properties.cu_mask);
 		pqn->q->properties.cu_mask = NULL;
 		uninit_queue(pqn->q);
@@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
 			q = pqn->q;
 			switch (q->properties.type) {
 			case KFD_QUEUE_TYPE_SDMA:
+			case KFD_QUEUE_TYPE_SDMA_XGMI:
 				seq_printf(m, "  SDMA queue on device %x\n",
 					   q->device->id);
 				mqd_type = KFD_MQD_TYPE_SDMA;
@@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
 					   q->properties.type, q->device->id);
 				continue;
 			}
-			mqd_mgr = q->device->dqm->ops.get_mqd_manager(
-				q->device->dqm, mqd_type);
+			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
 		} else if (pqn->kq) {
 			q = pqn->kq->queue;
 			mqd_mgr = pqn->kq->mqd_mgr;
@@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
 			case KFD_QUEUE_TYPE_DIQ:
 				seq_printf(m, "  DIQ on device %x\n",
 					   pqn->kq->dev->id);
-				mqd_type = KFD_MQD_TYPE_HIQ;
 				break;
 			default:
 				seq_printf(m,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 769dbc7be8cb..d241a8672599 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.lds_size_in_kb);
 	sysfs_show_32bit_prop(buffer, "gds_size_in_kb",
 			dev->node_props.gds_size_in_kb);
+	sysfs_show_32bit_prop(buffer, "num_gws",
+			dev->node_props.num_gws);
 	sysfs_show_32bit_prop(buffer, "wave_front_size",
 			dev->node_props.wave_front_size);
 	sysfs_show_32bit_prop(buffer, "array_count",
@@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.drm_render_minor);
 	sysfs_show_64bit_prop(buffer, "hive_id",
 			dev->node_props.hive_id);
+	sysfs_show_32bit_prop(buffer, "num_sdma_engines",
+			dev->node_props.num_sdma_engines);
+	sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines",
+			dev->node_props.num_sdma_xgmi_engines);
 
 	if (dev->gpu) {
 		log_max_watch_addr =
@@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
 			local_mem_info.local_mem_size_public;
 
 	buf[0] = gpu->pdev->devfn;
-	buf[1] = gpu->pdev->subsystem_vendor;
-	buf[2] = gpu->pdev->subsystem_device;
+	buf[1] = gpu->pdev->subsystem_vendor |
+		(gpu->pdev->subsystem_device << 16);
+	buf[2] = pci_domain_nr(gpu->pdev->bus);
 	buf[3] = gpu->pdev->device;
 	buf[4] = gpu->pdev->bus->number;
 	buf[5] = lower_32_bits(local_mem_size);
@@ -1281,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 		gpu->shared_resources.drm_render_minor;
 
 	dev->node_props.hive_id = gpu->hive_id;
+	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines;
+	dev->node_props.num_sdma_xgmi_engines =
+				gpu->device_info->num_xgmi_sdma_engines;
+	dev->node_props.num_gws = (hws_gws_support &&
+		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
+		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 
 	kfd_fill_mem_clk_max_info(dev);
 	kfd_fill_iolink_non_crat_info(dev);
@@ -1298,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		pr_debug("Adding doorbell packet type capability\n");
 		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 <<
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 84710cfd23c2..276354aa0fcc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -65,6 +65,7 @@ struct kfd_node_properties {
 	uint32_t max_waves_per_simd;
 	uint32_t lds_size_in_kb;
 	uint32_t gds_size_in_kb;
+	uint32_t num_gws;
 	uint32_t wave_front_size;
 	uint32_t array_count;
 	uint32_t simd_arrays_per_engine;
@@ -78,6 +79,8 @@ struct kfd_node_properties {
 	uint32_t max_engine_clk_fcompute;
 	uint32_t max_engine_clk_ccompute;
 	int32_t  drm_render_minor;
+	uint32_t num_sdma_engines;
+	uint32_t num_sdma_xgmi_engines;
 	uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];
 };
 
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0c25baded852..5c826faae240 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,6 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select DRM_AMD_DC_DCN1_0 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
-	select DRM_AMD_DC_DCN1_01 if X86 && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
@@ -17,11 +16,6 @@ config DRM_AMD_DC_DCN1_0
 	help
 	  RV family support for display engine
 
-config DRM_AMD_DC_DCN1_01
-	def_bool n
-	help
-	  RV2 family for display engine
-
 config DEBUG_KERNEL_DC
 	bool "Enable kgdb break in DC"
 	depends on DRM_AMD_DC
diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile
index cfde1568c79a..496cee000f10 100644
--- a/drivers/gpu/drm/amd/display/Makefile
+++ b/drivers/gpu/drm/amd/display/Makefile
@@ -28,6 +28,7 @@ AMDDALPATH = $(RELATIVE_AMD_DISPLAY_PATH)
 
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/inc/hw
+subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/clk_mgr
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync
 subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ab7c5c3004ee..b16c658074d2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -54,15 +54,17 @@
 #include <linux/version.h>
 #include <linux/types.h>
 #include <linux/pm_runtime.h>
+#include <linux/pci.h>
 #include <linux/firmware.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_vblank.h>
 
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 #include "ivsrcid/irqsrcs_dcn_1_0.h"
@@ -616,6 +618,10 @@ error:
 static void amdgpu_dm_fini(struct amdgpu_device *adev)
 {
 	amdgpu_dm_destroy_drm_device(&adev->dm);
+
+	/* DC Destroy TODO: Replace destroy DAL */
+	if (adev->dm.dc)
+		dc_destroy(&adev->dm.dc);
 	/*
 	 * TODO: pageflip, vlank interrupt
 	 *
@@ -630,9 +636,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 		mod_freesync_destroy(adev->dm.freesync_module);
 		adev->dm.freesync_module = NULL;
 	}
-	/* DC Destroy TODO: Replace destroy DAL */
-	if (adev->dm.dc)
-		dc_destroy(&adev->dm.dc);
 
 	mutex_destroy(&adev->dm.dc_lock);
 
@@ -664,13 +667,11 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
 	case CHIP_VEGA20:
 		return 0;
 	case CHIP_RAVEN:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 		if (ASICREV_IS_PICASSO(adev->external_rev_id))
 			fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
 		else if (ASICREV_IS_RAVEN2(adev->external_rev_id))
 			fw_name_dmcu = FIRMWARE_RAVEN_DMCU;
 		else
-#endif
 			return 0;
 		break;
 	default:
@@ -2592,7 +2593,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev,
 		address->type = PLN_ADDR_TYPE_GRAPHICS;
 		address->grph.addr.low_part = lower_32_bits(afb->address);
 		address->grph.addr.high_part = upper_32_bits(afb->address);
-	} else {
+	} else if (format < SURFACE_PIXEL_FORMAT_INVALID) {
 		uint64_t chroma_addr = afb->address + fb->offsets[1];
 
 		plane_size->video.luma_size.x = 0;
@@ -2967,16 +2968,16 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
 }
 
 static enum dc_color_depth
-convert_color_depth_from_display_info(const struct drm_connector *connector)
+convert_color_depth_from_display_info(const struct drm_connector *connector,
+				      const struct drm_connector_state *state)
 {
-	struct dm_connector_state *dm_conn_state =
-		to_dm_connector_state(connector->state);
 	uint32_t bpc = connector->display_info.bpc;
 
-	/* TODO: Remove this when there's support for max_bpc in drm */
-	if (dm_conn_state && bpc > dm_conn_state->max_bpc)
-		/* Round down to nearest even number. */
-		bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1);
+	if (state) {
+		bpc = state->max_bpc;
+		/* Round down to the nearest even number. */
+		bpc = bpc - (bpc & 1);
+	}
 
 	switch (bpc) {
 	case 0:
@@ -3094,11 +3095,12 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_
 
 }
 
-static void
-fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
-					     const struct drm_display_mode *mode_in,
-					     const struct drm_connector *connector,
-					     const struct dc_stream_state *old_stream)
+static void fill_stream_properties_from_drm_display_mode(
+	struct dc_stream_state *stream,
+	const struct drm_display_mode *mode_in,
+	const struct drm_connector *connector,
+	const struct drm_connector_state *connector_state,
+	const struct dc_stream_state *old_stream)
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
 	const struct drm_display_info *info = &connector->display_info;
@@ -3121,7 +3123,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
 
 	timing_out->timing_3d_format = TIMING_3D_FORMAT_NONE;
 	timing_out->display_color_depth = convert_color_depth_from_display_info(
-			connector);
+		connector, connector_state);
 	timing_out->scan_type = SCANNING_TYPE_NODATA;
 	timing_out->hdmi_vic = 0;
 
@@ -3318,6 +3320,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 {
 	struct drm_display_mode *preferred_mode = NULL;
 	struct drm_connector *drm_connector;
+	const struct drm_connector_state *con_state =
+		dm_state ? &dm_state->base : NULL;
 	struct dc_stream_state *stream = NULL;
 	struct drm_display_mode mode = *drm_mode;
 	bool native_mode_found = false;
@@ -3390,10 +3394,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	*/
 	if (!scale || mode_refresh != preferred_refresh)
 		fill_stream_properties_from_drm_display_mode(stream,
-			&mode, &aconnector->base, NULL);
+			&mode, &aconnector->base, con_state, NULL);
 	else
 		fill_stream_properties_from_drm_display_mode(stream,
-			&mode, &aconnector->base, old_stream);
+			&mode, &aconnector->base, con_state, old_stream);
 
 	update_stream_scaling_settings(&mode, dm_state, stream);
 
@@ -3618,9 +3622,6 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 	} else if (property == adev->mode_info.underscan_property) {
 		dm_new_state->underscan_enable = val;
 		ret = 0;
-	} else if (property == adev->mode_info.max_bpc_property) {
-		dm_new_state->max_bpc = val;
-		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
 		dm_new_state->abm_level = val;
 		ret = 0;
@@ -3666,9 +3667,6 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 	} else if (property == adev->mode_info.underscan_property) {
 		*val = dm_state->underscan_enable;
 		ret = 0;
-	} else if (property == adev->mode_info.max_bpc_property) {
-		*val = dm_state->max_bpc;
-		ret = 0;
 	} else if (property == adev->mode_info.abm_level_property) {
 		*val = dm_state->abm_level;
 		ret = 0;
@@ -3677,6 +3675,13 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 	return ret;
 }
 
+static void amdgpu_dm_connector_unregister(struct drm_connector *connector)
+{
+	struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector);
+
+	drm_dp_aux_unregister(&amdgpu_dm_connector->dm_dp_aux.aux);
+}
+
 static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -3705,6 +3710,11 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 	drm_dp_cec_unregister_connector(&aconnector->dm_dp_aux.aux);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
+	if (aconnector->i2c) {
+		i2c_del_adapter(&aconnector->i2c->base);
+		kfree(aconnector->i2c);
+	}
+
 	kfree(connector);
 }
 
@@ -3725,7 +3735,6 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->underscan_enable = false;
 		state->underscan_hborder = 0;
 		state->underscan_vborder = 0;
-		state->max_bpc = 8;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
@@ -3751,7 +3760,6 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
 	new_state->underscan_enable = state->underscan_enable;
 	new_state->underscan_hborder = state->underscan_hborder;
 	new_state->underscan_vborder = state->underscan_vborder;
-	new_state->max_bpc = state->max_bpc;
 
 	return &new_state->base;
 }
@@ -3764,7 +3772,8 @@ static const struct drm_connector_funcs amdgpu_dm_connector_funcs = {
 	.atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_set_property = amdgpu_dm_connector_atomic_set_property,
-	.atomic_get_property = amdgpu_dm_connector_atomic_get_property
+	.atomic_get_property = amdgpu_dm_connector_atomic_get_property,
+	.early_unregister = amdgpu_dm_connector_unregister
 };
 
 static int get_modes(struct drm_connector *connector)
@@ -3879,6 +3888,129 @@ fail:
 	return result;
 }
 
+static int fill_hdr_info_packet(const struct drm_connector_state *state,
+				struct dc_info_packet *out)
+{
+	struct hdmi_drm_infoframe frame;
+	unsigned char buf[30]; /* 26 + 4 */
+	ssize_t len;
+	int ret, i;
+
+	memset(out, 0, sizeof(*out));
+
+	if (!state->hdr_output_metadata)
+		return 0;
+
+	ret = drm_hdmi_infoframe_set_hdr_metadata(&frame, state);
+	if (ret)
+		return ret;
+
+	len = hdmi_drm_infoframe_pack_only(&frame, buf, sizeof(buf));
+	if (len < 0)
+		return (int)len;
+
+	/* Static metadata is a fixed 26 bytes + 4 byte header. */
+	if (len != 30)
+		return -EINVAL;
+
+	/* Prepare the infopacket for DC. */
+	switch (state->connector->connector_type) {
+	case DRM_MODE_CONNECTOR_HDMIA:
+		out->hb0 = 0x87; /* type */
+		out->hb1 = 0x01; /* version */
+		out->hb2 = 0x1A; /* length */
+		out->sb[0] = buf[3]; /* checksum */
+		i = 1;
+		break;
+
+	case DRM_MODE_CONNECTOR_DisplayPort:
+	case DRM_MODE_CONNECTOR_eDP:
+		out->hb0 = 0x00; /* sdp id, zero */
+		out->hb1 = 0x87; /* type */
+		out->hb2 = 0x1D; /* payload len - 1 */
+		out->hb3 = (0x13 << 2); /* sdp version */
+		out->sb[0] = 0x01; /* version */
+		out->sb[1] = 0x1A; /* length */
+		i = 2;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	memcpy(&out->sb[i], &buf[4], 26);
+	out->valid = true;
+
+	print_hex_dump(KERN_DEBUG, "HDR SB:", DUMP_PREFIX_NONE, 16, 1, out->sb,
+		       sizeof(out->sb), false);
+
+	return 0;
+}
+
+static bool
+is_hdr_metadata_different(const struct drm_connector_state *old_state,
+			  const struct drm_connector_state *new_state)
+{
+	struct drm_property_blob *old_blob = old_state->hdr_output_metadata;
+	struct drm_property_blob *new_blob = new_state->hdr_output_metadata;
+
+	if (old_blob != new_blob) {
+		if (old_blob && new_blob &&
+		    old_blob->length == new_blob->length)
+			return memcmp(old_blob->data, new_blob->data,
+				      old_blob->length);
+
+		return true;
+	}
+
+	return false;
+}
+
+static int
+amdgpu_dm_connector_atomic_check(struct drm_connector *conn,
+				 struct drm_atomic_state *state)
+{
+	struct drm_connector_state *new_con_state =
+		drm_atomic_get_new_connector_state(state, conn);
+	struct drm_connector_state *old_con_state =
+		drm_atomic_get_old_connector_state(state, conn);
+	struct drm_crtc *crtc = new_con_state->crtc;
+	struct drm_crtc_state *new_crtc_state;
+	int ret;
+
+	if (!crtc)
+		return 0;
+
+	if (is_hdr_metadata_different(old_con_state, new_con_state)) {
+		struct dc_info_packet hdr_infopacket;
+
+		ret = fill_hdr_info_packet(new_con_state, &hdr_infopacket);
+		if (ret)
+			return ret;
+
+		new_crtc_state = drm_atomic_get_crtc_state(state, crtc);
+		if (IS_ERR(new_crtc_state))
+			return PTR_ERR(new_crtc_state);
+
+		/*
+		 * DC considers the stream backends changed if the
+		 * static metadata changes. Forcing the modeset also
+		 * gives a simple way for userspace to switch from
+		 * 8bpc to 10bpc when setting the metadata to enter
+		 * or exit HDR.
+		 *
+		 * Changing the static metadata after it's been
+		 * set is permissible, however. So only force a
+		 * modeset if we're entering or exiting HDR.
+		 */
+		new_crtc_state->mode_changed =
+			!old_con_state->hdr_output_metadata ||
+			!new_con_state->hdr_output_metadata;
+	}
+
+	return 0;
+}
+
 static const struct drm_connector_helper_funcs
 amdgpu_dm_connector_helper_funcs = {
 	/*
@@ -3889,6 +4021,7 @@ amdgpu_dm_connector_helper_funcs = {
 	 */
 	.get_modes = get_modes,
 	.mode_valid = amdgpu_dm_connector_mode_valid,
+	.atomic_check = amdgpu_dm_connector_atomic_check,
 };
 
 static void dm_crtc_helper_disable(struct drm_crtc *crtc)
@@ -4098,6 +4231,9 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	struct amdgpu_device *adev;
 	struct amdgpu_bo *rbo;
 	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
+	struct list_head list;
+	struct ttm_validate_buffer tv;
+	struct ww_acquire_ctx ticket;
 	uint64_t tiling_flags;
 	uint32_t domain;
 	int r;
@@ -4114,9 +4250,17 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	obj = new_state->fb->obj[0];
 	rbo = gem_to_amdgpu_bo(obj);
 	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
-	r = amdgpu_bo_reserve(rbo, false);
-	if (unlikely(r != 0))
+	INIT_LIST_HEAD(&list);
+
+	tv.bo = &rbo->tbo;
+	tv.num_shared = 1;
+	list_add(&tv.head, &list);
+
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL, true);
+	if (r) {
+		dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
 		return r;
+	}
 
 	if (plane->type != DRM_PLANE_TYPE_CURSOR)
 		domain = amdgpu_display_supported_domains(adev);
@@ -4127,21 +4271,21 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
 		return r;
 	}
 
 	r = amdgpu_ttm_alloc_gart(&rbo->tbo);
 	if (unlikely(r != 0)) {
 		amdgpu_bo_unpin(rbo);
-		amdgpu_bo_unreserve(rbo);
+		ttm_eu_backoff_reservation(&ticket, &list);
 		DRM_ERROR("%p bind failed\n", rbo);
 		return r;
 	}
 
 	amdgpu_bo_get_tiling_flags(rbo, &tiling_flags);
 
-	amdgpu_bo_unreserve(rbo);
+	ttm_eu_backoff_reservation(&ticket, &list);
 
 	afb->address = amdgpu_bo_gpu_offset(rbo);
 
@@ -4592,6 +4736,15 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector,
 		amdgpu_dm_connector->num_modes =
 				drm_add_edid_modes(connector, edid);
 
+		/* sorting the probed modes before calling function
+		 * amdgpu_dm_get_native_mode() since EDID can have
+		 * more than one preferred mode. The modes that are
+		 * later in the probed mode list could be of higher
+		 * and preferred resolution. For example, 3840x2160
+		 * resolution in base EDID preferred timing and 4096x2160
+		 * preferred resolution in DID extension block later.
+		 */
+		drm_mode_sort(&connector->probed_modes);
 		amdgpu_dm_get_native_mode(connector);
 	} else {
 		amdgpu_dm_connector->num_modes = 0;
@@ -4671,9 +4824,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 	drm_object_attach_property(&aconnector->base.base,
 				adev->mode_info.underscan_vborder_property,
 				0);
-	drm_object_attach_property(&aconnector->base.base,
-				adev->mode_info.max_bpc_property,
-				0);
+
+	drm_connector_attach_max_bpc_property(&aconnector->base, 8, 16);
+
+	/* This defaults to the max in the range, but we want 8bpc. */
+	aconnector->base.state->max_bpc = 8;
+	aconnector->base.state->max_requested_bpc = 8;
 
 	if (connector_type == DRM_MODE_CONNECTOR_eDP &&
 	    dc_is_dmcu_initialized(adev->dm.dc)) {
@@ -4684,6 +4840,10 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 	if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
 	    connector_type == DRM_MODE_CONNECTOR_DisplayPort ||
 	    connector_type == DRM_MODE_CONNECTOR_eDP) {
+		drm_object_attach_property(
+			&aconnector->base.base,
+			dm->ddev->mode_config.hdr_output_metadata_property, 0);
+
 		drm_connector_attach_vrr_capable_property(
 			&aconnector->base);
 	}
@@ -4952,12 +5112,12 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 	int x, y;
 	int xorigin = 0, yorigin = 0;
 
-	if (!crtc || !plane->state->fb) {
-		position->enable = false;
-		position->x = 0;
-		position->y = 0;
+	position->enable = false;
+	position->x = 0;
+	position->y = 0;
+
+	if (!crtc || !plane->state->fb)
 		return 0;
-	}
 
 	if ((plane->state->crtc_w > amdgpu_crtc->max_cursor_width) ||
 	    (plane->state->crtc_h > amdgpu_crtc->max_cursor_height)) {
@@ -4971,6 +5131,10 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 	x = plane->state->crtc_x;
 	y = plane->state->crtc_y;
 
+	if (x <= -amdgpu_crtc->max_cursor_width ||
+	    y <= -amdgpu_crtc->max_cursor_height)
+		return 0;
+
 	if (crtc->primary->state) {
 		/* avivo cursor are offset into the total surface */
 		x += crtc->primary->state->src_x >> 16;
@@ -5114,6 +5278,11 @@ static void update_freesync_state_on_stream(
 		    amdgpu_dm_vrr_active(new_crtc_state)) {
 			mod_freesync_handle_v_update(dm->freesync_module,
 						     new_stream, &vrr_params);
+
+			/* Need to call this before the frame ends. */
+			dc_stream_adjust_vmin_vmax(dm->dc,
+						   new_crtc_state->stream,
+						   &vrr_params.adjust);
 		}
 	}
 
@@ -5452,11 +5621,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		}
 
 		if (acrtc_state->stream) {
-
-			if (acrtc_state->freesync_timing_changed)
-				bundle->stream_update.adjust =
-					&acrtc_state->stream->adjust;
-
 			if (acrtc_state->freesync_vrr_info_changed)
 				bundle->stream_update.vrr_infopacket =
 					&acrtc_state->stream->vrr_infopacket;
@@ -5477,6 +5641,20 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		if (acrtc_state->abm_level != dm_old_crtc_state->abm_level)
 			bundle->stream_update.abm_level = &acrtc_state->abm_level;
 
+		/*
+		 * If FreeSync state on the stream has changed then we need to
+		 * re-adjust the min/max bounds now that DC doesn't handle this
+		 * as part of commit.
+		 */
+		if (amdgpu_dm_vrr_active(dm_old_crtc_state) !=
+		    amdgpu_dm_vrr_active(acrtc_state)) {
+			spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
+			dc_stream_adjust_vmin_vmax(
+				dm->dc, acrtc_state->stream,
+				&acrtc_state->vrr_params.adjust);
+			spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
+		}
+
 		mutex_lock(&dm->dc_lock);
 		dc_commit_updates_for_stream(dm->dc,
 						     bundle->surface_updates,
@@ -5768,7 +5946,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
 		struct dc_surface_update dummy_updates[MAX_SURFACES];
 		struct dc_stream_update stream_update;
+		struct dc_info_packet hdr_packet;
 		struct dc_stream_status *status = NULL;
+		bool abm_changed, hdr_changed, scaling_changed;
 
 		memset(&dummy_updates, 0, sizeof(dummy_updates));
 		memset(&stream_update, 0, sizeof(stream_update));
@@ -5785,11 +5965,19 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 		dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
-		if (!is_scaling_state_different(dm_new_con_state, dm_old_con_state) &&
-				(dm_new_crtc_state->abm_level == dm_old_crtc_state->abm_level))
+		scaling_changed = is_scaling_state_different(dm_new_con_state,
+							     dm_old_con_state);
+
+		abm_changed = dm_new_crtc_state->abm_level !=
+			      dm_old_crtc_state->abm_level;
+
+		hdr_changed =
+			is_hdr_metadata_different(old_con_state, new_con_state);
+
+		if (!scaling_changed && !abm_changed && !hdr_changed)
 			continue;
 
-		if (is_scaling_state_different(dm_new_con_state, dm_old_con_state)) {
+		if (scaling_changed) {
 			update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode,
 					dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream);
 
@@ -5797,12 +5985,17 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 			stream_update.dst = dm_new_crtc_state->stream->dst;
 		}
 
-		if (dm_new_crtc_state->abm_level != dm_old_crtc_state->abm_level) {
+		if (abm_changed) {
 			dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level;
 
 			stream_update.abm_level = &dm_new_crtc_state->abm_level;
 		}
 
+		if (hdr_changed) {
+			fill_hdr_info_packet(new_con_state, &hdr_packet);
+			stream_update.hdr_static_metadata = &hdr_packet;
+		}
+
 		status = dc_stream_get_status(dm_new_crtc_state->stream);
 		WARN_ON(!status);
 		WARN_ON(!status->plane_count);
@@ -6148,6 +6341,11 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
 
 		dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level;
 
+		ret = fill_hdr_info_packet(drm_new_conn_state,
+					   &new_stream->hdr_static_metadata);
+		if (ret)
+			goto fail;
+
 		if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) &&
 		    dc_is_stream_scaling_unchanged(new_stream, dm_old_crtc_state->stream)) {
 			new_crtc_state->mode_changed = false;
@@ -6327,6 +6525,10 @@ static bool should_reset_plane(struct drm_atomic_state *state,
 	if (!new_crtc_state)
 		return true;
 
+	/* CRTC Degamma changes currently require us to recreate planes. */
+	if (new_crtc_state->color_mgmt_changed)
+		return true;
+
 	if (drm_atomic_crtc_needs_modeset(new_crtc_state))
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 978ff14a7d45..811253d7f157 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -26,8 +26,11 @@
 #ifndef __AMDGPU_DM_H__
 #define __AMDGPU_DM_H__
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_dp_mst_helper.h>
+#include <drm/drm_plane.h>
 
 /*
  * This file contains the definition for amdgpu_display_manager
@@ -304,7 +307,6 @@ struct dm_connector_state {
 	enum amdgpu_rmx_type scaling;
 	uint8_t underscan_vborder;
 	uint8_t underscan_hborder;
-	uint8_t max_bpc;
 	bool underscan_enable;
 	bool freesync_capable;
 	uint8_t abm_level;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index a10e3a50d9ef..bc67e6502733 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -24,6 +24,7 @@
  */
 
 #include <drm/drm_crtc.h>
+#include <drm/drm_vblank.h>
 
 #include "amdgpu.h"
 #include "amdgpu_dm.h"
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 1d5fc5ad3bee..e611b5376d8c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -23,7 +23,9 @@
  *
  */
 
-#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_debugfs.h>
 
 #include "dc.h"
 #include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index e6cd67342df8..97b2c3b16bef 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -28,7 +28,6 @@
 #include <linux/version.h>
 #include <linux/i2c.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_edid.h>
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
index fd22b4474dbf..1b59d3d42f7b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c
@@ -23,8 +23,6 @@
  *
  */
 
-#include <drm/drmP.h>
-
 #include "dm_services_types.h"
 #include "dc.h"
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 350e7a620d45..b37e8c9653e1 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/acpi.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include "dm_services.h"
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index d915e8c8769b..022da5d45d4d 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -26,7 +26,6 @@
 #include <linux/string.h>
 #include <linux/acpi.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include "dm_services.h"
diff --git a/drivers/gpu/drm/amd/display/dc/Makefile b/drivers/gpu/drm/amd/display/dc/Makefile
index b8ddb4acccdb..6da4e4f844b2 100644
--- a/drivers/gpu/drm/amd/display/dc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/Makefile
@@ -23,7 +23,7 @@
 # Makefile for Display Core (dc) component.
 #
 
-DC_LIBS = basics bios calcs dce gpio irq virtual
+DC_LIBS = basics bios calcs clk_mgr dce gpio irq virtual
 
 ifdef CONFIG_DRM_AMD_DC_DCN1_0
 DC_LIBS += dcn10 dml
diff --git a/drivers/gpu/drm/amd/display/dc/basics/vector.c b/drivers/gpu/drm/amd/display/dc/basics/vector.c
index d28e9cf0e961..8f93d25f91ee 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/vector.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/vector.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "include/vector.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index a4c97d32e751..461eef1de124 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "atom.h"
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index fd5266a58297..fecd766ece37 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "ObjectID.h"
@@ -1313,6 +1315,8 @@ static enum bp_result bios_parser_get_encoder_cap_info(
 			ATOM_ENCODER_CAP_RECORD_HBR3_EN) ? 1 : 0;
 	info->HDMI_6GB_EN = (record->encodercaps &
 			ATOM_ENCODER_CAP_RECORD_HDMI6Gbps_EN) ? 1 : 0;
+	info->DP_IS_USB_C = (record->encodercaps &
+			ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
 
 	return BP_RESULT_OK;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 8196f3bb10c7..53deba42007a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -57,11 +57,6 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
 		return true;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
-		*h = dal_cmd_tbl_helper_dce112_get_table2();
-		return true;
-#endif
-
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	case DCN_VERSION_1_01:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index f3aa7b53d2aa..7108d51a9c5b 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dce_calcs.h"
 #include "dc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h
index 03f06f682ead..ce35de79a6c7 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_auto.h
@@ -26,6 +26,7 @@
 #ifndef _DCN_CALC_AUTO_H_
 #define _DCN_CALC_AUTO_H_
 
+#include "dc.h"
 #include "dcn_calcs.h"
 
 void scaler_settings_calculation(struct dcn_bw_internal_vars *v);
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 1b4b51657f5e..5c1e0adb142b 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -24,11 +24,10 @@
  */
 
 #include "dm_services.h"
+#include "dc.h"
 #include "dcn_calcs.h"
 #include "dcn_calc_auto.h"
-#include "dc.h"
 #include "dal_asic_id.h"
-
 #include "resource.h"
 #include "dcn10/dcn10_resource.h"
 #include "dcn10/dcn10_hubbub.h"
@@ -712,7 +711,7 @@ bool dcn_validate_bandwidth(
 
 	const struct resource_pool *pool = dc->res_pool;
 	struct dcn_bw_internal_vars *v = &context->dcn_bw_vars;
-	int i, input_idx;
+	int i, input_idx, k;
 	int vesa_sync_start, asic_blank_end, asic_blank_start;
 	bool bw_limit_pass;
 	float bw_limit;
@@ -873,8 +872,19 @@ bool dcn_validate_bandwidth(
 			v->lb_bit_per_pixel[input_idx] = 30;
 			v->viewport_width[input_idx] = pipe->stream->timing.h_addressable;
 			v->viewport_height[input_idx] = pipe->stream->timing.v_addressable;
-			v->scaler_rec_out_width[input_idx] = pipe->stream->timing.h_addressable;
-			v->scaler_recout_height[input_idx] = pipe->stream->timing.v_addressable;
+			/*
+			 * for cases where we have no plane, we want to validate up to 1080p
+			 * source size because here we are only interested in if the output
+			 * timing is supported or not. if we cannot support native resolution
+			 * of the high res display, we still want to support lower res up scale
+			 * to native
+			 */
+			if (v->viewport_width[input_idx] > 1920)
+				v->viewport_width[input_idx] = 1920;
+			if (v->viewport_height[input_idx] > 1080)
+				v->viewport_height[input_idx] = 1080;
+			v->scaler_rec_out_width[input_idx] = v->viewport_width[input_idx];
+			v->scaler_recout_height[input_idx] = v->viewport_height[input_idx];
 			v->override_hta_ps[input_idx] = 1;
 			v->override_vta_ps[input_idx] = 1;
 			v->override_hta_pschroma[input_idx] = 1;
@@ -1023,6 +1033,43 @@ bool dcn_validate_bandwidth(
 		mode_support_and_system_configuration(v);
 	}
 
+	display_pipe_configuration(v);
+
+	for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+		if (v->source_scan[k] == dcn_bw_hor)
+			v->swath_width_y[k] = v->viewport_width[k] / v->dpp_per_plane[k];
+		else
+			v->swath_width_y[k] = v->viewport_height[k] / v->dpp_per_plane[k];
+	}
+	for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+		if (v->source_pixel_format[k] == dcn_bw_rgb_sub_64) {
+			v->byte_per_pixel_dety[k] = 8.0;
+			v->byte_per_pixel_detc[k] = 0.0;
+		} else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_32) {
+			v->byte_per_pixel_dety[k] = 4.0;
+			v->byte_per_pixel_detc[k] = 0.0;
+		} else if (v->source_pixel_format[k] == dcn_bw_rgb_sub_16) {
+			v->byte_per_pixel_dety[k] = 2.0;
+			v->byte_per_pixel_detc[k] = 0.0;
+		} else if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_8) {
+			v->byte_per_pixel_dety[k] = 1.0;
+			v->byte_per_pixel_detc[k] = 2.0;
+		} else {
+			v->byte_per_pixel_dety[k] = 4.0f / 3.0f;
+			v->byte_per_pixel_detc[k] = 8.0f / 3.0f;
+		}
+	}
+
+	v->total_data_read_bandwidth = 0.0;
+	for (k = 0; k <= v->number_of_active_planes - 1; k++) {
+		v->read_bandwidth_plane_luma[k] = v->swath_width_y[k] * v->dpp_per_plane[k] *
+				dcn_bw_ceil2(v->byte_per_pixel_dety[k], 1.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k];
+		v->read_bandwidth_plane_chroma[k] = v->swath_width_y[k] / 2.0 * v->dpp_per_plane[k] *
+				dcn_bw_ceil2(v->byte_per_pixel_detc[k], 2.0) / (v->htotal[k] / v->pixel_clock[k]) * v->v_ratio[k] / 2.0;
+		v->total_data_read_bandwidth = v->total_data_read_bandwidth +
+				v->read_bandwidth_plane_luma[k] + v->read_bandwidth_plane_chroma[k];
+	}
+
 	BW_VAL_TRACE_END_VOLTAGE_LEVEL();
 
 	if (v->voltage_level != number_of_states_plus_one && !fast_validate) {
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
new file mode 100644
index 000000000000..650e2b88c917
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
@@ -0,0 +1,75 @@
+#
+# Copyright 2017 Advanced Micro Devices, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Makefile for the 'clk_mgr' sub-component of DAL.
+# It provides the control and status of HW CLK_MGR pins.
+
+CLK_MGR = clk_mgr.o
+
+AMD_DAL_CLK_MGR = $(addprefix $(AMDDALPATH)/dc/clk_mgr/,$(CLK_MGR))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR)
+
+
+###############################################################################
+# DCE 100 and DCE8x
+###############################################################################
+CLK_MGR_DCE100 = dce_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCE100 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce100/,$(CLK_MGR_DCE100))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE100)
+
+###############################################################################
+# DCE 100 and DCE8x
+###############################################################################
+CLK_MGR_DCE110 = dce110_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCE110 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce110/,$(CLK_MGR_DCE110))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE110)
+###############################################################################
+# DCE 112
+###############################################################################
+CLK_MGR_DCE112 = dce112_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCE112 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce112/,$(CLK_MGR_DCE112))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE112)
+###############################################################################
+# DCE 120
+###############################################################################
+CLK_MGR_DCE120 = dce120_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCE120 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dce120/,$(CLK_MGR_DCE120))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCE120)
+ifdef CONFIG_DRM_AMD_DC_DCN1_0
+###############################################################################
+# DCN10
+###############################################################################
+CLK_MGR_DCN10 = rv1_clk_mgr.o rv1_clk_mgr_vbios_smu.o rv2_clk_mgr.o
+
+AMD_DAL_CLK_MGR_DCN10 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn10/,$(CLK_MGR_DCN10))
+
+AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN10)
+endif
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
new file mode 100644
index 000000000000..cb3f6a74d9e3
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "dal_asic_id.h"
+#include "dc_types.h"
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+
+#include "dce100/dce_clk_mgr.h"
+#include "dce110/dce110_clk_mgr.h"
+#include "dce112/dce112_clk_mgr.h"
+#include "dce120/dce120_clk_mgr.h"
+#include "dcn10/rv1_clk_mgr.h"
+#include "dcn10/rv2_clk_mgr.h"
+
+
+int clk_mgr_helper_get_active_display_cnt(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	int i, display_count;
+
+	display_count = 0;
+	for (i = 0; i < context->stream_count; i++) {
+		const struct dc_stream_state *stream = context->streams[i];
+
+		/*
+		 * Only notify active stream or virtual stream.
+		 * Need to notify virtual stream to work around
+		 * headless case. HPD does not fire when system is in
+		 * S0i2.
+		 */
+		if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
+			display_count++;
+	}
+
+	return display_count;
+}
+
+
+struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg)
+{
+	struct hw_asic_id asic_id = ctx->asic_id;
+
+	struct clk_mgr_internal *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
+
+	if (clk_mgr == NULL) {
+		BREAK_TO_DEBUGGER();
+		return NULL;
+	}
+
+	switch (asic_id.chip_family) {
+	case FAMILY_CI:
+	case FAMILY_KV:
+		dce_clk_mgr_construct(ctx, clk_mgr);
+		break;
+	case FAMILY_CZ:
+		dce110_clk_mgr_construct(ctx, clk_mgr);
+		break;
+	case FAMILY_VI:
+		if (ASIC_REV_IS_TONGA_P(asic_id.hw_internal_rev) ||
+				ASIC_REV_IS_FIJI_P(asic_id.hw_internal_rev)) {
+			dce_clk_mgr_construct(ctx, clk_mgr);
+			break;
+		}
+		if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev) ||
+				ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev) ||
+				ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) {
+			dce112_clk_mgr_construct(ctx, clk_mgr);
+			break;
+		}
+		if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev)) {
+			dce112_clk_mgr_construct(ctx, clk_mgr);
+			break;
+		}
+		break;
+	case FAMILY_AI:
+		if (ASICREV_IS_VEGA20_P(asic_id.hw_internal_rev))
+			dce121_clk_mgr_construct(ctx, clk_mgr);
+		else
+			dce120_clk_mgr_construct(ctx, clk_mgr);
+		break;
+
+#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+	case FAMILY_RV:
+		if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) {
+			rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu);
+			break;
+		}
+		if (ASICREV_IS_RAVEN(asic_id.hw_internal_rev) ||
+				ASICREV_IS_PICASSO(asic_id.hw_internal_rev)) {
+			rv1_clk_mgr_construct(ctx, clk_mgr, pp_smu);
+			break;
+		}
+		break;
+#endif	/* Family RV */
+
+	default:
+		ASSERT(0); /* Unknown Asic */
+		break;
+	}
+
+	return &clk_mgr->base;
+}
+
+void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+	kfree(clk_mgr);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
new file mode 100644
index 000000000000..814450fefffa
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dccg.h"
+#include "clk_mgr_internal.h"
+#include "dce_clk_mgr.h"
+#include "dce110/dce110_clk_mgr.h"
+#include "dce112/dce112_clk_mgr.h"
+#include "reg_helper.h"
+#include "dmcu.h"
+#include "core_types.h"
+#include "dal_asic_id.h"
+
+/*
+ * Currently the register shifts and masks in this file are used for dce100 and dce80
+ * which has identical definitions.
+ * TODO: remove this when DPREFCLK_CNTL and dpref DENTIST_DISPCLK_CNTL
+ * is moved to dccg, where it belongs
+ */
+#include "dce/dce_8_0_d.h"
+#include "dce/dce_8_0_sh_mask.h"
+
+#define REG(reg) \
+	(clk_mgr->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+static const struct clk_mgr_registers disp_clk_regs = {
+		CLK_COMMON_REG_LIST_DCE_BASE()
+};
+
+static const struct clk_mgr_shift disp_clk_shift = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
+};
+
+static const struct clk_mgr_mask disp_clk_mask = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
+};
+
+
+/* Max clock values for each state indexed by "enum clocks_state": */
+static const struct state_dependent_clocks dce80_max_clks_by_state[] = {
+/* ClocksStateInvalid - should not be used */
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/* ClocksStateUltraLow - not expected to be used for DCE 8.0 */
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/* ClocksStateLow */
+{ .display_clk_khz = 352000, .pixel_clk_khz = 330000},
+/* ClocksStateNominal */
+{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 },
+/* ClocksStatePerformance */
+{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } };
+
+int dentist_get_divider_from_did(int did)
+{
+	if (did < DENTIST_BASE_DID_1)
+		did = DENTIST_BASE_DID_1;
+	if (did > DENTIST_MAX_DID)
+		did = DENTIST_MAX_DID;
+
+	if (did < DENTIST_BASE_DID_2) {
+		return DENTIST_DIVIDER_RANGE_1_START + DENTIST_DIVIDER_RANGE_1_STEP
+							* (did - DENTIST_BASE_DID_1);
+	} else if (did < DENTIST_BASE_DID_3) {
+		return DENTIST_DIVIDER_RANGE_2_START + DENTIST_DIVIDER_RANGE_2_STEP
+							* (did - DENTIST_BASE_DID_2);
+	} else if (did < DENTIST_BASE_DID_4) {
+		return DENTIST_DIVIDER_RANGE_3_START + DENTIST_DIVIDER_RANGE_3_STEP
+							* (did - DENTIST_BASE_DID_3);
+	} else {
+		return DENTIST_DIVIDER_RANGE_4_START + DENTIST_DIVIDER_RANGE_4_STEP
+							* (did - DENTIST_BASE_DID_4);
+	}
+}
+
+/* SW will adjust DP REF Clock average value for all purposes
+ * (DP DTO / DP Audio DTO and DP GTC)
+ if clock is spread for all cases:
+ -if SS enabled on DP Ref clock and HW de-spreading enabled with SW
+ calculations for DS_INCR/DS_MODULO (this is planned to be default case)
+ -if SS enabled on DP Ref clock and HW de-spreading enabled with HW
+ calculations (not planned to be used, but average clock should still
+ be valid)
+ -if SS enabled on DP Ref clock and HW de-spreading disabled
+ (should not be case with CIK) then SW should program all rates
+ generated according to average value (case as with previous ASICs)
+  */
+
+int dce_adjust_dp_ref_freq_for_ss(struct clk_mgr_internal *clk_mgr_dce, int dp_ref_clk_khz)
+{
+	if (clk_mgr_dce->ss_on_dprefclk && clk_mgr_dce->dprefclk_ss_divider != 0) {
+		struct fixed31_32 ss_percentage = dc_fixpt_div_int(
+				dc_fixpt_from_fraction(clk_mgr_dce->dprefclk_ss_percentage,
+							clk_mgr_dce->dprefclk_ss_divider), 200);
+		struct fixed31_32 adj_dp_ref_clk_khz;
+
+		ss_percentage = dc_fixpt_sub(dc_fixpt_one, ss_percentage);
+		adj_dp_ref_clk_khz = dc_fixpt_mul_int(ss_percentage, dp_ref_clk_khz);
+		dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz);
+	}
+	return dp_ref_clk_khz;
+}
+
+int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	int dprefclk_wdivider;
+	int dprefclk_src_sel;
+	int dp_ref_clk_khz = 600000;
+	int target_div;
+
+	/* ASSERT DP Reference Clock source is from DFS*/
+	REG_GET(DPREFCLK_CNTL, DPREFCLK_SRC_SEL, &dprefclk_src_sel);
+	ASSERT(dprefclk_src_sel == 0);
+
+	/* Read the mmDENTIST_DISPCLK_CNTL to get the currently
+	 * programmed DID DENTIST_DPREFCLK_WDIVIDER*/
+	REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider);
+
+	/* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/
+	target_div = dentist_get_divider_from_did(dprefclk_wdivider);
+
+	/* Calculate the current DFS clock, in kHz.*/
+	dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR
+		* clk_mgr->dentist_vco_freq_khz) / target_div;
+
+	return dce_adjust_dp_ref_freq_for_ss(clk_mgr, dp_ref_clk_khz);
+}
+
+int dce12_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+	return dce_adjust_dp_ref_freq_for_ss(clk_mgr_dce, clk_mgr_base->dprefclk_khz);
+}
+
+/* unit: in_khz before mode set, get pixel clock from context. ASIC register
+ * may not be programmed yet
+ */
+uint32_t dce_get_max_pixel_clock_for_all_paths(struct dc_state *context)
+{
+	uint32_t max_pix_clk = 0;
+	int i;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+
+		if (pipe_ctx->stream == NULL)
+			continue;
+
+		/* do not check under lay */
+		if (pipe_ctx->top_pipe)
+			continue;
+
+		if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10 > max_pix_clk)
+			max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk_100hz / 10;
+
+		/* raise clock state for HBR3/2 if required. Confirmed with HW DCE/DPCS
+		 * logic for HBR3 still needs Nominal (0.8V) on VDDC rail
+		 */
+		if (dc_is_dp_signal(pipe_ctx->stream->signal) &&
+				pipe_ctx->stream_res.pix_clk_params.requested_sym_clk > max_pix_clk)
+			max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_sym_clk;
+	}
+
+	return max_pix_clk;
+}
+
+enum dm_pp_clocks_state dce_get_required_clocks_state(
+	struct clk_mgr *clk_mgr_base,
+	struct dc_state *context)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	int i;
+	enum dm_pp_clocks_state low_req_clk;
+	int max_pix_clk = dce_get_max_pixel_clock_for_all_paths(context);
+
+	/* Iterate from highest supported to lowest valid state, and update
+	 * lowest RequiredState with the lowest state that satisfies
+	 * all required clocks
+	 */
+	for (i = clk_mgr_dce->max_clks_state; i >= DM_PP_CLOCKS_STATE_ULTRA_LOW; i--)
+		if (context->bw_ctx.bw.dce.dispclk_khz >
+				clk_mgr_dce->max_clks_by_state[i].display_clk_khz
+			|| max_pix_clk >
+				clk_mgr_dce->max_clks_by_state[i].pixel_clk_khz)
+			break;
+
+	low_req_clk = i + 1;
+	if (low_req_clk > clk_mgr_dce->max_clks_state) {
+		/* set max clock state for high phyclock, invalid on exceeding display clock */
+		if (clk_mgr_dce->max_clks_by_state[clk_mgr_dce->max_clks_state].display_clk_khz
+				< context->bw_ctx.bw.dce.dispclk_khz)
+			low_req_clk = DM_PP_CLOCKS_STATE_INVALID;
+		else
+			low_req_clk = clk_mgr_dce->max_clks_state;
+	}
+
+	return low_req_clk;
+}
+
+
+/* TODO: remove use the two broken down functions */
+int dce_set_clock(
+	struct clk_mgr *clk_mgr_base,
+	int requested_clk_khz)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct bp_pixel_clock_parameters pxl_clk_params = { 0 };
+	struct dc_bios *bp = clk_mgr_base->ctx->dc_bios;
+	int actual_clock = requested_clk_khz;
+	struct dmcu *dmcu = clk_mgr_dce->base.ctx->dc->res_pool->dmcu;
+
+	/* Make sure requested clock isn't lower than minimum threshold*/
+	if (requested_clk_khz > 0)
+		requested_clk_khz = max(requested_clk_khz,
+				clk_mgr_dce->dentist_vco_freq_khz / 64);
+
+	/* Prepare to program display clock*/
+	pxl_clk_params.target_pixel_clock_100hz = requested_clk_khz * 10;
+	pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+
+	if (clk_mgr_dce->dfs_bypass_active)
+		pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true;
+
+	bp->funcs->program_display_engine_pll(bp, &pxl_clk_params);
+
+	if (clk_mgr_dce->dfs_bypass_active) {
+		/* Cache the fixed display clock*/
+		clk_mgr_dce->dfs_bypass_disp_clk =
+			pxl_clk_params.dfs_bypass_display_clock;
+		actual_clock = pxl_clk_params.dfs_bypass_display_clock;
+	}
+
+	/* from power down, we need mark the clock state as ClocksStateNominal
+	 * from HWReset, so when resume we will call pplib voltage regulator.*/
+	if (requested_clk_khz == 0)
+		clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
+
+	if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu))
+		dmcu->funcs->set_psr_wait_loop(dmcu, actual_clock / 1000 / 7);
+
+	return actual_clock;
+}
+
+
+static void dce_clock_read_integrated_info(struct clk_mgr_internal *clk_mgr_dce)
+{
+	struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug;
+	struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios;
+	struct integrated_info info = { { { 0 } } };
+	struct dc_firmware_info fw_info = { { 0 } };
+	int i;
+
+	if (bp->integrated_info)
+		info = *bp->integrated_info;
+
+	clk_mgr_dce->dentist_vco_freq_khz = info.dentist_vco_freq;
+	if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
+		bp->funcs->get_firmware_info(bp, &fw_info);
+		clk_mgr_dce->dentist_vco_freq_khz =
+			fw_info.smu_gpu_pll_output_freq;
+		if (clk_mgr_dce->dentist_vco_freq_khz == 0)
+			clk_mgr_dce->dentist_vco_freq_khz = 3600000;
+	}
+
+	/*update the maximum display clock for each power state*/
+	for (i = 0; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) {
+		enum dm_pp_clocks_state clk_state = DM_PP_CLOCKS_STATE_INVALID;
+
+		switch (i) {
+		case 0:
+			clk_state = DM_PP_CLOCKS_STATE_ULTRA_LOW;
+			break;
+
+		case 1:
+			clk_state = DM_PP_CLOCKS_STATE_LOW;
+			break;
+
+		case 2:
+			clk_state = DM_PP_CLOCKS_STATE_NOMINAL;
+			break;
+
+		case 3:
+			clk_state = DM_PP_CLOCKS_STATE_PERFORMANCE;
+			break;
+
+		default:
+			clk_state = DM_PP_CLOCKS_STATE_INVALID;
+			break;
+		}
+
+		/*Do not allow bad VBIOS/SBIOS to override with invalid values,
+		 * check for > 100MHz*/
+		if (info.disp_clk_voltage[i].max_supported_clk >= 100000)
+			clk_mgr_dce->max_clks_by_state[clk_state].display_clk_khz =
+				info.disp_clk_voltage[i].max_supported_clk;
+	}
+
+	if (!debug->disable_dfs_bypass && bp->integrated_info)
+		if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE)
+			clk_mgr_dce->dfs_bypass_enabled = true;
+}
+
+void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce)
+{
+	struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios;
+	int ss_info_num = bp->funcs->get_ss_entry_number(
+			bp, AS_SIGNAL_TYPE_GPU_PLL);
+
+	if (ss_info_num) {
+		struct spread_spectrum_info info = { { 0 } };
+		enum bp_result result = bp->funcs->get_spread_spectrum_info(
+				bp, AS_SIGNAL_TYPE_GPU_PLL, 0, &info);
+
+		/* Based on VBIOS, VBIOS will keep entry for GPU PLL SS
+		 * even if SS not enabled and in that case
+		 * SSInfo.spreadSpectrumPercentage !=0 would be sign
+		 * that SS is enabled
+		 */
+		if (result == BP_RESULT_OK &&
+				info.spread_spectrum_percentage != 0) {
+			clk_mgr_dce->ss_on_dprefclk = true;
+			clk_mgr_dce->dprefclk_ss_divider = info.spread_percentage_divider;
+
+			if (info.type.CENTER_MODE == 0) {
+				/* TODO: Currently for DP Reference clock we
+				 * need only SS percentage for
+				 * downspread */
+				clk_mgr_dce->dprefclk_ss_percentage =
+						info.spread_spectrum_percentage;
+			}
+
+			return;
+		}
+
+		result = bp->funcs->get_spread_spectrum_info(
+				bp, AS_SIGNAL_TYPE_DISPLAY_PORT, 0, &info);
+
+		/* Based on VBIOS, VBIOS will keep entry for DPREFCLK SS
+		 * even if SS not enabled and in that case
+		 * SSInfo.spreadSpectrumPercentage !=0 would be sign
+		 * that SS is enabled
+		 */
+		if (result == BP_RESULT_OK &&
+				info.spread_spectrum_percentage != 0) {
+			clk_mgr_dce->ss_on_dprefclk = true;
+			clk_mgr_dce->dprefclk_ss_divider = info.spread_percentage_divider;
+
+			if (info.type.CENTER_MODE == 0) {
+				/* Currently for DP Reference clock we
+				 * need only SS percentage for
+				 * downspread */
+				clk_mgr_dce->dprefclk_ss_percentage =
+						info.spread_spectrum_percentage;
+			}
+		}
+	}
+}
+
+static void dce_pplib_apply_display_requirements(
+	struct dc *dc,
+	struct dc_state *context)
+{
+	struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
+
+	pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context);
+
+	dce110_fill_display_configs(context, pp_display_cfg);
+
+	if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) !=  0)
+		dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg);
+}
+
+static void dce_update_clocks(struct clk_mgr *clk_mgr_base,
+			struct dc_state *context,
+			bool safe_to_lower)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dm_pp_power_level_change_request level_change_req;
+	int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
+
+	/*TODO: W/A for dal3 linux, investigate why this works */
+	if (!clk_mgr_dce->dfs_bypass_active)
+		patched_disp_clk = patched_disp_clk * 115 / 100;
+
+	level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
+	/* get max clock state from PPLIB */
+	if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower)
+			|| level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) {
+		if (dm_pp_apply_power_level_change_request(clk_mgr_base->ctx, &level_change_req))
+			clk_mgr_dce->cur_min_clks_state = level_change_req.power_level;
+	}
+
+	if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr_base->clks.dispclk_khz)) {
+		patched_disp_clk = dce_set_clock(clk_mgr_base, patched_disp_clk);
+		clk_mgr_base->clks.dispclk_khz = patched_disp_clk;
+	}
+	dce_pplib_apply_display_requirements(clk_mgr_base->ctx->dc, context);
+}
+
+
+
+
+
+
+
+
+static struct clk_mgr_funcs dce_funcs = {
+	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
+	.update_clocks = dce_update_clocks
+};
+
+void dce_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr)
+{
+	struct clk_mgr *base = &clk_mgr->base;
+	struct dm_pp_static_clock_info static_clk_info = {0};
+
+	memcpy(clk_mgr->max_clks_by_state,
+		dce80_max_clks_by_state,
+		sizeof(dce80_max_clks_by_state));
+
+	base->ctx = ctx;
+	base->funcs = &dce_funcs;
+
+	clk_mgr->regs = &disp_clk_regs;
+	clk_mgr->clk_mgr_shift = &disp_clk_shift;
+	clk_mgr->clk_mgr_mask = &disp_clk_mask;
+	clk_mgr->dfs_bypass_disp_clk = 0;
+
+	clk_mgr->dprefclk_ss_percentage = 0;
+	clk_mgr->dprefclk_ss_divider = 1000;
+	clk_mgr->ss_on_dprefclk = false;
+
+	if (dm_pp_get_static_clocks(ctx, &static_clk_info))
+		clk_mgr->max_clks_state = static_clk_info.max_clocks_state;
+	else
+		clk_mgr->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
+	clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID;
+
+	dce_clock_read_integrated_info(clk_mgr);
+	dce_clock_read_ss_info(clk_mgr);
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.h
new file mode 100644
index 000000000000..f3bc7ab68aab
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#ifndef _DCE_CLK_MGR_H_
+#define _DCE_CLK_MGR_H_
+
+#include "dc.h"
+
+/* Starting DID for each range */
+enum dentist_base_divider_id {
+	DENTIST_BASE_DID_1 = 0x08,
+	DENTIST_BASE_DID_2 = 0x40,
+	DENTIST_BASE_DID_3 = 0x60,
+	DENTIST_BASE_DID_4 = 0x7e,
+	DENTIST_MAX_DID = 0x7f
+};
+
+/* Starting point and step size for each divider range.*/
+enum dentist_divider_range {
+	DENTIST_DIVIDER_RANGE_1_START = 8,   /* 2.00  */
+	DENTIST_DIVIDER_RANGE_1_STEP  = 1,   /* 0.25  */
+	DENTIST_DIVIDER_RANGE_2_START = 64,  /* 16.00 */
+	DENTIST_DIVIDER_RANGE_2_STEP  = 2,   /* 0.50  */
+	DENTIST_DIVIDER_RANGE_3_START = 128, /* 32.00 */
+	DENTIST_DIVIDER_RANGE_3_STEP  = 4,   /* 1.00  */
+	DENTIST_DIVIDER_RANGE_4_START = 248, /* 62.00 */
+	DENTIST_DIVIDER_RANGE_4_STEP  = 264, /* 66.00 */
+	DENTIST_DIVIDER_RANGE_SCALE_FACTOR = 4
+};
+
+/* functions shared by other dce clk mgrs */
+int dce_adjust_dp_ref_freq_for_ss(struct clk_mgr_internal *clk_mgr_dce, int dp_ref_clk_khz);
+int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr_base);
+enum dm_pp_clocks_state dce_get_required_clocks_state(
+	struct clk_mgr *clk_mgr_base,
+	struct dc_state *context);
+
+uint32_t dce_get_max_pixel_clock_for_all_paths(struct dc_state *context);
+
+
+void dce_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr_dce);
+
+void dce_clock_read_ss_info(struct clk_mgr_internal *dccg_dce);
+
+int dce12_get_dp_ref_freq_khz(struct clk_mgr *dccg);
+
+int dce_set_clock(
+	struct clk_mgr *clk_mgr_base,
+	int requested_clk_khz);
+
+
+void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr);
+
+int dentist_get_divider_from_did(int did);
+
+#endif /* _DCE_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
new file mode 100644
index 000000000000..c1a92c16535c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+
+#include "dce/dce_11_0_d.h"
+#include "dce/dce_11_0_sh_mask.h"
+#include "dce110_clk_mgr.h"
+#include "../clk_mgr/dce100/dce_clk_mgr.h"
+
+/* set register offset */
+#define SR(reg_name)\
+	.reg_name = mm ## reg_name
+
+/* set register offset with instance */
+#define SRI(reg_name, block, id)\
+	.reg_name = mm ## block ## id ## _ ## reg_name
+
+static const struct clk_mgr_registers disp_clk_regs = {
+		CLK_COMMON_REG_LIST_DCE_BASE()
+};
+
+static const struct clk_mgr_shift disp_clk_shift = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
+};
+
+static const struct clk_mgr_mask disp_clk_mask = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
+};
+
+static const struct state_dependent_clocks dce110_max_clks_by_state[] = {
+/*ClocksStateInvalid - should not be used*/
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/
+{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 },
+/*ClocksStateLow*/
+{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 },
+/*ClocksStateNominal*/
+{ .display_clk_khz = 467000, .pixel_clk_khz = 400000 },
+/*ClocksStatePerformance*/
+{ .display_clk_khz = 643000, .pixel_clk_khz = 400000 } };
+
+static int determine_sclk_from_bounding_box(
+		const struct dc *dc,
+		int required_sclk)
+{
+	int i;
+
+	/*
+	 * Some asics do not give us sclk levels, so we just report the actual
+	 * required sclk
+	 */
+	if (dc->sclk_lvls.num_levels == 0)
+		return required_sclk;
+
+	for (i = 0; i < dc->sclk_lvls.num_levels; i++) {
+		if (dc->sclk_lvls.clocks_in_khz[i] >= required_sclk)
+			return dc->sclk_lvls.clocks_in_khz[i];
+	}
+	/*
+	 * even maximum level could not satisfy requirement, this
+	 * is unexpected at this stage, should have been caught at
+	 * validation time
+	 */
+	ASSERT(0);
+	return dc->sclk_lvls.clocks_in_khz[dc->sclk_lvls.num_levels - 1];
+}
+
+uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context)
+{
+	uint8_t j;
+	uint32_t min_vertical_blank_time = -1;
+
+	for (j = 0; j < context->stream_count; j++) {
+		struct dc_stream_state *stream = context->streams[j];
+		uint32_t vertical_blank_in_pixels = 0;
+		uint32_t vertical_blank_time = 0;
+
+		vertical_blank_in_pixels = stream->timing.h_total *
+			(stream->timing.v_total
+			 - stream->timing.v_addressable);
+
+		vertical_blank_time = vertical_blank_in_pixels
+			* 10000 / stream->timing.pix_clk_100hz;
+
+		if (min_vertical_blank_time > vertical_blank_time)
+			min_vertical_blank_time = vertical_blank_time;
+	}
+
+	return min_vertical_blank_time;
+}
+
+void dce110_fill_display_configs(
+	const struct dc_state *context,
+	struct dm_pp_display_configuration *pp_display_cfg)
+{
+	int j;
+	int num_cfgs = 0;
+
+	for (j = 0; j < context->stream_count; j++) {
+		int k;
+
+		const struct dc_stream_state *stream = context->streams[j];
+		struct dm_pp_single_disp_config *cfg =
+			&pp_display_cfg->disp_configs[num_cfgs];
+		const struct pipe_ctx *pipe_ctx = NULL;
+
+		for (k = 0; k < MAX_PIPES; k++)
+			if (stream == context->res_ctx.pipe_ctx[k].stream) {
+				pipe_ctx = &context->res_ctx.pipe_ctx[k];
+				break;
+			}
+
+		ASSERT(pipe_ctx != NULL);
+
+		/* only notify active stream */
+		if (stream->dpms_off)
+			continue;
+
+		num_cfgs++;
+		cfg->signal = pipe_ctx->stream->signal;
+		cfg->pipe_idx = pipe_ctx->stream_res.tg->inst;
+		cfg->src_height = stream->src.height;
+		cfg->src_width = stream->src.width;
+		cfg->ddi_channel_mapping =
+			stream->link->ddi_channel_mapping.raw;
+		cfg->transmitter =
+			stream->link->link_enc->transmitter;
+		cfg->link_settings.lane_count =
+			stream->link->cur_link_settings.lane_count;
+		cfg->link_settings.link_rate =
+			stream->link->cur_link_settings.link_rate;
+		cfg->link_settings.link_spread =
+			stream->link->cur_link_settings.link_spread;
+		cfg->sym_clock = stream->phy_pix_clk;
+		/* Round v_refresh*/
+		cfg->v_refresh = stream->timing.pix_clk_100hz * 100;
+		cfg->v_refresh /= stream->timing.h_total;
+		cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2)
+							/ stream->timing.v_total;
+	}
+
+	pp_display_cfg->display_count = num_cfgs;
+}
+
+void dce11_pplib_apply_display_requirements(
+	struct dc *dc,
+	struct dc_state *context)
+{
+	struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg;
+
+	pp_display_cfg->all_displays_in_sync =
+		context->bw_ctx.bw.dce.all_displays_in_sync;
+	pp_display_cfg->nb_pstate_switch_disable =
+			context->bw_ctx.bw.dce.nbp_state_change_enable == false;
+	pp_display_cfg->cpu_cc6_disable =
+			context->bw_ctx.bw.dce.cpuc_state_change_enable == false;
+	pp_display_cfg->cpu_pstate_disable =
+			context->bw_ctx.bw.dce.cpup_state_change_enable == false;
+	pp_display_cfg->cpu_pstate_separation_time =
+			context->bw_ctx.bw.dce.blackout_recovery_time_us;
+
+	pp_display_cfg->min_memory_clock_khz = context->bw_ctx.bw.dce.yclk_khz
+		/ MEMORY_TYPE_MULTIPLIER_CZ;
+
+	pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box(
+			dc,
+			context->bw_ctx.bw.dce.sclk_khz);
+
+	/*
+	 * As workaround for >4x4K lightup set dcfclock to min_engine_clock value.
+	 * This is not required for less than 5 displays,
+	 * thus don't request decfclk in dc to avoid impact
+	 * on power saving.
+	 *
+	 */
+	pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4) ?
+			pp_display_cfg->min_engine_clock_khz : 0;
+
+	pp_display_cfg->min_engine_clock_deep_sleep_khz
+			= context->bw_ctx.bw.dce.sclk_deep_sleep_khz;
+
+	pp_display_cfg->avail_mclk_switch_time_us =
+						dce110_get_min_vblank_time_us(context);
+	/* TODO: dce11.2*/
+	pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0;
+
+	pp_display_cfg->disp_clk_khz = dc->clk_mgr->clks.dispclk_khz;
+
+	dce110_fill_display_configs(context, pp_display_cfg);
+
+	/* TODO: is this still applicable?*/
+	if (pp_display_cfg->display_count == 1) {
+		const struct dc_crtc_timing *timing =
+			&context->streams[0]->timing;
+
+		pp_display_cfg->crtc_index =
+			pp_display_cfg->disp_configs[0].pipe_idx;
+		pp_display_cfg->line_time_in_us = timing->h_total * 10000 / timing->pix_clk_100hz;
+	}
+
+	if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) !=  0)
+		dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg);
+}
+
+static void dce11_update_clocks(struct clk_mgr *clk_mgr_base,
+			struct dc_state *context,
+			bool safe_to_lower)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dm_pp_power_level_change_request level_change_req;
+	int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
+
+	/*TODO: W/A for dal3 linux, investigate why this works */
+	if (!clk_mgr_dce->dfs_bypass_active)
+		patched_disp_clk = patched_disp_clk * 115 / 100;
+
+	level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
+	/* get max clock state from PPLIB */
+	if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower)
+			|| level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) {
+		if (dm_pp_apply_power_level_change_request(clk_mgr_base->ctx, &level_change_req))
+			clk_mgr_dce->cur_min_clks_state = level_change_req.power_level;
+	}
+
+	if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr_base->clks.dispclk_khz)) {
+		context->bw_ctx.bw.dce.dispclk_khz = dce_set_clock(clk_mgr_base, patched_disp_clk);
+		clk_mgr_base->clks.dispclk_khz = patched_disp_clk;
+	}
+	dce11_pplib_apply_display_requirements(clk_mgr_base->ctx->dc, context);
+}
+
+static struct clk_mgr_funcs dce110_funcs = {
+	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
+	.update_clocks = dce11_update_clocks
+};
+
+void dce110_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr)
+{
+	memcpy(clk_mgr->max_clks_by_state,
+		dce110_max_clks_by_state,
+		sizeof(dce110_max_clks_by_state));
+
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
+	clk_mgr->regs = &disp_clk_regs;
+	clk_mgr->clk_mgr_shift = &disp_clk_shift;
+	clk_mgr->clk_mgr_mask = &disp_clk_mask;
+	clk_mgr->base.funcs = &dce110_funcs;
+
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.h
new file mode 100644
index 000000000000..c0eb2ea6fb3a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce110/dce110_clk_mgr.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCE_DCE110_CLK_MGR_H_
+#define DAL_DC_DCE_DCE110_CLK_MGR_H_
+
+void dce110_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr);
+
+void dce110_fill_display_configs(
+	const struct dc_state *context,
+	struct dm_pp_display_configuration *pp_display_cfg);
+
+/* functions shared with other clk mgr*/
+void dce11_pplib_apply_display_requirements(
+	struct dc *dc,
+	struct dc_state *context);
+
+uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context);
+
+#endif /* DAL_DC_DCE_DCE110_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
new file mode 100644
index 000000000000..778392c73187
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.c
@@ -0,0 +1,239 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+
+#include "dce/dce_11_2_d.h"
+#include "dce/dce_11_2_sh_mask.h"
+#include "dce100/dce_clk_mgr.h"
+#include "dce110/dce110_clk_mgr.h"
+#include "dce112_clk_mgr.h"
+#include "dal_asic_id.h"
+
+/* set register offset */
+#define SR(reg_name)\
+	.reg_name = mm ## reg_name
+
+/* set register offset with instance */
+#define SRI(reg_name, block, id)\
+	.reg_name = mm ## block ## id ## _ ## reg_name
+
+static const struct clk_mgr_registers disp_clk_regs = {
+		CLK_COMMON_REG_LIST_DCE_BASE()
+};
+
+static const struct clk_mgr_shift disp_clk_shift = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
+};
+
+static const struct clk_mgr_mask disp_clk_mask = {
+		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
+};
+
+static const struct state_dependent_clocks dce112_max_clks_by_state[] = {
+/*ClocksStateInvalid - should not be used*/
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/
+{ .display_clk_khz = 389189, .pixel_clk_khz = 346672 },
+/*ClocksStateLow*/
+{ .display_clk_khz = 459000, .pixel_clk_khz = 400000 },
+/*ClocksStateNominal*/
+{ .display_clk_khz = 667000, .pixel_clk_khz = 600000 },
+/*ClocksStatePerformance*/
+{ .display_clk_khz = 1132000, .pixel_clk_khz = 600000 } };
+
+
+//TODO: remove use the two broken down functions
+int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct bp_set_dce_clock_parameters dce_clk_params;
+	struct dc_bios *bp = clk_mgr_base->ctx->dc_bios;
+	struct dc *core_dc = clk_mgr_base->ctx->dc;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	int actual_clock = requested_clk_khz;
+	/* Prepare to program display clock*/
+	memset(&dce_clk_params, 0, sizeof(dce_clk_params));
+
+	/* Make sure requested clock isn't lower than minimum threshold*/
+	if (requested_clk_khz > 0)
+		requested_clk_khz = max(requested_clk_khz,
+				clk_mgr_dce->dentist_vco_freq_khz / 62);
+
+	dce_clk_params.target_clock_frequency = requested_clk_khz;
+	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+	dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK;
+
+	bp->funcs->set_dce_clock(bp, &dce_clk_params);
+	actual_clock = dce_clk_params.target_clock_frequency;
+
+	/*
+	 * from power down, we need mark the clock state as ClocksStateNominal
+	 * from HWReset, so when resume we will call pplib voltage regulator.
+	 */
+	if (requested_clk_khz == 0)
+		clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
+
+	/*Program DP ref Clock*/
+	/*VBIOS will determine DPREFCLK frequency, so we don't set it*/
+	dce_clk_params.target_clock_frequency = 0;
+	dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
+	if (!ASICREV_IS_VEGA20_P(clk_mgr_base->ctx->asic_id.hw_internal_rev))
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
+			(dce_clk_params.pll_id ==
+					CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
+	else
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
+
+	bp->funcs->set_dce_clock(bp, &dce_clk_params);
+
+	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+			if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock)
+				dmcu->funcs->set_psr_wait_loop(dmcu,
+						actual_clock / 1000 / 7);
+		}
+	}
+
+	clk_mgr_dce->dfs_bypass_disp_clk = actual_clock;
+	return actual_clock;
+}
+
+int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz)
+{
+	struct bp_set_dce_clock_parameters dce_clk_params;
+	struct dc_bios *bp = clk_mgr->base.ctx->dc_bios;
+	struct dc *core_dc = clk_mgr->base.ctx->dc;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+	int actual_clock = requested_clk_khz;
+	/* Prepare to program display clock*/
+	memset(&dce_clk_params, 0, sizeof(dce_clk_params));
+
+	/* Make sure requested clock isn't lower than minimum threshold*/
+	if (requested_clk_khz > 0)
+		requested_clk_khz = max(requested_clk_khz,
+				clk_mgr->dentist_vco_freq_khz / 62);
+
+	dce_clk_params.target_clock_frequency = requested_clk_khz;
+	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+	dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK;
+
+	bp->funcs->set_dce_clock(bp, &dce_clk_params);
+	actual_clock = dce_clk_params.target_clock_frequency;
+
+	/*
+	 * from power down, we need mark the clock state as ClocksStateNominal
+	 * from HWReset, so when resume we will call pplib voltage regulator.
+	 */
+	if (requested_clk_khz == 0)
+		clk_mgr->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL;
+
+
+	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+			if (clk_mgr->dfs_bypass_disp_clk != actual_clock)
+				dmcu->funcs->set_psr_wait_loop(dmcu,
+						actual_clock / 1000 / 7);
+		}
+	}
+
+	clk_mgr->dfs_bypass_disp_clk = actual_clock;
+	return actual_clock;
+
+}
+
+int dce112_set_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+	struct bp_set_dce_clock_parameters dce_clk_params;
+	struct dc_bios *bp = clk_mgr->base.ctx->dc_bios;
+
+	memset(&dce_clk_params, 0, sizeof(dce_clk_params));
+
+	/*Program DP ref Clock*/
+	/*VBIOS will determine DPREFCLK frequency, so we don't set it*/
+	dce_clk_params.target_clock_frequency = 0;
+	dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS;
+	dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
+	if (!ASICREV_IS_VEGA20_P(clk_mgr->base.ctx->asic_id.hw_internal_rev))
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
+			(dce_clk_params.pll_id ==
+					CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
+	else
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
+
+	bp->funcs->set_dce_clock(bp, &dce_clk_params);
+
+	/* Returns the dp_refclk that was set */
+	return dce_clk_params.target_clock_frequency;
+}
+
+static void dce112_update_clocks(struct clk_mgr *clk_mgr_base,
+			struct dc_state *context,
+			bool safe_to_lower)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dm_pp_power_level_change_request level_change_req;
+	int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
+
+	/*TODO: W/A for dal3 linux, investigate why this works */
+	if (!clk_mgr_dce->dfs_bypass_active)
+		patched_disp_clk = patched_disp_clk * 115 / 100;
+
+	level_change_req.power_level = dce_get_required_clocks_state(clk_mgr_base, context);
+	/* get max clock state from PPLIB */
+	if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower)
+			|| level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) {
+		if (dm_pp_apply_power_level_change_request(clk_mgr_base->ctx, &level_change_req))
+			clk_mgr_dce->cur_min_clks_state = level_change_req.power_level;
+	}
+
+	if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr_base->clks.dispclk_khz)) {
+		patched_disp_clk = dce112_set_clock(clk_mgr_base, patched_disp_clk);
+		clk_mgr_base->clks.dispclk_khz = patched_disp_clk;
+	}
+	dce11_pplib_apply_display_requirements(clk_mgr_base->ctx->dc, context);
+}
+
+static struct clk_mgr_funcs dce112_funcs = {
+	.get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz,
+	.update_clocks = dce112_update_clocks
+};
+
+void dce112_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr)
+{
+	memcpy(clk_mgr->max_clks_by_state,
+		dce112_max_clks_by_state,
+		sizeof(dce112_max_clks_by_state));
+
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
+	clk_mgr->regs = &disp_clk_regs;
+	clk_mgr->clk_mgr_shift = &disp_clk_shift;
+	clk_mgr->clk_mgr_mask = &disp_clk_mask;
+	clk_mgr->base.funcs = &dce112_funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.h
new file mode 100644
index 000000000000..dfb06db118e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce112/dce112_clk_mgr.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCE_DCE112_CLK_MGR_H_
+#define DAL_DC_DCE_DCE112_CLK_MGR_H_
+
+
+void dce112_clk_mgr_construct(
+		struct dc_context *ctx,
+		struct clk_mgr_internal *clk_mgr);
+
+/* functions shared with other clk mgr */
+int dce112_set_clock(struct clk_mgr *clk_mgr_base, int requested_clk_khz);
+int dce112_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_clk_khz);
+int dce112_set_dprefclk(struct clk_mgr_internal *clk_mgr);
+
+#endif /* DAL_DC_DCE_DCE112_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
new file mode 100644
index 000000000000..08f2e253ccb0
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+
+#include "dce112/dce112_clk_mgr.h"
+#include "dce110/dce110_clk_mgr.h"
+#include "dce120_clk_mgr.h"
+#include "dce100/dce_clk_mgr.h"
+
+static const struct state_dependent_clocks dce120_max_clks_by_state[] = {
+/*ClocksStateInvalid - should not be used*/
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/
+{ .display_clk_khz = 0, .pixel_clk_khz = 0 },
+/*ClocksStateLow*/
+{ .display_clk_khz = 460000, .pixel_clk_khz = 400000 },
+/*ClocksStateNominal*/
+{ .display_clk_khz = 670000, .pixel_clk_khz = 600000 },
+/*ClocksStatePerformance*/
+{ .display_clk_khz = 1133000, .pixel_clk_khz = 600000 } };
+
+/**
+ * dce121_clock_patch_xgmi_ss_info() - Save XGMI spread spectrum info
+ * @clk_mgr: clock manager base structure
+ *
+ * Reads from VBIOS the XGMI spread spectrum info and saves it within
+ * the dce clock manager. This operation will overwrite the existing dprefclk
+ * SS values if the vBIOS query succeeds. Otherwise, it does nothing. It also
+ * sets the ->xgmi_enabled flag.
+ */
+void dce121_clock_patch_xgmi_ss_info(struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	enum bp_result result;
+	struct spread_spectrum_info info = { { 0 } };
+	struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios;
+
+	clk_mgr_dce->xgmi_enabled = false;
+
+	result = bp->funcs->get_spread_spectrum_info(bp, AS_SIGNAL_TYPE_XGMI,
+						     0, &info);
+	if (result == BP_RESULT_OK && info.spread_spectrum_percentage != 0) {
+		clk_mgr_dce->xgmi_enabled = true;
+		clk_mgr_dce->ss_on_dprefclk = true;
+		clk_mgr_dce->dprefclk_ss_divider =
+				info.spread_percentage_divider;
+
+		if (info.type.CENTER_MODE == 0) {
+			/*
+			 * Currently for DP Reference clock we
+			 * need only SS percentage for
+			 * downspread
+			 */
+			clk_mgr_dce->dprefclk_ss_percentage =
+					info.spread_spectrum_percentage;
+		}
+	}
+}
+
+static void dce12_update_clocks(struct clk_mgr *clk_mgr_base,
+			struct dc_state *context,
+			bool safe_to_lower)
+{
+	struct clk_mgr_internal *clk_mgr_dce = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dm_pp_clock_for_voltage_req clock_voltage_req = {0};
+	int max_pix_clk = dce_get_max_pixel_clock_for_all_paths(context);
+	int patched_disp_clk = context->bw_ctx.bw.dce.dispclk_khz;
+
+	/*TODO: W/A for dal3 linux, investigate why this works */
+	if (!clk_mgr_dce->dfs_bypass_active)
+		patched_disp_clk = patched_disp_clk * 115 / 100;
+
+	if (should_set_clock(safe_to_lower, patched_disp_clk, clk_mgr_base->clks.dispclk_khz)) {
+		clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK;
+		/*
+		 * When xGMI is enabled, the display clk needs to be adjusted
+		 * with the WAFL link's SS percentage.
+		 */
+		if (clk_mgr_dce->xgmi_enabled)
+			patched_disp_clk = dce_adjust_dp_ref_freq_for_ss(
+					clk_mgr_dce, patched_disp_clk);
+		clock_voltage_req.clocks_in_khz = patched_disp_clk;
+		clk_mgr_base->clks.dispclk_khz = dce112_set_clock(clk_mgr_base, patched_disp_clk);
+
+		dm_pp_apply_clock_for_voltage_request(clk_mgr_base->ctx, &clock_voltage_req);
+	}
+
+	if (should_set_clock(safe_to_lower, max_pix_clk, clk_mgr_base->clks.phyclk_khz)) {
+		clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAYPHYCLK;
+		clock_voltage_req.clocks_in_khz = max_pix_clk;
+		clk_mgr_base->clks.phyclk_khz = max_pix_clk;
+
+		dm_pp_apply_clock_for_voltage_request(clk_mgr_base->ctx, &clock_voltage_req);
+	}
+	dce11_pplib_apply_display_requirements(clk_mgr_base->ctx->dc, context);
+}
+
+
+static struct clk_mgr_funcs dce120_funcs = {
+	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+	.update_clocks = dce12_update_clocks
+};
+
+void dce120_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr)
+{
+	memcpy(clk_mgr->max_clks_by_state,
+		dce120_max_clks_by_state,
+		sizeof(dce120_max_clks_by_state));
+
+	dce_clk_mgr_construct(ctx, clk_mgr);
+
+	clk_mgr->base.dprefclk_khz = 600000;
+	clk_mgr->base.funcs = &dce120_funcs;
+}
+
+void dce121_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr)
+{
+	dce120_clk_mgr_construct(ctx, clk_mgr);
+	clk_mgr->base.dprefclk_khz = 625000;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.h
new file mode 100644
index 000000000000..d12d6fcb167d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce120/dce120_clk_mgr.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCE_DCE120_CLK_MGR_H_
+#define DAL_DC_DCE_DCE120_CLK_MGR_H_
+
+void dce120_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr);
+void dce121_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr);
+
+
+
+#endif /* DAL_DC_DCE_DCE120_CLK_MGR_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
index 2b2de1d913c9..04b12bb2243d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c
@@ -23,32 +23,23 @@
  *
  */
 
-#include "dcn10_clk_mgr.h"
+#include <linux/slab.h>
 
 #include "reg_helper.h"
 #include "core_types.h"
-
-#define TO_DCE_CLK_MGR(clocks)\
-	container_of(clocks, struct dce_clk_mgr, base)
-
-#define REG(reg) \
-	(clk_mgr_dce->regs->reg)
-
-#undef FN
-#define FN(reg_name, field_name) \
-	clk_mgr_dce->clk_mgr_shift->field_name, clk_mgr_dce->clk_mgr_mask->field_name
-
-#define CTX \
-	clk_mgr_dce->base.ctx
-#define DC_LOGGER \
-	clk_mgr->ctx->logger
-
-static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks)
+#include "clk_mgr_internal.h"
+#include "rv1_clk_mgr.h"
+#include "dce100/dce_clk_mgr.h"
+#include "dce112/dce112_clk_mgr.h"
+#include "rv1_clk_mgr_vbios_smu.h"
+#include "rv1_clk_mgr_clk.h"
+
+static int rv1_determine_dppclk_threshold(struct clk_mgr_internal *clk_mgr, struct dc_clocks *new_clocks)
 {
 	bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
-	bool dispclk_increase = new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz;
+	bool dispclk_increase = new_clocks->dispclk_khz > clk_mgr->base.clks.dispclk_khz;
 	int disp_clk_threshold = new_clocks->max_supported_dppclk_khz;
-	bool cur_dpp_div = clk_mgr->clks.dispclk_khz > clk_mgr->clks.dppclk_khz;
+	bool cur_dpp_div = clk_mgr->base.clks.dispclk_khz > clk_mgr->base.clks.dppclk_khz;
 
 	/* increase clock, looking for div is 0 for current, request div is 1*/
 	if (dispclk_increase) {
@@ -78,7 +69,7 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl
 		/* current disp clk is lower than current maximum dpp clk,
 		 * no need to ramp
 		 */
-		if (clk_mgr->clks.dispclk_khz <= disp_clk_threshold)
+		if (clk_mgr->base.clks.dispclk_khz <= disp_clk_threshold)
 			return new_clocks->dispclk_khz;
 
 		/* request dpp clk need to be divided by 2 */
@@ -89,15 +80,17 @@ static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_cl
 	return disp_clk_threshold;
 }
 
-static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks)
+static void ramp_up_dispclk_with_dpp(struct clk_mgr_internal *clk_mgr, struct dc *dc, struct dc_clocks *new_clocks)
 {
-	struct dc *dc = clk_mgr->ctx->dc;
-	int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks);
-	bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
 	int i;
+	int dispclk_to_dpp_threshold = rv1_determine_dppclk_threshold(clk_mgr, new_clocks);
+	bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz;
 
 	/* set disp clk to dpp clk threshold */
-	dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold);
+
+	clk_mgr->funcs->set_dispclk(clk_mgr, dispclk_to_dpp_threshold);
+	clk_mgr->funcs->set_dprefclk(clk_mgr);
+
 
 	/* update request dpp clk division option */
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -113,42 +106,23 @@ static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clo
 	}
 
 	/* If target clk not same as dppclk threshold, set to target clock */
-	if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz)
-		dce112_set_clock(clk_mgr, new_clocks->dispclk_khz);
-
-	clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
-	clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz;
-	clk_mgr->clks.max_supported_dppclk_khz = new_clocks->max_supported_dppclk_khz;
-}
-
-static int get_active_display_cnt(
-		struct dc *dc,
-		struct dc_state *context)
-{
-	int i, display_count;
-
-	display_count = 0;
-	for (i = 0; i < context->stream_count; i++) {
-		const struct dc_stream_state *stream = context->streams[i];
-
-		/*
-		 * Only notify active stream or virtual stream.
-		 * Need to notify virtual stream to work around
-		 * headless case. HPD does not fire when system is in
-		 * S0i2.
-		 */
-		if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL)
-			display_count++;
+	if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) {
+		clk_mgr->funcs->set_dispclk(clk_mgr, new_clocks->dispclk_khz);
+		clk_mgr->funcs->set_dprefclk(clk_mgr);
 	}
 
-	return display_count;
+
+	clk_mgr->base.clks.dispclk_khz = new_clocks->dispclk_khz;
+	clk_mgr->base.clks.dppclk_khz = new_clocks->dppclk_khz;
+	clk_mgr->base.clks.max_supported_dppclk_khz = new_clocks->max_supported_dppclk_khz;
 }
 
-static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
+static void rv1_update_clocks(struct clk_mgr *clk_mgr_base,
 			struct dc_state *context,
 			bool safe_to_lower)
 {
-	struct dc *dc = clk_mgr->ctx->dc;
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+	struct dc *dc = clk_mgr_base->ctx->dc;
 	struct dc_debug_options *debug = &dc->debug;
 	struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
 	struct pp_smu_funcs_rv *pp_smu = NULL;
@@ -158,9 +132,12 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 
 	bool enter_display_off = false;
 
-	display_count = get_active_display_cnt(dc, context);
-	if (dc->res_pool->pp_smu)
-		pp_smu = &dc->res_pool->pp_smu->rv_funcs;
+	ASSERT(clk_mgr->pp_smu);
+
+	pp_smu = &clk_mgr->pp_smu->rv_funcs;
+
+	display_count = clk_mgr_helper_get_active_display_cnt(dc, context);
+
 	if (display_count == 0)
 		enter_display_off = true;
 
@@ -170,18 +147,18 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 		 * if function pointer not set up, this message is
 		 * sent as part of pplib_apply_display_requirements.
 		 */
-		if (pp_smu && pp_smu->set_display_count)
+		if (pp_smu->set_display_count)
 			pp_smu->set_display_count(&pp_smu->pp_smu, display_count);
 	}
 
-	if (new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz
-			|| new_clocks->phyclk_khz > clk_mgr->clks.phyclk_khz
-			|| new_clocks->fclk_khz > clk_mgr->clks.fclk_khz
-			|| new_clocks->dcfclk_khz > clk_mgr->clks.dcfclk_khz)
+	if (new_clocks->dispclk_khz > clk_mgr_base->clks.dispclk_khz
+			|| new_clocks->phyclk_khz > clk_mgr_base->clks.phyclk_khz
+			|| new_clocks->fclk_khz > clk_mgr_base->clks.fclk_khz
+			|| new_clocks->dcfclk_khz > clk_mgr_base->clks.dcfclk_khz)
 		send_request_to_increase = true;
 
-	if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr->clks.phyclk_khz)) {
-		clk_mgr->clks.phyclk_khz = new_clocks->phyclk_khz;
+	if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) {
+		clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz;
 		send_request_to_lower = true;
 	}
 
@@ -189,20 +166,20 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 	if (debug->force_fclk_khz != 0)
 		new_clocks->fclk_khz = debug->force_fclk_khz;
 
-	if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr->clks.fclk_khz)) {
-		clk_mgr->clks.fclk_khz = new_clocks->fclk_khz;
+	if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) {
+		clk_mgr_base->clks.fclk_khz = new_clocks->fclk_khz;
 		send_request_to_lower = true;
 	}
 
 	//DCF Clock
-	if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr->clks.dcfclk_khz)) {
-		clk_mgr->clks.dcfclk_khz = new_clocks->dcfclk_khz;
+	if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
+		clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
 		send_request_to_lower = true;
 	}
 
 	if (should_set_clock(safe_to_lower,
-			new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) {
-		clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
+			new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
+		clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
 		send_request_to_lower = true;
 	}
 
@@ -211,10 +188,9 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 	 */
 	if (send_request_to_increase) {
 		/*use dcfclk to request voltage*/
-		if (pp_smu && pp_smu->set_hard_min_fclk_by_freq &&
+		if (pp_smu->set_hard_min_fclk_by_freq &&
 				pp_smu->set_hard_min_dcfclk_by_freq &&
 				pp_smu->set_min_deep_sleep_dcfclk) {
-
 			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000);
 			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000);
 			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000);
@@ -223,67 +199,67 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 
 	/* dcn1 dppclk is tied to dispclk */
 	/* program dispclk on = as a w/a for sleep resume clock ramping issues */
-	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz)
-			|| new_clocks->dispclk_khz == clk_mgr->clks.dispclk_khz) {
-		dcn1_ramp_up_dispclk_with_dpp(clk_mgr, new_clocks);
-		clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz;
+	if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)
+			|| new_clocks->dispclk_khz == clk_mgr_base->clks.dispclk_khz) {
+		ramp_up_dispclk_with_dpp(clk_mgr, dc, new_clocks);
+		clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
 		send_request_to_lower = true;
 	}
 
 	if (!send_request_to_increase && send_request_to_lower) {
 		/*use dcfclk to request voltage*/
-		if (pp_smu && pp_smu->set_hard_min_fclk_by_freq &&
+		if (pp_smu->set_hard_min_fclk_by_freq &&
 				pp_smu->set_hard_min_dcfclk_by_freq &&
 				pp_smu->set_min_deep_sleep_dcfclk) {
-
 			pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, new_clocks->fclk_khz / 1000);
 			pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, new_clocks->dcfclk_khz / 1000);
 			pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, (new_clocks->dcfclk_deep_sleep_khz + 999) / 1000);
 		}
 	}
 }
-static const struct clk_mgr_funcs dcn1_funcs = {
+
+static struct clk_mgr_funcs rv1_clk_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
-	.update_clocks = dcn1_update_clocks
+	.update_clocks = rv1_update_clocks,
+};
+
+static struct clk_mgr_internal_funcs rv1_clk_internal_funcs = {
+	.set_dispclk = rv1_vbios_smu_set_dispclk,
+	.set_dprefclk = dce112_set_dprefclk
 };
-struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
+
+void rv1_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr, struct pp_smu_funcs *pp_smu)
 {
 	struct dc_debug_options *debug = &ctx->dc->debug;
 	struct dc_bios *bp = ctx->dc_bios;
 	struct dc_firmware_info fw_info = { { 0 } };
-	struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL);
 
-	if (clk_mgr_dce == NULL) {
-		BREAK_TO_DEBUGGER();
-		return NULL;
-	}
-
-	clk_mgr_dce->base.ctx = ctx;
-	clk_mgr_dce->base.funcs = &dcn1_funcs;
+	clk_mgr->base.ctx = ctx;
+	clk_mgr->pp_smu = pp_smu;
+	clk_mgr->base.funcs = &rv1_clk_funcs;
+	clk_mgr->funcs = &rv1_clk_internal_funcs;
 
-	clk_mgr_dce->dfs_bypass_disp_clk = 0;
+	clk_mgr->dfs_bypass_disp_clk = 0;
 
-	clk_mgr_dce->dprefclk_ss_percentage = 0;
-	clk_mgr_dce->dprefclk_ss_divider = 1000;
-	clk_mgr_dce->ss_on_dprefclk = false;
+	clk_mgr->dprefclk_ss_percentage = 0;
+	clk_mgr->dprefclk_ss_divider = 1000;
+	clk_mgr->ss_on_dprefclk = false;
+	clk_mgr->base.dprefclk_khz = 600000;
 
-	clk_mgr_dce->dprefclk_khz = 600000;
 	if (bp->integrated_info)
-		clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
-	if (clk_mgr_dce->dentist_vco_freq_khz == 0) {
+		clk_mgr->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq;
+	if (clk_mgr->dentist_vco_freq_khz == 0) {
 		bp->funcs->get_firmware_info(bp, &fw_info);
-		clk_mgr_dce->dentist_vco_freq_khz = fw_info.smu_gpu_pll_output_freq;
-		if (clk_mgr_dce->dentist_vco_freq_khz == 0)
-			clk_mgr_dce->dentist_vco_freq_khz = 3600000;
+		clk_mgr->dentist_vco_freq_khz = fw_info.smu_gpu_pll_output_freq;
+		if (clk_mgr->dentist_vco_freq_khz == 0)
+			clk_mgr->dentist_vco_freq_khz = 3600000;
 	}
 
 	if (!debug->disable_dfs_bypass && bp->integrated_info)
 		if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE)
-			clk_mgr_dce->dfs_bypass_enabled = true;
-
-	dce_clock_read_ss_info(clk_mgr_dce);
+			clk_mgr->dfs_bypass_enabled = true;
 
-	return &clk_mgr_dce->base;
+	dce_clock_read_ss_info(clk_mgr);
 }
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.h
new file mode 100644
index 000000000000..0807478c8212
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __RV1_CLK_MGR_H__
+#define __RV1_CLK_MGR_H__
+
+void rv1_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr, struct pp_smu_funcs *pp_smu);
+
+#endif //__DCN10_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
new file mode 100644
index 000000000000..61dd12198a3c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "clk_mgr_internal.h"
+#include "rv1_clk_mgr_clk.h"
+
+#include "ip/Discovery/hwid.h"
+#include "ip/Discovery/v1/ip_offset_1.h"
+#include "ip/CLK/clk_10_0_default.h"
+#include "ip/CLK/clk_10_0_offset.h"
+#include "ip/CLK/clk_10_0_reg.h"
+#include "ip/CLK/clk_10_0_sh_mask.h"
+
+#include "dce100/dce_clk_mgr.h"
+
+#define CLK_BASE_INNER(inst) \
+	CLK_BASE__INST ## inst ## _SEG0
+
+
+#define CLK_REG(reg_name, block, inst)\
+	CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
+					mm ## block ## _ ## inst ## _ ## reg_name
+
+#define REG(reg_name) \
+	CLK_REG(reg_name, CLK0, 0)
+
+
+/* Only used by testing framework*/
+void rv1_dump_clk_registers(struct clk_state_registers *regs, struct clk_bypass *bypass, struct clk_mgr *clk_mgr_base)
+{
+	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+
+		regs->CLK0_CLK8_CURRENT_CNT = REG_READ(CLK0_CLK8_CURRENT_CNT) / 10; //dcf clk
+
+		bypass->dcfclk_bypass = REG_READ(CLK0_CLK8_BYPASS_CNTL) & 0x0007;
+		if (bypass->dcfclk_bypass < 0 || bypass->dcfclk_bypass > 4)
+			bypass->dcfclk_bypass = 0;
+
+
+		regs->CLK0_CLK8_DS_CNTL = REG_READ(CLK0_CLK8_DS_CNTL) / 10;	//dcf deep sleep divider
+
+		regs->CLK0_CLK8_ALLOW_DS = REG_READ(CLK0_CLK8_ALLOW_DS); //dcf deep sleep allow
+
+		regs->CLK0_CLK10_CURRENT_CNT = REG_READ(CLK0_CLK10_CURRENT_CNT) / 10; //dpref clk
+
+		bypass->dispclk_pypass = REG_READ(CLK0_CLK10_BYPASS_CNTL) & 0x0007;
+		if (bypass->dispclk_pypass < 0 || bypass->dispclk_pypass > 4)
+			bypass->dispclk_pypass = 0;
+
+		regs->CLK0_CLK11_CURRENT_CNT = REG_READ(CLK0_CLK11_CURRENT_CNT) / 10; //disp clk
+
+		bypass->dprefclk_bypass = REG_READ(CLK0_CLK11_BYPASS_CNTL) & 0x0007;
+		if (bypass->dprefclk_bypass < 0 || bypass->dprefclk_bypass > 4)
+			bypass->dprefclk_bypass = 0;
+
+}
diff --git a/drivers/gpu/drm/i915/i915_gemfs.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.h
index cca8bdc5b93e..b68e3452efb9 100644
--- a/drivers/gpu/drm/i915/i915_gemfs.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_clk.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2017 Intel Corporation
+ * Copyright 2018 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -8,27 +8,22 @@
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
  *
  */
 
-#ifndef __I915_GEMFS_H__
-#define __I915_GEMFS_H__
-
-struct drm_i915_private;
-
-int i915_gemfs_init(struct drm_i915_private *i915);
-
-void i915_gemfs_fini(struct drm_i915_private *i915);
+#ifndef DAL_DC_DCN10_RV1_CLK_MGR_CLK_H_
+#define DAL_DC_DCN10_RV1_CLK_MGR_CLK_H_
 
-#endif
+#endif /* DAL_DC_DCN10_RV1_CLK_MGR_CLK_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
new file mode 100644
index 000000000000..1897e91c8ccb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2012-16 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "reg_helper.h"
+
+#define MAX_INSTANCE	5
+#define MAX_SEGMENT		5
+
+struct IP_BASE_INSTANCE {
+	unsigned int segment[MAX_SEGMENT];
+};
+
+struct IP_BASE {
+	struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+
+static const struct IP_BASE MP1_BASE  = { { { { 0x00016000, 0, 0, 0, 0 } },
+											 { { 0, 0, 0, 0, 0 } },
+											 { { 0, 0, 0, 0, 0 } },
+											 { { 0, 0, 0, 0, 0 } },
+											 { { 0, 0, 0, 0, 0 } } } };
+
+#define mmMP1_SMN_C2PMSG_91            0x29B
+#define mmMP1_SMN_C2PMSG_83            0x293
+#define mmMP1_SMN_C2PMSG_67            0x283
+#define mmMP1_SMN_C2PMSG_91_BASE_IDX   0
+#define mmMP1_SMN_C2PMSG_83_BASE_IDX   0
+#define mmMP1_SMN_C2PMSG_67_BASE_IDX   0
+
+#define MP1_SMN_C2PMSG_91__CONTENT_MASK                    0xffffffffL
+#define MP1_SMN_C2PMSG_83__CONTENT_MASK                    0xffffffffL
+#define MP1_SMN_C2PMSG_67__CONTENT_MASK                    0xffffffffL
+#define MP1_SMN_C2PMSG_91__CONTENT__SHIFT                  0x00000000
+#define MP1_SMN_C2PMSG_83__CONTENT__SHIFT                  0x00000000
+#define MP1_SMN_C2PMSG_67__CONTENT__SHIFT                  0x00000000
+
+#define REG(reg_name) \
+	(MP1_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
+
+#define FN(reg_name, field) \
+	FD(reg_name##__##field)
+
+#define VBIOSSMC_MSG_SetDispclkFreq           0x4
+#define VBIOSSMC_MSG_SetDprefclkFreq          0x5
+
+int rv1_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, unsigned int msg_id, unsigned int param)
+{
+	/* First clear response register */
+	REG_WRITE(MP1_SMN_C2PMSG_91, 0);
+
+	/* Set the parameter register for the SMU message, unit is Mhz */
+	REG_WRITE(MP1_SMN_C2PMSG_83, param);
+
+	/* Trigger the message transaction by writing the message ID */
+	REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
+
+	REG_WAIT(MP1_SMN_C2PMSG_91, CONTENT, 1, 10, 200000);
+
+	/* Actual dispclk set is returned in the parameter register */
+	return REG_READ(MP1_SMN_C2PMSG_83);
+}
+
+int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
+{
+	int actual_dispclk_set_mhz = -1;
+	struct dc *core_dc = clk_mgr->base.ctx->dc;
+	struct dmcu *dmcu = core_dc->res_pool->dmcu;
+
+	/*  Unit of SMU msg parameter is Mhz */
+	actual_dispclk_set_mhz = rv1_vbios_smu_send_msg_with_param(
+			clk_mgr,
+			VBIOSSMC_MSG_SetDispclkFreq,
+			requested_dispclk_khz / 1000);
+
+	/* Actual dispclk set is returned in the parameter register */
+	actual_dispclk_set_mhz = REG_READ(MP1_SMN_C2PMSG_83) * 1000;
+
+	if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) {
+		if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) {
+			if (clk_mgr->dfs_bypass_disp_clk != actual_dispclk_set_mhz)
+				dmcu->funcs->set_psr_wait_loop(dmcu,
+						actual_dispclk_set_mhz / 7);
+		}
+	}
+
+	return actual_dispclk_set_mhz * 1000;
+}
+
+int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
+{
+	int actual_dprefclk_set_mhz = -1;
+
+	actual_dprefclk_set_mhz = rv1_vbios_smu_send_msg_with_param(
+			clk_mgr,
+			VBIOSSMC_MSG_SetDprefclkFreq,
+			clk_mgr->base.dprefclk_khz / 1000);
+
+	/* TODO: add code for programing DP DTO, currently this is down by command table */
+
+	return actual_dprefclk_set_mhz * 1000;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
new file mode 100644
index 000000000000..083cb3158859
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr_vbios_smu.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_
+#define DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_
+
+int rv1_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+int rv1_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
+
+#endif /* DAL_DC_DCN10_RV1_CLK_MGR_VBIOS_SMU_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv2_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv2_clk_mgr.c
new file mode 100644
index 000000000000..b9ba6dbc2b46
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv2_clk_mgr.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "core_types.h"
+#include "clk_mgr_internal.h"
+#include "rv1_clk_mgr.h"
+#include "rv2_clk_mgr.h"
+#include "dce112/dce112_clk_mgr.h"
+
+static struct clk_mgr_internal_funcs rv2_clk_internal_funcs = {
+	.set_dispclk = dce112_set_dispclk,
+	.set_dprefclk = dce112_set_dprefclk
+};
+
+void rv2_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr, struct pp_smu_funcs *pp_smu)
+
+{
+	rv1_clk_mgr_construct(ctx, clk_mgr, pp_smu);
+
+	clk_mgr->funcs = &rv2_clk_internal_funcs;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv2_clk_mgr.h
index 97007cf33665..0c1f26ca563b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv2_clk_mgr.h
@@ -23,17 +23,10 @@
  *
  */
 
-#ifndef __DCN10_CLK_MGR_H__
-#define __DCN10_CLK_MGR_H__
+#ifndef __RV2_CLK_MGR_H__
+#define __RV2_CLK_MGR_H__
 
-#include "../dce/dce_clk_mgr.h"
+void rv2_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr, struct pp_smu_funcs *pp_smu);
 
-struct clk_bypass {
-	uint32_t dcfclk_bypass;
-	uint32_t dispclk_pypass;
-	uint32_t dprefclk_bypass;
-};
-
-struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx);
 
 #endif //__DCN10_CLK_MGR_H__
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 18c775a950cc..ed466087c8b5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -22,6 +22,8 @@
  * Authors: AMD
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "dc.h"
@@ -33,6 +35,7 @@
 
 #include "resource.h"
 
+#include "clk_mgr.h"
 #include "clock_source.h"
 #include "dc_bios_types.h"
 
@@ -169,9 +172,14 @@ static bool create_links(
 		link = link_create(&link_init_params);
 
 		if (link) {
-			dc->links[dc->link_count] = link;
-			link->dc = dc;
-			++dc->link_count;
+			if (dc->config.edp_not_connected &&
+					link->connector_signal == SIGNAL_TYPE_EDP) {
+				link_destroy(&link);
+			} else {
+				dc->links[dc->link_count] = link;
+				link->dc = dc;
+				++dc->link_count;
+			}
 		}
 	}
 
@@ -257,7 +265,7 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 	for (i = 0; i < MAX_PIPES; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		if (pipe->stream == stream && pipe->stream_res.stream_enc) {
+		if (pipe->stream == stream && pipe->stream_res.tg) {
 			pipe->stream->adjust = *adjust;
 			dc->hwss.set_drr(&pipe,
 					1,
@@ -484,128 +492,6 @@ void dc_stream_set_static_screen_events(struct dc *dc,
 	dc->hwss.set_static_screen_control(pipes_affected, num_pipes_affected, events);
 }
 
-void dc_link_set_drive_settings(struct dc *dc,
-				struct link_training_settings *lt_settings,
-				const struct dc_link *link)
-{
-
-	int i;
-
-	for (i = 0; i < dc->link_count; i++) {
-		if (dc->links[i] == link)
-			break;
-	}
-
-	if (i >= dc->link_count)
-		ASSERT_CRITICAL(false);
-
-	dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
-}
-
-void dc_link_perform_link_training(struct dc *dc,
-				   struct dc_link_settings *link_setting,
-				   bool skip_video_pattern)
-{
-	int i;
-
-	for (i = 0; i < dc->link_count; i++)
-		dc_link_dp_perform_link_training(
-			dc->links[i],
-			link_setting,
-			skip_video_pattern);
-}
-
-void dc_link_set_preferred_link_settings(struct dc *dc,
-					 struct dc_link_settings *link_setting,
-					 struct dc_link *link)
-{
-	int i;
-	struct pipe_ctx *pipe;
-	struct dc_stream_state *link_stream;
-	struct dc_link_settings store_settings = *link_setting;
-
-	link->preferred_link_setting = store_settings;
-
-	/* Retrain with preferred link settings only relevant for
-	 * DP signal type
-	 */
-	if (!dc_is_dp_signal(link->connector_signal))
-		return;
-
-	for (i = 0; i < MAX_PIPES; i++) {
-		pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-		if (pipe->stream && pipe->stream->link) {
-			if (pipe->stream->link == link)
-				break;
-		}
-	}
-
-	/* Stream not found */
-	if (i == MAX_PIPES)
-		return;
-
-	link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream;
-
-	/* Cannot retrain link if backend is off */
-	if (link_stream->dpms_off)
-		return;
-
-	if (link_stream)
-		decide_link_settings(link_stream, &store_settings);
-
-	if ((store_settings.lane_count != LANE_COUNT_UNKNOWN) &&
-		(store_settings.link_rate != LINK_RATE_UNKNOWN))
-		dp_retrain_link_dp_test(link, &store_settings, false);
-}
-
-void dc_link_enable_hpd(const struct dc_link *link)
-{
-	dc_link_dp_enable_hpd(link);
-}
-
-void dc_link_disable_hpd(const struct dc_link *link)
-{
-	dc_link_dp_disable_hpd(link);
-}
-
-
-void dc_link_set_test_pattern(struct dc_link *link,
-			      enum dp_test_pattern test_pattern,
-			      const struct link_training_settings *p_link_settings,
-			      const unsigned char *p_custom_pattern,
-			      unsigned int cust_pattern_size)
-{
-	if (link != NULL)
-		dc_link_dp_set_test_pattern(
-			link,
-			test_pattern,
-			p_link_settings,
-			p_custom_pattern,
-			cust_pattern_size);
-}
-
-uint32_t dc_link_bandwidth_kbps(
-	const struct dc_link *link,
-	const struct dc_link_settings *link_setting)
-{
-	uint32_t link_bw_kbps = link_setting->link_rate * LINK_RATE_REF_FREQ_IN_KHZ; /* bytes per sec */
-
-	link_bw_kbps *= 8;   /* 8 bits per byte*/
-	link_bw_kbps *= link_setting->lane_count;
-
-	return link_bw_kbps;
-
-}
-
-const struct dc_link_settings *dc_link_get_link_cap(
-		const struct dc_link *link)
-{
-	if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN &&
-			link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN)
-		return &link->preferred_link_setting;
-	return &link->verified_link_cap;
-}
-
 static void destruct(struct dc *dc)
 {
 	dc_release_state(dc->current_state);
@@ -613,6 +499,11 @@ static void destruct(struct dc *dc)
 
 	destroy_links(dc);
 
+	if (dc->clk_mgr) {
+		dc_destroy_clk_mgr(dc->clk_mgr);
+		dc->clk_mgr = NULL;
+	}
+
 	dc_destroy_resource_pool(dc);
 
 	if (dc->ctx->gpio_service)
@@ -756,6 +647,10 @@ static bool construct(struct dc *dc,
 	if (!dc->res_pool)
 		goto fail;
 
+	dc->clk_mgr = dc_clk_mgr_create(dc->ctx, dc->res_pool->pp_smu, dc->res_pool->dccg);
+	if (!dc->clk_mgr)
+		goto fail;
+
 	/* Creation of current_state must occur after dc->dml
 	 * is initialized in dc_create_resource_pool because
 	 * on creation it copies the contents of dc->dml
@@ -1136,10 +1031,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	/* Program all planes within new context*/
 	for (i = 0; i < context->stream_count; i++) {
 		const struct dc_link *link = context->streams[i]->link;
-		struct dc_stream_status *status;
-
-		if (context->streams[i]->apply_seamless_boot_optimization)
-			context->streams[i]->apply_seamless_boot_optimization = false;
 
 		if (!context->streams[i]->mode_changed)
 			continue;
@@ -1164,9 +1055,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 			}
 		}
 
-		status = dc_stream_get_status_from_state(context, context->streams[i]);
-		context->streams[i]->out.otg_offset = status->primary_otg_inst;
-
 		CONN_MSG_MODE(link, "{%dx%d, %dx%d@%dKhz}",
 				context->streams[i]->timing.h_addressable,
 				context->streams[i]->timing.v_addressable,
@@ -1331,71 +1219,94 @@ static bool is_surface_in_context(
 static enum surface_update_type get_plane_info_update_type(const struct dc_surface_update *u)
 {
 	union surface_update_flags *update_flags = &u->surface->update_flags;
+	enum surface_update_type update_type = UPDATE_TYPE_FAST;
 
 	if (!u->plane_info)
 		return UPDATE_TYPE_FAST;
 
-	if (u->plane_info->color_space != u->surface->color_space)
+	if (u->plane_info->color_space != u->surface->color_space) {
 		update_flags->bits.color_space_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
-	if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
+	if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror) {
 		update_flags->bits.horizontal_mirror_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
-	if (u->plane_info->rotation != u->surface->rotation)
+	if (u->plane_info->rotation != u->surface->rotation) {
 		update_flags->bits.rotation_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+	}
 
-	if (u->plane_info->format != u->surface->format)
+	if (u->plane_info->format != u->surface->format) {
 		update_flags->bits.pixel_format_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+	}
 
-	if (u->plane_info->stereo_format != u->surface->stereo_format)
+	if (u->plane_info->stereo_format != u->surface->stereo_format) {
 		update_flags->bits.stereo_format_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+	}
 
-	if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha)
+	if (u->plane_info->per_pixel_alpha != u->surface->per_pixel_alpha) {
 		update_flags->bits.per_pixel_alpha_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
-	if (u->plane_info->global_alpha_value != u->surface->global_alpha_value)
+	if (u->plane_info->global_alpha_value != u->surface->global_alpha_value) {
 		update_flags->bits.global_alpha_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
+
+	if (u->plane_info->sdr_white_level != u->surface->sdr_white_level) {
+		update_flags->bits.sdr_white_level = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
 	if (u->plane_info->dcc.enable != u->surface->dcc.enable
 			|| u->plane_info->dcc.grph.independent_64b_blks != u->surface->dcc.grph.independent_64b_blks
-			|| u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch)
+			|| u->plane_info->dcc.grph.meta_pitch != u->surface->dcc.grph.meta_pitch) {
 		update_flags->bits.dcc_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
 	if (resource_pixel_format_to_bpp(u->plane_info->format) !=
-			resource_pixel_format_to_bpp(u->surface->format))
+			resource_pixel_format_to_bpp(u->surface->format)) {
 		/* different bytes per element will require full bandwidth
 		 * and DML calculation
 		 */
 		update_flags->bits.bpp_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+	}
 
 	if (u->plane_info->plane_size.grph.surface_pitch != u->surface->plane_size.grph.surface_pitch
 			|| u->plane_info->plane_size.video.luma_pitch != u->surface->plane_size.video.luma_pitch
-			|| u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch)
+			|| u->plane_info->plane_size.video.chroma_pitch != u->surface->plane_size.video.chroma_pitch) {
 		update_flags->bits.plane_size_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+	}
 
 
 	if (memcmp(&u->plane_info->tiling_info, &u->surface->tiling_info,
 			sizeof(union dc_tiling_info)) != 0) {
 		update_flags->bits.swizzle_change = 1;
+		elevate_update_type(&update_type, UPDATE_TYPE_MED);
+
 		/* todo: below are HW dependent, we should add a hook to
 		 * DCE/N resource and validated there.
 		 */
-		if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR)
+		if (u->plane_info->tiling_info.gfx9.swizzle != DC_SW_LINEAR) {
 			/* swizzled mode requires RQ to be setup properly,
 			 * thus need to run DML to calculate RQ settings
 			 */
 			update_flags->bits.bandwidth_change = 1;
+			elevate_update_type(&update_type, UPDATE_TYPE_FULL);
+		}
 	}
 
-	if (update_flags->bits.rotation_change
-			|| update_flags->bits.stereo_format_change
-			|| update_flags->bits.pixel_format_change
-			|| update_flags->bits.bpp_change
-			|| update_flags->bits.bandwidth_change
-			|| update_flags->bits.output_tf_change)
-		return UPDATE_TYPE_FULL;
-
-	return update_flags->raw ? UPDATE_TYPE_MED : UPDATE_TYPE_FAST;
+	/* This should be UPDATE_TYPE_FAST if nothing has changed. */
+	return update_type;
 }
 
 static enum surface_update_type get_scaling_info_update_type(
@@ -1475,6 +1386,9 @@ static enum surface_update_type det_surface_update(const struct dc *dc,
 	type = get_scaling_info_update_type(u);
 	elevate_update_type(&overall_type, type);
 
+	if (u->flip_addr)
+		update_flags->bits.addr_update = 1;
+
 	if (u->in_transfer_func)
 		update_flags->bits.in_transfer_func_change = 1;
 
@@ -1711,13 +1625,6 @@ static void commit_planes_do_stream_update(struct dc *dc,
 			pipe_ctx->stream &&
 			pipe_ctx->stream == stream) {
 
-			/* Fast update*/
-			// VRR program can be done as part of FAST UPDATE
-			if (stream_update->adjust)
-				dc->hwss.set_drr(&pipe_ctx, 1,
-					stream_update->adjust->v_total_min,
-					stream_update->adjust->v_total_max);
-
 			if (stream_update->periodic_interrupt0 &&
 					dc->hwss.setup_periodic_interrupt)
 				dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE0);
@@ -1792,10 +1699,15 @@ static void commit_planes_for_stream(struct dc *dc,
 	if (dc->optimize_seamless_boot && surface_count > 0) {
 		/* Optimize seamless boot flag keeps clocks and watermarks high until
 		 * first flip. After first flip, optimization is required to lower
-		 * bandwidth.
+		 * bandwidth. Important to note that it is expected UEFI will
+		 * only light up a single display on POST, therefore we only expect
+		 * one stream with seamless boot flag set.
 		 */
-		dc->optimize_seamless_boot = false;
-		dc->optimized_required = true;
+		if (stream->apply_seamless_boot_optimization) {
+			stream->apply_seamless_boot_optimization = false;
+			dc->optimize_seamless_boot = false;
+			dc->optimized_required = true;
+		}
 	}
 
 	if (update_type == UPDATE_TYPE_FULL && !dc->optimize_seamless_boot) {
@@ -1870,6 +1782,20 @@ static void commit_planes_for_stream(struct dc *dc,
 
 		dc->hwss.pipe_control_lock(dc, top_pipe_to_program, false);
 	}
+
+	// Fire manual trigger only when bottom plane is flipped
+	for (j = 0; j < dc->res_pool->pipe_count; j++) {
+		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+		if (pipe_ctx->bottom_pipe ||
+				!pipe_ctx->stream ||
+				pipe_ctx->stream != stream ||
+				!pipe_ctx->plane_state->update_flags.bits.addr_update)
+			continue;
+
+		if (pipe_ctx->stream_res.tg->funcs->program_manual_trigger)
+			pipe_ctx->stream_res.tg->funcs->program_manual_trigger(pipe_ctx->stream_res.tg);
+	}
 }
 
 void dc_commit_updates_for_stream(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 83d121510ef5..c026b393f3c5 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dm_services.h"
 #include "core_types.h"
 #include "timing_generator.h"
@@ -45,8 +47,10 @@ enum dc_color_space_type {
 	COLOR_SPACE_RGB_LIMITED_TYPE,
 	COLOR_SPACE_YCBCR601_TYPE,
 	COLOR_SPACE_YCBCR709_TYPE,
+	COLOR_SPACE_YCBCR2020_TYPE,
 	COLOR_SPACE_YCBCR601_LIMITED_TYPE,
-	COLOR_SPACE_YCBCR709_LIMITED_TYPE
+	COLOR_SPACE_YCBCR709_LIMITED_TYPE,
+	COLOR_SPACE_YCBCR709_BLACK_TYPE,
 };
 
 static const struct tg_color black_color_format[] = {
@@ -80,7 +84,6 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
 	{ COLOR_SPACE_YCBCR709_TYPE,
 		{ 0xE04, 0xF345, 0xFEB7, 0x1004, 0x5D3, 0x1399, 0x1FA,
 				0x201, 0xFCCA, 0xF533, 0xE04, 0x1004} },
-
 	/* TODO: correct values below */
 	{ COLOR_SPACE_YCBCR601_LIMITED_TYPE,
 		{ 0xE00, 0xF447, 0xFDB9, 0x1000, 0x991,
@@ -88,6 +91,12 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = {
 	{ COLOR_SPACE_YCBCR709_LIMITED_TYPE,
 		{ 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3,
 				0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} },
+	{ COLOR_SPACE_YCBCR2020_TYPE,
+		{ 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2,
+				0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} },
+	{ COLOR_SPACE_YCBCR709_BLACK_TYPE,
+		{ 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000,
+				0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} },
 };
 
 static bool is_rgb_type(
@@ -149,6 +158,16 @@ static bool is_ycbcr709_type(
 	return ret;
 }
 
+static bool is_ycbcr2020_type(
+	enum dc_color_space color_space)
+{
+	bool ret = false;
+
+	if (color_space == COLOR_SPACE_2020_YCBCR)
+		ret = true;
+	return ret;
+}
+
 static bool is_ycbcr709_limited_type(
 		enum dc_color_space color_space)
 {
@@ -174,7 +193,12 @@ enum dc_color_space_type get_color_space_type(enum dc_color_space color_space)
 		type = COLOR_SPACE_YCBCR601_LIMITED_TYPE;
 	else if (is_ycbcr709_limited_type(color_space))
 		type = COLOR_SPACE_YCBCR709_LIMITED_TYPE;
-
+	else if (is_ycbcr2020_type(color_space))
+		type = COLOR_SPACE_YCBCR2020_TYPE;
+	else if (color_space == COLOR_SPACE_YCBCR709)
+		type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
+	else if (color_space == COLOR_SPACE_YCBCR709_BLACK)
+		type = COLOR_SPACE_YCBCR709_BLACK_TYPE;
 	return type;
 }
 
@@ -206,6 +230,7 @@ void color_space_to_black_color(
 	switch (colorspace) {
 	case COLOR_SPACE_YCBCR601:
 	case COLOR_SPACE_YCBCR709:
+	case COLOR_SPACE_YCBCR709_BLACK:
 	case COLOR_SPACE_YCBCR601_LIMITED:
 	case COLOR_SPACE_YCBCR709_LIMITED:
 	case COLOR_SPACE_2020_YCBCR:
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index b37ecc3ede61..f48863cf796b 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "atom.h"
 #include "dm_helpers.h"
@@ -42,6 +44,7 @@
 #include "fixed31_32.h"
 #include "dpcd_defs.h"
 #include "dmcu.h"
+#include "hw/clk_mgr.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -704,6 +707,7 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 
 	if (new_connection_type != dc_connection_none) {
 		link->type = new_connection_type;
+		link->link_state_valid = false;
 
 		/* From Disconnected-to-Connected. */
 		switch (link->connector_signal) {
@@ -906,10 +910,10 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 			sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
 
 		/* Connectivity log: detection */
-		for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) {
+		for (i = 0; i < sink->dc_edid.length / DC_EDID_BLOCK_SIZE; i++) {
 			CONN_DATA_DETECT(link,
-					&sink->dc_edid.raw_edid[i * EDID_BLOCK_SIZE],
-					EDID_BLOCK_SIZE,
+					&sink->dc_edid.raw_edid[i * DC_EDID_BLOCK_SIZE],
+					DC_EDID_BLOCK_SIZE,
 					"%s: [Block %d] ", sink->edid_caps.display_name, i);
 		}
 
@@ -2337,7 +2341,8 @@ void core_link_resume(struct dc_link *link)
 static struct fixed31_32 get_pbn_per_slot(struct dc_stream_state *stream)
 {
 	struct fixed31_32 mbytes_per_sec;
-	uint32_t link_rate_in_mbytes_per_sec = dc_link_bandwidth_kbps(stream->link, &stream->link->cur_link_settings);
+	uint32_t link_rate_in_mbytes_per_sec = dc_link_bandwidth_kbps(stream->link,
+			&stream->link->cur_link_settings);
 	link_rate_in_mbytes_per_sec /= 8000; /* Kbits to MBytes */
 
 	mbytes_per_sec = dc_fixpt_from_int(link_rate_in_mbytes_per_sec);
@@ -2631,6 +2636,8 @@ void core_link_enable_stream(
 			stream->phy_pix_clk,
 			pipe_ctx->stream_res.audio != NULL);
 
+	pipe_ctx->stream->link->link_state_valid = true;
+
 	if (dc_is_dvi_signal(pipe_ctx->stream->signal))
 		pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute(
 			pipe_ctx->stream_res.stream_enc,
@@ -2713,17 +2720,37 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
 {
 	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;
 	struct dc_stream_state *stream = pipe_ctx->stream;
+	struct dc_link *link = stream->sink->link;
 
 	core_dc->hwss.blank_stream(pipe_ctx);
 
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 
-	if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
-		dal_ddc_service_write_scdc_data(
-			stream->link->ddc, 0,
-			stream->timing.flags.LTE_340MCSC_SCRAMBLE);
+	if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) {
+		struct ext_hdmi_settings settings = {0};
+		enum engine_id eng_id = pipe_ctx->stream_res.stream_enc->id;
 
+		unsigned short masked_chip_caps = link->chip_caps &
+				EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK;
+		//Need to inform that sink is going to use legacy HDMI mode.
+		dal_ddc_service_write_scdc_data(
+			link->ddc,
+			165000,//vbios only handles 165Mhz.
+			false);
+		if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT) {
+			/* DP159, Retimer settings */
+			if (get_ext_hdmi_settings(pipe_ctx, eng_id, &settings))
+				write_i2c_retimer_setting(pipe_ctx,
+						false, false, &settings);
+			else
+				write_i2c_default_retimer_setting(pipe_ctx,
+						false, false);
+		} else if (masked_chip_caps == EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204) {
+			/* PI3EQX1204, Redriver settings */
+			write_i2c_redriver_setting(pipe_ctx, false);
+		}
+	}
 	core_dc->hwss.disable_stream(pipe_ctx, option);
 
 	disable_link(pipe_ctx->stream->link, pipe_ctx->stream->signal);
@@ -2834,3 +2861,127 @@ uint32_t dc_bandwidth_in_kbps_from_timing(
 	return kbps;
 
 }
+
+void dc_link_set_drive_settings(struct dc *dc,
+				struct link_training_settings *lt_settings,
+				const struct dc_link *link)
+{
+
+	int i;
+
+	for (i = 0; i < dc->link_count; i++) {
+		if (dc->links[i] == link)
+			break;
+	}
+
+	if (i >= dc->link_count)
+		ASSERT_CRITICAL(false);
+
+	dc_link_dp_set_drive_settings(dc->links[i], lt_settings);
+}
+
+void dc_link_perform_link_training(struct dc *dc,
+				   struct dc_link_settings *link_setting,
+				   bool skip_video_pattern)
+{
+	int i;
+
+	for (i = 0; i < dc->link_count; i++)
+		dc_link_dp_perform_link_training(
+			dc->links[i],
+			link_setting,
+			skip_video_pattern);
+}
+
+void dc_link_set_preferred_link_settings(struct dc *dc,
+					 struct dc_link_settings *link_setting,
+					 struct dc_link *link)
+{
+	int i;
+	struct pipe_ctx *pipe;
+	struct dc_stream_state *link_stream;
+	struct dc_link_settings store_settings = *link_setting;
+
+	link->preferred_link_setting = store_settings;
+
+	/* Retrain with preferred link settings only relevant for
+	 * DP signal type
+	 */
+	if (!dc_is_dp_signal(link->connector_signal))
+		return;
+
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe->stream && pipe->stream->link) {
+			if (pipe->stream->link == link)
+				break;
+		}
+	}
+
+	/* Stream not found */
+	if (i == MAX_PIPES)
+		return;
+
+	link_stream = link->dc->current_state->res_ctx.pipe_ctx[i].stream;
+
+	/* Cannot retrain link if backend is off */
+	if (link_stream->dpms_off)
+		return;
+
+	if (link_stream)
+		decide_link_settings(link_stream, &store_settings);
+
+	if ((store_settings.lane_count != LANE_COUNT_UNKNOWN) &&
+		(store_settings.link_rate != LINK_RATE_UNKNOWN))
+		dp_retrain_link_dp_test(link, &store_settings, false);
+}
+
+void dc_link_enable_hpd(const struct dc_link *link)
+{
+	dc_link_dp_enable_hpd(link);
+}
+
+void dc_link_disable_hpd(const struct dc_link *link)
+{
+	dc_link_dp_disable_hpd(link);
+}
+
+
+void dc_link_set_test_pattern(struct dc_link *link,
+			      enum dp_test_pattern test_pattern,
+			      const struct link_training_settings *p_link_settings,
+			      const unsigned char *p_custom_pattern,
+			      unsigned int cust_pattern_size)
+{
+	if (link != NULL)
+		dc_link_dp_set_test_pattern(
+			link,
+			test_pattern,
+			p_link_settings,
+			p_custom_pattern,
+			cust_pattern_size);
+}
+
+uint32_t dc_link_bandwidth_kbps(
+	const struct dc_link *link,
+	const struct dc_link_settings *link_setting)
+{
+	uint32_t link_bw_kbps =
+		link_setting->link_rate * LINK_RATE_REF_FREQ_IN_KHZ; /* bytes per sec */
+
+	link_bw_kbps *= 8;   /* 8 bits per byte*/
+	link_bw_kbps *= link_setting->lane_count;
+
+	return link_bw_kbps;
+
+}
+
+const struct dc_link_settings *dc_link_get_link_cap(
+		const struct dc_link *link)
+{
+	if (link->preferred_link_setting.lane_count != LANE_COUNT_UNKNOWN &&
+			link->preferred_link_setting.link_rate != LINK_RATE_UNKNOWN)
+		return &link->preferred_link_setting;
+	return &link->verified_link_cap;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index f02092a0dc76..eecc631ca4f8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dm_helpers.h"
 #include "gpio_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 1ee544a32ebb..65d6caedbd82 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -2361,6 +2361,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 	/*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST.
 	 */
 	uint8_t dpcd_dprx_data = '\0';
+	uint8_t dpcd_power_state = '\0';
 
 	struct dp_device_vendor_id sink_id;
 	union down_stream_port_count down_strm_port_count;
@@ -2377,6 +2378,17 @@ static bool retrieve_link_cap(struct dc_link *link)
 	memset(&edp_config_cap, '\0',
 		sizeof(union edp_configuration_cap));
 
+	status = core_link_read_dpcd(link, DP_SET_POWER,
+				&dpcd_power_state, sizeof(dpcd_power_state));
+
+	/* Delay 1 ms if AUX CH is in power down state. Based on spec
+	 * section 2.3.1.2, if AUX CH may be powered down due to
+	 * write to DPCD 600h = 2. Sink AUX CH is monitoring differential
+	 * signal and may need up to 1 ms before being able to reply.
+	 */
+	if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3)
+		udelay(1000);
+
 	for (i = 0; i < read_dpcd_retry_cnt; i++) {
 		status = core_link_read_dpcd(
 				link,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index eac7186e4f08..1b5756590a6a 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -22,6 +22,9 @@
  * Authors: AMD
  *
  */
+
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "resource.h"
@@ -93,10 +96,8 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case FAMILY_RV:
 		dc_version = DCN_VERSION_1_0;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 		if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev))
 			dc_version = DCN_VERSION_1_01;
-#endif
 		break;
 #endif
 	default:
@@ -147,9 +148,7 @@ struct resource_pool *dc_create_resource_pool(struct dc  *dc,
 
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	case DCN_VERSION_1_01:
-#endif
 		res_pool = dcn10_create_resource_pool(init_data, dc);
 		break;
 #endif
@@ -1184,24 +1183,27 @@ static int acquire_first_split_pipe(
 	int i;
 
 	for (i = 0; i < pool->pipe_count; i++) {
-		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
-
-		if (pipe_ctx->top_pipe &&
-				pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state) {
-			pipe_ctx->top_pipe->bottom_pipe = pipe_ctx->bottom_pipe;
-			if (pipe_ctx->bottom_pipe)
-				pipe_ctx->bottom_pipe->top_pipe = pipe_ctx->top_pipe;
-
-			memset(pipe_ctx, 0, sizeof(*pipe_ctx));
-			pipe_ctx->stream_res.tg = pool->timing_generators[i];
-			pipe_ctx->plane_res.hubp = pool->hubps[i];
-			pipe_ctx->plane_res.ipp = pool->ipps[i];
-			pipe_ctx->plane_res.dpp = pool->dpps[i];
-			pipe_ctx->stream_res.opp = pool->opps[i];
-			pipe_ctx->plane_res.mpcc_inst = pool->dpps[i]->inst;
-			pipe_ctx->pipe_idx = i;
-
-			pipe_ctx->stream = stream;
+		struct pipe_ctx *split_pipe = &res_ctx->pipe_ctx[i];
+
+		if (split_pipe->top_pipe && !dc_res_is_odm_head_pipe(split_pipe) &&
+				split_pipe->top_pipe->plane_state == split_pipe->plane_state) {
+			split_pipe->top_pipe->bottom_pipe = split_pipe->bottom_pipe;
+			if (split_pipe->bottom_pipe)
+				split_pipe->bottom_pipe->top_pipe = split_pipe->top_pipe;
+
+			if (split_pipe->top_pipe->plane_state)
+				resource_build_scaling_params(split_pipe->top_pipe);
+
+			memset(split_pipe, 0, sizeof(*split_pipe));
+			split_pipe->stream_res.tg = pool->timing_generators[i];
+			split_pipe->plane_res.hubp = pool->hubps[i];
+			split_pipe->plane_res.ipp = pool->ipps[i];
+			split_pipe->plane_res.dpp = pool->dpps[i];
+			split_pipe->stream_res.opp = pool->opps[i];
+			split_pipe->plane_res.mpcc_inst = pool->dpps[i]->inst;
+			split_pipe->pipe_idx = i;
+
+			split_pipe->stream = stream;
 			return i;
 		}
 	}
@@ -1647,46 +1649,6 @@ static int acquire_first_free_pipe(
 	return -1;
 }
 
-static struct stream_encoder *find_first_free_match_stream_enc_for_link(
-		struct resource_context *res_ctx,
-		const struct resource_pool *pool,
-		struct dc_stream_state *stream)
-{
-	int i;
-	int j = -1;
-	struct dc_link *link = stream->link;
-
-	for (i = 0; i < pool->stream_enc_count; i++) {
-		if (!res_ctx->is_stream_enc_acquired[i] &&
-				pool->stream_enc[i]) {
-			/* Store first available for MST second display
-			 * in daisy chain use case */
-			j = i;
-			if (pool->stream_enc[i]->id ==
-					link->link_enc->preferred_engine)
-				return pool->stream_enc[i];
-		}
-	}
-
-	/*
-	 * below can happen in cases when stream encoder is acquired:
-	 * 1) for second MST display in chain, so preferred engine already
-	 * acquired;
-	 * 2) for another link, which preferred engine already acquired by any
-	 * MST configuration.
-	 *
-	 * If signal is of DP type and preferred engine not found, return last available
-	 *
-	 * TODO - This is just a patch up and a generic solution is
-	 * required for non DP connectors.
-	 */
-
-	if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
-		return pool->stream_enc[j];
-
-	return NULL;
-}
-
 static struct audio *find_first_free_audio(
 		struct resource_context *res_ctx,
 		const struct resource_pool *pool,
@@ -1998,7 +1960,7 @@ enum dc_status resource_map_pool_resources(
 	pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
 
 	pipe_ctx->stream_res.stream_enc =
-		find_first_free_match_stream_enc_for_link(
+		dc->res_pool->funcs->find_first_free_match_stream_enc_for_link(
 			&context->res_ctx, pool, stream);
 
 	if (!pipe_ctx->stream_res.stream_enc)
@@ -2059,7 +2021,7 @@ void dc_resource_state_construct(
 		const struct dc *dc,
 		struct dc_state *dst_ctx)
 {
-	dst_ctx->clk_mgr = dc->res_pool->clk_mgr;
+	dst_ctx->clk_mgr = dc->clk_mgr;
 }
 
 /**
@@ -2354,7 +2316,18 @@ static void set_avi_info_frame(
 			break;
 		}
 	}
+	/* If VIC >= 128, the Source shall use AVI InfoFrame Version 3*/
 	hdmi_info.bits.VIC0_VIC7 = vic;
+	if (vic >= 128)
+		hdmi_info.bits.header.version = 3;
+	/* If (C1, C0)=(1, 1) and (EC2, EC1, EC0)=(1, 1, 1),
+	 * the Source shall use 20 AVI InfoFrame Version 4
+	 */
+	if (hdmi_info.bits.C0_C1 == COLORIMETRY_EXTENDED &&
+			hdmi_info.bits.EC0_EC2 == COLORIMETRYEX_RESERVED) {
+		hdmi_info.bits.header.version = 4;
+		hdmi_info.bits.header.length = 14;
+	}
 
 	/* pixel repetition
 	 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
@@ -2373,12 +2346,19 @@ static void set_avi_info_frame(
 	hdmi_info.bits.bar_right = (stream->timing.h_total
 			- stream->timing.h_border_right + 1);
 
+    /* Additional Colorimetry Extension
+     * Used in conduction with C0-C1 and EC0-EC2
+     * 0 = DCI-P3 RGB (D65)
+     * 1 = DCI-P3 RGB (theater)
+     */
+	hdmi_info.bits.ACE0_ACE3 = 0;
+
 	/* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */
 	check_sum = &hdmi_info.packet_raw_data.sb[0];
 
-	*check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2;
+	*check_sum = HDMI_INFOFRAME_TYPE_AVI + hdmi_info.bits.header.length + hdmi_info.bits.header.version;
 
-	for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++)
+	for (byte_index = 1; byte_index <= hdmi_info.bits.header.length; byte_index++)
 		*check_sum += hdmi_info.packet_raw_data.sb[byte_index];
 
 	/* one byte complement */
@@ -2425,21 +2405,6 @@ static void set_spd_info_packet(
 	*info_packet = stream->vrr_infopacket;
 }
 
-static void set_dp_sdp_info_packet(
-		struct dc_info_packet *info_packet,
-		struct dc_stream_state *stream)
-{
-	/* SPD info packet for custom sdp message */
-
-	/* Return if false. If true,
-	 * set the corresponding bit in the info packet
-	 */
-	if (!stream->dpsdp_infopacket.valid)
-		return;
-
-	*info_packet = stream->dpsdp_infopacket;
-}
-
 static void set_hdr_static_info_packet(
 		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
@@ -2495,7 +2460,6 @@ void dc_resource_state_copy_construct(
 
 		if (cur_pipe->bottom_pipe)
 			cur_pipe->bottom_pipe = &dst_ctx->res_ctx.pipe_ctx[cur_pipe->bottom_pipe->pipe_idx];
-
 	}
 
 	for (i = 0; i < dst_ctx->stream_count; i++) {
@@ -2536,7 +2500,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
 	info->spd.valid = false;
 	info->hdrsmd.valid = false;
 	info->vsc.valid = false;
-	info->dpsdp.valid = false;
 
 	signal = pipe_ctx->stream->signal;
 
@@ -2556,8 +2519,6 @@ void resource_build_info_frame(struct pipe_ctx *pipe_ctx)
 		set_spd_info_packet(&info->spd, pipe_ctx->stream);
 
 		set_hdr_static_info_packet(&info->hdrsmd, pipe_ctx->stream);
-
-		set_dp_sdp_info_packet(&info->dpsdp, pipe_ctx->stream);
 	}
 
 	patch_gamut_packet_checksum(&info->gamut);
@@ -2644,6 +2605,10 @@ bool pipe_need_reprogram(
 	if (is_vsc_info_packet_changed(pipe_ctx_old->stream, pipe_ctx->stream))
 		return true;
 
+	if (false == pipe_ctx_old->stream->link->link_state_valid &&
+		false == pipe_ctx_old->stream->dpms_off)
+		return true;
+
 	return false;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
index 9971b515c3eb..5cbfdf1c4b11 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dm_helpers.h"
 #include "core_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 96e97d25d639..7fe0dbe30666 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dc.h"
 #include "core_types.h"
@@ -47,8 +50,8 @@ void update_stream_signal(struct dc_stream_state *stream, struct dc_sink *sink)
 
 	if (dc_is_dvi_signal(stream->signal)) {
 		if (stream->ctx->dc->caps.dual_link_dvi &&
-		    (stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK &&
-		    sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+			(stream->timing.pix_clk_100hz / 10) > TMDS_MAX_PIXEL_CLOCK &&
+			sink->sink_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
 			stream->signal = SIGNAL_TYPE_DVI_DUAL_LINK;
 		else
 			stream->signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
@@ -179,6 +182,9 @@ struct dc_stream_state *dc_copy_stream(const struct dc_stream_state *stream)
 	if (new_stream->out_transfer_func)
 		dc_transfer_func_retain(new_stream->out_transfer_func);
 
+	new_stream->stream_id = new_stream->ctx->dc_stream_id_count;
+	new_stream->ctx->dc_stream_id_count++;
+
 	kref_init(&new_stream->refcount);
 
 	return new_stream;
@@ -229,7 +235,7 @@ static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc)
 	unsigned int us_per_line;
 
 	if (stream->ctx->asic_id.chip_family == FAMILY_RV &&
-			ASIC_REV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) {
+			ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) {
 
 		vupdate_line = get_vupdate_offset_from_vsync(pipe_ctx);
 		if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos))
@@ -371,42 +377,12 @@ uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream)
 	return 0;
 }
 
-static void build_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx,
-		const uint8_t  *custom_sdp_message,
-		unsigned int sdp_message_size)
-{
-	uint8_t i;
-	struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
-
-	/* set valid info */
-	info->dpsdp.valid = true;
-
-	/* set sdp message header */
-	info->dpsdp.hb0 = custom_sdp_message[0]; /* package id */
-	info->dpsdp.hb1 = custom_sdp_message[1]; /* package type */
-	info->dpsdp.hb2 = custom_sdp_message[2]; /* package specific byte 0 any data */
-	info->dpsdp.hb3 = custom_sdp_message[3]; /* package specific byte 0 any data */
-
-	/* set sdp message data */
-	for (i = 0; i < 32; i++)
-		info->dpsdp.sb[i] = (custom_sdp_message[i+4]);
-
-}
-
-static void invalid_dp_sdp_info_frame(struct pipe_ctx *pipe_ctx)
-{
-	struct encoder_info_frame *info = &pipe_ctx->stream_res.encoder_info_frame;
-
-	/* in-valid info */
-	info->dpsdp.valid = false;
-}
-
 bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
 		const uint8_t *custom_sdp_message,
 		unsigned int sdp_message_size)
 {
 	int i;
-	struct dc  *core_dc;
+	struct dc  *dc;
 	struct resource_context *res_ctx;
 
 	if (stream == NULL) {
@@ -414,8 +390,8 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
 		return false;
 	}
 
-	core_dc = stream->ctx->dc;
-	res_ctx = &core_dc->current_state->res_ctx;
+	dc = stream->ctx->dc;
+	res_ctx = &dc->current_state->res_ctx;
 
 	for (i = 0; i < MAX_PIPES; i++) {
 		struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i];
@@ -423,11 +399,14 @@ bool dc_stream_send_dp_sdp(const struct dc_stream_state *stream,
 		if (pipe_ctx->stream != stream)
 			continue;
 
-		build_dp_sdp_info_frame(pipe_ctx, custom_sdp_message, sdp_message_size);
-
-		core_dc->hwss.update_info_frame(pipe_ctx);
+		if (dc->hwss.send_immediate_sdp_message != NULL)
+			dc->hwss.send_immediate_sdp_message(pipe_ctx,
+								custom_sdp_message,
+								sdp_message_size);
+		else
+			DC_LOG_WARNING("%s:send_immediate_sdp_message not implemented on this ASIC\n",
+			__func__);
 
-		invalid_dp_sdp_info_frame(pipe_ctx);
 	}
 
 	return true;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index a5e86f9b148f..87b3b03c3556 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/mm.h>
+
 /* DC interface (public) */
 #include "dm_services.h"
 #include "dc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 44e4b0465587..7ec6884acee4 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.27"
+#define DC_VER "3.2.32"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -205,6 +205,7 @@ struct dc_config {
 	bool disable_fractional_pwm;
 	bool allow_seamless_boot_optimization;
 	bool power_down_display_on_boot;
+	bool edp_not_connected;
 };
 
 enum visual_confirm {
@@ -366,6 +367,7 @@ struct dc_bounding_box_overrides {
 	int urgent_latency_ns;
 	int percent_of_ideal_drambw;
 	int dram_clock_change_latency_ns;
+	int min_dcfclk_mhz;
 };
 
 struct dc_state;
@@ -386,6 +388,8 @@ struct dc {
 	struct dc_state *current_state;
 	struct resource_pool *res_pool;
 
+	struct clk_mgr *clk_mgr;
+
 	/* Display Engine Clock levels */
 	struct dm_pp_clock_levels sclk_lvls;
 
@@ -540,12 +544,14 @@ struct dc_plane_status {
 union surface_update_flags {
 
 	struct {
+		uint32_t addr_update:1;
 		/* Medium updates */
 		uint32_t dcc_change:1;
 		uint32_t color_space_change:1;
 		uint32_t horizontal_mirror_change:1;
 		uint32_t per_pixel_alpha_change:1;
 		uint32_t global_alpha_change:1;
+		uint32_t sdr_white_level:1;
 		uint32_t rotation_change:1;
 		uint32_t swizzle_change:1;
 		uint32_t scaling_change:1;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 5e6c5eff49cf..30b2f9edd42f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -26,6 +26,9 @@
  *  Created on: Aug 30, 2016
  *      Author: agrodzov
  */
+
+#include <linux/delay.h>
+
 #include "dm_services.h"
 #include <stdarg.h>
 
@@ -297,7 +300,7 @@ void generic_reg_wait(const struct dc_context *ctx,
 	int i;
 
 	/* something is terribly wrong if time out is > 200ms. (5Hz) */
-	ASSERT(delay_between_poll_us * time_out_num_tries <= 200000);
+	ASSERT(delay_between_poll_us * time_out_num_tries <= 3000000);
 
 	for (i = 0; i <= time_out_num_tries; i++) {
 		if (i) {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index da55d623647a..c91b8aad78c9 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -534,6 +534,7 @@ enum dc_color_space {
 	COLOR_SPACE_DOLBYVISION,
 	COLOR_SPACE_APPCTRL,
 	COLOR_SPACE_CUSTOMPOINTS,
+	COLOR_SPACE_YCBCR709_BLACK,
 };
 
 enum dc_dither_option {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 7b9429e30d82..094009127e25 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -75,6 +75,7 @@ struct dc_link {
 	enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse  */
 	bool is_hpd_filter_disabled;
 	bool dp_ss_off;
+	bool link_state_valid;
 
 	/* caps is the same as reported_link_cap. link_traing use
 	 * reported_link_cap. Will clean up.  TODO
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 189bdab929a5..4da138ded8b7 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -172,7 +172,6 @@ struct dc_stream_update {
 	struct periodic_interrupt_config *periodic_interrupt0;
 	struct periodic_interrupt_config *periodic_interrupt1;
 
-	struct dc_crtc_timing_adjust *adjust;
 	struct dc_info_packet *vrr_infopacket;
 	struct dc_info_packet *vsc_infopacket;
 	struct dc_info_packet *vsp_infopacket;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6c2a3d9a4c2e..92a670894c05 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -104,7 +104,7 @@ struct dc_context {
 
 
 #define DC_MAX_EDID_BUFFER_SIZE 1024
-#define EDID_BLOCK_SIZE 128
+#define DC_EDID_BLOCK_SIZE 128
 #define MAX_SURFACE_NUM 4
 #define NUM_PIXEL_FORMATS 10
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile
index 6d7b64a743ca..fdf3d8f87eee 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile
@@ -28,7 +28,7 @@
 
 DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o dce_hwseq.o \
 dce_mem_input.o dce_clock_source.o dce_scl_filters.o dce_transform.o \
-dce_clk_mgr.o dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \
+dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \
 dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o
 
 AMD_DAL_DCE = $(addprefix $(AMDDALPATH)/dc/dce/,$(DCE))
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index da96229db53a..f8903bcabe49 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dce_abm.h"
 #include "dm_services.h"
 #include "reg_helper.h"
@@ -58,6 +60,9 @@ static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 	uint32_t rampingBoundary = 0xFFFF;
 
+	if (abm->dmcu_is_running == false)
+		return true;
+
 	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
 			1, 80000);
 
@@ -302,6 +307,9 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
+	if (abm->dmcu_is_running == false)
+		return true;
+
 	REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
 			1, 80000);
 
@@ -320,6 +328,9 @@ static bool dce_abm_immediate_disable(struct abm *abm)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(abm);
 
+	if (abm->dmcu_is_running == false)
+		return true;
+
 	dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY);
 
 	abm->stored_backlight_registers.BL_PWM_CNTL =
@@ -443,6 +454,7 @@ static void dce_abm_construct(
 	base->stored_backlight_registers.BL_PWM_CNTL2 = 0;
 	base->stored_backlight_registers.BL_PWM_PERIOD_CNTL = 0;
 	base->stored_backlight_registers.LVTMA_PWRSEQ_REF_DIV_BL_PWM_REF_DIV = 0;
+	base->dmcu_is_running = false;
 
 	abm_dce->regs = regs;
 	abm_dce->abm_shift = abm_shift;
@@ -473,6 +485,9 @@ void dce_abm_destroy(struct abm **abm)
 {
 	struct dce_abm *abm_dce = TO_DCE_ABM(*abm);
 
+	if (abm_dce->base.dmcu_is_running == true)
+		abm_dce->base.funcs->set_abm_immediate_disable(*abm);
+
 	kfree(abm_dce);
 	*abm = NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 7f6d724686f1..9b078a71de2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "reg_helper.h"
 #include "dce_audio.h"
 #include "dce/dce_11_0_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index bd33c47183fc..f2295e780031 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "core_types.h"
 #include "dce_aux.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index 963686380738..29d69dfc9848 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dce_clk_mgr.h"
 
 #include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index f70437aae8e0..8347be76c60a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 
@@ -33,6 +35,7 @@
 #include "include/logger_interface.h"
 
 #include "dce_clock_source.h"
+#include "clk_mgr.h"
 
 #include "reg_helper.h"
 
@@ -183,8 +186,8 @@ static bool calculate_fb_and_fractional_fb_divider(
 *RETURNS:
 * It fills the PLLSettings structure with PLL Dividers values
 * if calculated values are within required tolerance
-* It returns	- true if eror is within tolerance
-*		- false if eror is not within tolerance
+* It returns	- true if error is within tolerance
+*		- false if error is not within tolerance
 */
 static bool calc_fb_divider_checking_tolerance(
 		struct calc_pll_clock_source *calc_pll_cs,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 818536eea00a..ddd30fc0d76b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "core_types.h"
 #include "link_encoder.h"
 #include "dce_dmcu.h"
@@ -388,6 +391,9 @@ static bool dcn10_dmcu_init(struct dmcu *dmcu)
 		/* Set initialized ramping boundary value */
 		REG_WRITE(MASTER_COMM_DATA_REG1, 0xFFFF);
 
+		/* Set backlight ramping stepsize */
+		REG_WRITE(MASTER_COMM_DATA_REG2, abm_gain_stepsize);
+
 		/* Set command to initialize microcontroller */
 		REG_UPDATE(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0,
 			MCP_INIT_DMCU);
@@ -813,6 +819,9 @@ void dce_dmcu_destroy(struct dmcu **dmcu)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(*dmcu);
 
+	if (dmcu_dce->base.dmcu_state == DMCU_RUNNING)
+		dmcu_dce->base.funcs->set_psr_enable(*dmcu, false, true);
+
 	kfree(dmcu_dce);
 	*dmcu = NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
index 60ce56f60ae3..5bd0df55aa5d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.h
@@ -263,4 +263,6 @@ struct dmcu *dcn10_dmcu_create(
 
 void dce_dmcu_destroy(struct dmcu **dmcu);
 
+static const uint32_t abm_gain_stepsize = 0x0060;
+
 #endif /* _DCE_ABM_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
index cd26161bcc4d..5ca558766d2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c
@@ -22,6 +22,9 @@
  * Authors: AMD
  *
  */
+
+#include <linux/delay.h>
+
 #include "dce_i2c.h"
 #include "dce_i2c_hw.h"
 #include "reg_helper.h"
@@ -268,6 +271,8 @@ static bool setup_engine(
 	struct dce_i2c_hw *dce_i2c_hw)
 {
 	uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE;
+	/* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/
+	REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1);
 
 	if (dce_i2c_hw->setup_limit != 0)
 		i2c_setup_limit = dce_i2c_hw->setup_limit;
@@ -322,8 +327,6 @@ static void release_engine(
 
 	set_speed(dce_i2c_hw, dce_i2c_hw->original_speed);
 
-	/* Release I2C */
-	REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1);
 
 	/* Reset HW engine */
 	{
@@ -343,6 +346,9 @@ static void release_engine(
 	/* HW I2c engine - clock gating feature */
 	if (!dce_i2c_hw->engine_keep_power_up_count)
 		REG_UPDATE_N(SETUP, 1, FN(SETUP, DC_I2C_DDC1_ENABLE), 0);
+	/* Release I2C after reset, so HW or DMCU could use it */
+	REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1,
+		DC_I2C_SW_USE_I2C_REG_REQ, 0);
 
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
index 575500755b2e..f718e3d396f2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.h
@@ -105,6 +105,7 @@ enum {
 	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_DATA_DRIVE_SEL, mask_sh),\
 	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_TRANSACTION_DELAY, mask_sh),\
 	I2C_SF(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_INTRA_BYTE_DELAY, mask_sh),\
+	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, mask_sh),\
 	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, mask_sh),\
 	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_NO_QUEUED_SW_GO, mask_sh),\
 	I2C_SF(DC_I2C_ARBITRATION, DC_I2C_SW_PRIORITY, mask_sh),\
@@ -146,6 +147,7 @@ struct dce_i2c_shift {
 	uint8_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
 	uint8_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
 	uint8_t DC_I2C_SW_DONE_USING_I2C_REG;
+	uint8_t DC_I2C_SW_USE_I2C_REG_REQ;
 	uint8_t DC_I2C_NO_QUEUED_SW_GO;
 	uint8_t DC_I2C_SW_PRIORITY;
 	uint8_t DC_I2C_SOFT_RESET;
@@ -184,6 +186,7 @@ struct dce_i2c_mask {
 	uint32_t DC_I2C_DDC1_INTRA_TRANSACTION_DELAY;
 	uint32_t DC_I2C_DDC1_INTRA_BYTE_DELAY;
 	uint32_t DC_I2C_SW_DONE_USING_I2C_REG;
+	uint32_t DC_I2C_SW_USE_I2C_REG_REQ;
 	uint32_t DC_I2C_NO_QUEUED_SW_GO;
 	uint32_t DC_I2C_SW_PRIORITY;
 	uint32_t DC_I2C_SOFT_RESET;
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
index f0266694cb56..a5a11c251e25 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_sw.c
@@ -22,6 +22,9 @@
  * Authors: AMD
  *
  */
+
+#include <linux/delay.h>
+
 #include "dce_i2c.h"
 #include "dce_i2c_sw.h"
 #include "include/gpio_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
index 5d9506b3d46b..ce30dbf579d4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dce_ipp.h"
 #include "reg_helper.h"
 #include "dm_services.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 314c04a915d2..8527cce81c6f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "reg_helper.h"
 
 #include "core_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
index 87093894ea9e..51081d9ae3fb 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_opp.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "basics/conversion.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 14309fe6f2e6..5e2b4d47c548 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dc_bios_types.h"
 #include "dce_stream_encoder.h"
 #include "reg_helper.h"
@@ -418,6 +420,7 @@ static void dce110_stream_encoder_dp_set_stream_attribute(
 			break;
 		case COLOR_SPACE_YCBCR709:
 		case COLOR_SPACE_YCBCR709_LIMITED:
+		case COLOR_SPACE_YCBCR709_BLACK:
 			misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
 			misc1 = misc1 & ~0x80; /* bit7 = 0*/
 			dynamic_range_ycbcr = 1; /*bt709*/
@@ -1123,19 +1126,6 @@ union audio_cea_channels {
 	} channels;
 };
 
-struct audio_clock_info {
-	/* pixel clock frequency*/
-	uint32_t pixel_clock_in_10khz;
-	/* N - 32KHz audio */
-	uint32_t n_32khz;
-	/* CTS - 32KHz audio*/
-	uint32_t cts_32khz;
-	uint32_t n_44khz;
-	uint32_t cts_44khz;
-	uint32_t n_48khz;
-	uint32_t cts_48khz;
-};
-
 /* 25.2MHz/1.001*/
 /* 25.2MHz/1.001*/
 /* 25.2MHz*/
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
index 87771676acac..799d36299c9b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
@@ -25,6 +25,7 @@
 #include "dm_services.h"
 #include "dc.h"
 #include "core_types.h"
+#include "clk_mgr.h"
 #include "hw_sequencer.h"
 #include "dce100_hw_sequencer.h"
 #include "resource.h"
@@ -111,8 +112,8 @@ void dce100_prepare_bandwidth(
 {
 	dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
 
-	dc->res_pool->clk_mgr->funcs->update_clocks(
-			dc->res_pool->clk_mgr,
+	dc->clk_mgr->funcs->update_clocks(
+			dc->clk_mgr,
 			context,
 			false);
 }
@@ -123,8 +124,8 @@ void dce100_optimize_bandwidth(
 {
 	dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
 
-	dc->res_pool->clk_mgr->funcs->update_clocks(
-			dc->res_pool->clk_mgr,
+	dc->clk_mgr->funcs->update_clocks(
+			dc->clk_mgr,
 			context,
 			true);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index e938bf9986d3..6248c8455314 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -22,6 +22,9 @@
  * Authors: AMD
  *
  */
+
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "link_encoder.h"
@@ -35,8 +38,6 @@
 #include "irq/dce110/irq_service_dce110.h"
 #include "dce/dce_link_encoder.h"
 #include "dce/dce_stream_encoder.h"
-
-#include "dce/dce_clk_mgr.h"
 #include "dce/dce_mem_input.h"
 #include "dce/dce_ipp.h"
 #include "dce/dce_transform.h"
@@ -137,19 +138,6 @@ static const struct dce110_timing_generator_offsets dce100_tg_offsets[] = {
 #define SRI(reg_name, block, id)\
 	.reg_name = mm ## block ## id ## _ ## reg_name
 
-
-static const struct clk_mgr_registers disp_clk_regs = {
-		CLK_COMMON_REG_LIST_DCE_BASE()
-};
-
-static const struct clk_mgr_shift disp_clk_shift = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
-};
-
-static const struct clk_mgr_mask disp_clk_mask = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
-};
-
 #define ipp_regs(id)\
 [id] = {\
 		IPP_DCE100_REG_LIST_DCE_BASE(id)\
@@ -746,9 +734,6 @@ static void destruct(struct dce110_resource_pool *pool)
 			dce_aud_destroy(&pool->base.audios[i]);
 	}
 
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
-
 	if (pool->base.abm != NULL)
 				dce_abm_destroy(&pool->base.abm);
 
@@ -867,13 +852,55 @@ enum dc_status dce100_validate_plane(const struct dc_plane_state *plane_state, s
 	return DC_FAIL_SURFACE_VALIDATE;
 }
 
+struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	int i;
+	int j = -1;
+	struct dc_link *link = stream->link;
+
+	for (i = 0; i < pool->stream_enc_count; i++) {
+		if (!res_ctx->is_stream_enc_acquired[i] &&
+				pool->stream_enc[i]) {
+			/* Store first available for MST second display
+			 * in daisy chain use case
+			 */
+			j = i;
+			if (pool->stream_enc[i]->id ==
+					link->link_enc->preferred_engine)
+				return pool->stream_enc[i];
+		}
+	}
+
+	/*
+	 * below can happen in cases when stream encoder is acquired:
+	 * 1) for second MST display in chain, so preferred engine already
+	 * acquired;
+	 * 2) for another link, which preferred engine already acquired by any
+	 * MST configuration.
+	 *
+	 * If signal is of DP type and preferred engine not found, return last available
+	 *
+	 * TODO - This is just a patch up and a generic solution is
+	 * required for non DP connectors.
+	 */
+
+	if (j >= 0 && link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT)
+		return pool->stream_enc[j];
+
+	return NULL;
+}
+
 static const struct resource_funcs dce100_res_pool_funcs = {
 	.destroy = dce100_destroy_resource_pool,
 	.link_enc_create = dce100_link_encoder_create,
 	.validate_bandwidth = dce100_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce100_add_stream_to_ctx,
-	.validate_global = dce100_validate_global
+	.validate_global = dce100_validate_global,
+	.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
 };
 
 static bool construct(
@@ -932,16 +959,6 @@ static bool construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
index 2f366d66635d..fecab7c560f5 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.h
@@ -46,4 +46,9 @@ enum dc_status dce100_add_stream_to_ctx(
 		struct dc_state *new_ctx,
 		struct dc_stream_state *dc_stream);
 
+struct stream_encoder *dce100_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream);
+
 #endif /* DCE100_RESOURCE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
index 7b23239d33fe..72b580a4eb85 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "dce/dce_11_0_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 7ac50ab1b762..753c96f74af0 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -22,6 +22,9 @@
  * Authors: AMD
  *
  */
+
+#include <linux/delay.h>
+
 #include "dm_services.h"
 #include "dc.h"
 #include "dc_bios_types.h"
@@ -46,6 +49,7 @@
 #include "link_encoder.h"
 #include "link_hwss.h"
 #include "clock_source.h"
+#include "clk_mgr.h"
 #include "abm.h"
 #include "audio.h"
 #include "reg_helper.h"
@@ -242,6 +246,9 @@ static void build_prescale_params(struct ipp_prescale_params *prescale_params,
 	prescale_params->mode = IPP_PRESCALE_MODE_FIXED_UNSIGNED;
 
 	switch (plane_state->format) {
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		prescale_params->scale = 0x2082;
+		break;
 	case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
 	case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
 		prescale_params->scale = 0x2020;
@@ -957,6 +964,9 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 	struct pp_smu_funcs *pp_smu = NULL;
 	unsigned int i, num_audio = 1;
 
+	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
+		return;
+
 	if (core_dc->res_pool->pp_smu)
 		pp_smu = core_dc->res_pool->pp_smu;
 
@@ -976,6 +986,8 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 		/* TODO: audio should be per stream rather than per link */
 		pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
 					pipe_ctx->stream_res.stream_enc, false);
+		if (pipe_ctx->stream_res.audio)
+			pipe_ctx->stream_res.audio->enabled = true;
 	}
 }
 
@@ -984,6 +996,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 	struct dc *dc = pipe_ctx->stream->ctx->dc;
 	struct pp_smu_funcs *pp_smu = NULL;
 
+	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false)
+		return;
+
 	pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
 			pipe_ctx->stream_res.stream_enc, true);
 	if (pipe_ctx->stream_res.audio) {
@@ -1017,6 +1032,8 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 		/* dal_audio_disable_azalia_audio_jack_presence(stream->audio,
 		 * stream->stream_engine_id);
 		 */
+		if (pipe_ctx->stream_res.audio)
+			pipe_ctx->stream_res.audio->enabled = false;
 	}
 }
 
@@ -1296,6 +1313,11 @@ static enum dc_status dce110_enable_stream_timing(
 		pipe_ctx->stream_res.tg->funcs->program_timing(
 				pipe_ctx->stream_res.tg,
 				&stream->timing,
+				0,
+				0,
+				0,
+				0,
+				pipe_ctx->stream->signal,
 				true);
 	}
 
@@ -1488,10 +1510,11 @@ static void disable_vga_and_power_gate_all_controllers(
 	}
 }
 
-static struct dc_link *get_link_for_edp(struct dc *dc)
+static struct dc_link *get_edp_link(struct dc *dc)
 {
 	int i;
 
+	// report any eDP links, even unconnected DDI's
 	for (i = 0; i < dc->link_count; i++) {
 		if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
 			return dc->links[i];
@@ -1499,23 +1522,13 @@ static struct dc_link *get_link_for_edp(struct dc *dc)
 	return NULL;
 }
 
-static struct dc_link *get_link_for_edp_to_turn_off(
+static struct dc_link *get_edp_link_with_sink(
 		struct dc *dc,
 		struct dc_state *context)
 {
 	int i;
 	struct dc_link *link = NULL;
 
-	/* check if eDP panel is suppose to be set mode, if yes, no need to disable */
-	for (i = 0; i < context->stream_count; i++) {
-		if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
-			if (context->streams[i]->dpms_off == true)
-				return context->streams[i]->sink->link;
-			else
-				return NULL;
-		}
-	}
-
 	/* check if there is an eDP panel not in use */
 	for (i = 0; i < dc->link_count; i++) {
 		if (dc->links[i]->local_sink &&
@@ -1538,59 +1551,53 @@ static struct dc_link *get_link_for_edp_to_turn_off(
 void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
 {
 	int i;
-	struct dc_link *edp_link_to_turnoff = NULL;
-	struct dc_link *edp_link = get_link_for_edp(dc);
-	bool can_edp_fast_boot_optimize = false;
-	bool apply_edp_fast_boot_optimization = false;
+	struct dc_link *edp_link_with_sink = get_edp_link_with_sink(dc, context);
+	struct dc_link *edp_link = get_edp_link(dc);
+	bool can_apply_edp_fast_boot = false;
 	bool can_apply_seamless_boot = false;
 
-	for (i = 0; i < context->stream_count; i++) {
-		if (context->streams[i]->apply_seamless_boot_optimization) {
-			can_apply_seamless_boot = true;
-			break;
-		}
-	}
-
 	if (dc->hwss.init_pipes)
 		dc->hwss.init_pipes(dc, context);
 
-	if (edp_link) {
-		/* this seems to cause blank screens on DCE8 */
-		if ((dc->ctx->dce_version == DCE_VERSION_8_0) ||
-		    (dc->ctx->dce_version == DCE_VERSION_8_1) ||
-		    (dc->ctx->dce_version == DCE_VERSION_8_3))
-			can_edp_fast_boot_optimize = false;
-		else
-			can_edp_fast_boot_optimize =
-				edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc);
+	// Check fastboot support, disable on DCE8 because of blank screens
+	if (edp_link && dc->ctx->dce_version != DCE_VERSION_8_0 &&
+		    dc->ctx->dce_version != DCE_VERSION_8_1 &&
+		    dc->ctx->dce_version != DCE_VERSION_8_3) {
+
+		// enable fastboot if backend is enabled on eDP
+		if (edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc)) {
+			/* Find eDP stream and set optimization flag */
+			for (i = 0; i < context->stream_count; i++) {
+				if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
+					context->streams[i]->apply_edp_fast_boot_optimization = true;
+					can_apply_edp_fast_boot = true;
+					break;
+				}
+			}
+		}
 	}
 
-	if (can_edp_fast_boot_optimize)
-		edp_link_to_turnoff = get_link_for_edp_to_turn_off(dc, context);
-
-	/* if OS doesn't light up eDP and eDP link is available, we want to disable
-	 * If resume from S4/S5, should optimization.
-	 */
-	if (can_edp_fast_boot_optimize && !edp_link_to_turnoff) {
-		/* Find eDP stream and set optimization flag */
-		for (i = 0; i < context->stream_count; i++) {
-			if (context->streams[i]->signal == SIGNAL_TYPE_EDP) {
-				context->streams[i]->apply_edp_fast_boot_optimization = true;
-				apply_edp_fast_boot_optimization = true;
-			}
+	// Check seamless boot support
+	for (i = 0; i < context->stream_count; i++) {
+		if (context->streams[i]->apply_seamless_boot_optimization) {
+			can_apply_seamless_boot = true;
+			break;
 		}
 	}
 
-	if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) {
-		if (edp_link_to_turnoff) {
+	/* eDP should not have stream in resume from S4 and so even with VBios post
+	 * it should get turned off
+	 */
+	if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) {
+		if (edp_link_with_sink) {
 			/*turn off backlight before DP_blank and encoder powered down*/
-			dc->hwss.edp_backlight_control(edp_link_to_turnoff, false);
+			dc->hwss.edp_backlight_control(edp_link_with_sink, false);
 		}
 		/*resume from S3, no vbios posting, no need to power down again*/
 		power_down_all_hw_blocks(dc);
 		disable_vga_and_power_gate_all_controllers(dc);
-		if (edp_link_to_turnoff)
-			dc->hwss.edp_power_control(edp_link_to_turnoff, false);
+		if (edp_link_with_sink)
+			dc->hwss.edp_power_control(edp_link_with_sink, false);
 	}
 	bios_set_scratch_acc_mode_change(dc->ctx->dc_bios);
 }
@@ -2030,8 +2037,10 @@ enum dc_status dce110_apply_ctx_to_hw(
 		if (pipe_ctx->stream == NULL)
 			continue;
 
-		if (pipe_ctx->stream == pipe_ctx_old->stream)
+		if (pipe_ctx->stream == pipe_ctx_old->stream &&
+			pipe_ctx->stream->link->link_state_valid) {
 			continue;
+		}
 
 		if (pipe_ctx_old->stream && !pipe_need_reprogram(pipe_ctx_old, pipe_ctx))
 			continue;
@@ -2318,6 +2327,7 @@ static void init_hw(struct dc *dc)
 	struct dc_bios *bp;
 	struct transform *xfm;
 	struct abm *abm;
+	struct dmcu *dmcu;
 
 	bp = dc->ctx->dc_bios;
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -2345,9 +2355,6 @@ static void init_hw(struct dc *dc)
 		 * default signal on connector). */
 		struct dc_link *link = dc->links[i];
 
-		if (link->link_enc->connector.id == CONNECTOR_ID_EDP)
-			dc->hwss.edp_power_control(link, true);
-
 		link->link_enc->funcs->hw_init(link->link_enc);
 	}
 
@@ -2373,6 +2380,10 @@ static void init_hw(struct dc *dc)
 		abm->funcs->abm_init(abm);
 	}
 
+	dmcu = dc->res_pool->dmcu;
+	if (dmcu != NULL && abm != NULL)
+		abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
+
 	if (dc->fbc_compressor)
 		dc->fbc_compressor->funcs->power_up_fbc(dc->fbc_compressor);
 
@@ -2383,7 +2394,7 @@ void dce110_prepare_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
-	struct clk_mgr *dccg = dc->res_pool->clk_mgr;
+	struct clk_mgr *dccg = dc->clk_mgr;
 
 	dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
 
@@ -2397,7 +2408,7 @@ void dce110_optimize_bandwidth(
 		struct dc *dc,
 		struct dc_state *context)
 {
-	struct clk_mgr *dccg = dc->res_pool->clk_mgr;
+	struct clk_mgr *dccg = dc->clk_mgr;
 
 	dce110_set_displaymarks(dc, context);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
index 9b65b77e8823..34c5e3c7c6d2 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_regamma_v.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dm_services.h"
 
 /* include DCE11 register header files */
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index dcd04e9ea76b..764329264c3b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "link_encoder.h"
@@ -30,8 +32,6 @@
 
 #include "resource.h"
 #include "dce110/dce110_resource.h"
-
-#include "dce/dce_clk_mgr.h"
 #include "include/irq_service_interface.h"
 #include "dce/dce_audio.h"
 #include "dce110/dce110_timing_generator.h"
@@ -149,18 +149,6 @@ static const struct dce110_timing_generator_offsets dce110_tg_offsets[] = {
 #define SRI(reg_name, block, id)\
 	.reg_name = mm ## block ## id ## _ ## reg_name
 
-static const struct clk_mgr_registers disp_clk_regs = {
-		CLK_COMMON_REG_LIST_DCE_BASE()
-};
-
-static const struct clk_mgr_shift disp_clk_shift = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
-};
-
-static const struct clk_mgr_mask disp_clk_mask = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
-};
-
 static const struct dce_dmcu_registers dmcu_regs = {
 		DMCU_DCE110_COMMON_REG_LIST()
 };
@@ -811,9 +799,6 @@ static void destruct(struct dce110_resource_pool *pool)
 	if (pool->base.dmcu != NULL)
 		dce_dmcu_destroy(&pool->base.dmcu);
 
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
-
 	if (pool->base.irqs != NULL) {
 		dal_irq_service_destroy(&pool->base.irqs);
 	}
@@ -1097,6 +1082,11 @@ static struct pipe_ctx *dce110_acquire_underlay(
 
 		pipe_ctx->stream_res.tg->funcs->program_timing(pipe_ctx->stream_res.tg,
 				&stream->timing,
+				0,
+				0,
+				0,
+				0,
+				pipe_ctx->stream->signal,
 				false);
 
 		pipe_ctx->stream_res.tg->funcs->enable_advanced_request(
@@ -1129,6 +1119,38 @@ static void dce110_destroy_resource_pool(struct resource_pool **pool)
 	*pool = NULL;
 }
 
+struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	int i;
+	int j = -1;
+	struct dc_link *link = stream->link;
+
+	for (i = 0; i < pool->stream_enc_count; i++) {
+		if (!res_ctx->is_stream_enc_acquired[i] &&
+				pool->stream_enc[i]) {
+			/* Store first available for MST second display
+			 * in daisy chain use case
+			 */
+			j = i;
+			if (pool->stream_enc[i]->id ==
+					link->link_enc->preferred_engine)
+				return pool->stream_enc[i];
+		}
+	}
+
+	/*
+	 * For CZ and later, we can allow DIG FE and BE to differ for all display types
+	 */
+
+	if (j >= 0)
+		return pool->stream_enc[j];
+
+	return NULL;
+}
+
 
 static const struct resource_funcs dce110_res_pool_funcs = {
 	.destroy = dce110_destroy_resource_pool,
@@ -1137,7 +1159,8 @@ static const struct resource_funcs dce110_res_pool_funcs = {
 	.validate_plane = dce110_validate_plane,
 	.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
 	.add_stream_to_ctx = dce110_add_stream_to_ctx,
-	.validate_global = dce110_validate_global
+	.validate_global = dce110_validate_global,
+	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
 };
 
 static bool underlay_create(struct dc_context *ctx, struct resource_pool *pool)
@@ -1308,16 +1331,6 @@ static bool construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce110_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
index e5f168c1f8c8..aa4531e0800e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.h
@@ -45,5 +45,10 @@ struct resource_pool *dce110_create_resource_pool(
 	struct dc *dc,
 	struct hw_asic_id asic_id);
 
+struct stream_encoder *dce110_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream);
+
 #endif /* __DC_RESOURCE_DCE110_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index 1b2fe0df347f..5f7c2c5641c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -1952,6 +1952,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
 
 void dce110_tg_program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
index 734d4965dab1..768ccf27ada9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.h
@@ -256,6 +256,11 @@ void dce110_tg_set_overscan_color(struct timing_generator *tg,
 
 void dce110_tg_program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios);
 
 bool dce110_tg_is_blanked(struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index a3cef60380ed..a13a2f58944e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -435,6 +435,11 @@ static void dce110_timing_generator_v_set_blank(struct timing_generator *tg,
 
 static void dce110_timing_generator_v_program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	if (use_vbios)
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
index aa8d6b10d2c3..b1aaab5590cc 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dce110_transform_v.h"
 #include "dm_services.h"
 #include "dc.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
index faae12cf7968..51cb45d8b9ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_compressor.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "dce/dce_11_2_d.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index a480b15f6885..c6136e0ed1a4 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "link_encoder.h"
@@ -34,8 +36,6 @@
 #include "dce110/dce110_timing_generator.h"
 
 #include "irq/dce110/irq_service_dce110.h"
-
-#include "dce/dce_clk_mgr.h"
 #include "dce/dce_mem_input.h"
 #include "dce/dce_transform.h"
 #include "dce/dce_link_encoder.h"
@@ -148,19 +148,6 @@ static const struct dce110_timing_generator_offsets dce112_tg_offsets[] = {
 #define SRI(reg_name, block, id)\
 	.reg_name = mm ## block ## id ## _ ## reg_name
 
-
-static const struct clk_mgr_registers disp_clk_regs = {
-		CLK_COMMON_REG_LIST_DCE_BASE()
-};
-
-static const struct clk_mgr_shift disp_clk_shift = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
-};
-
-static const struct clk_mgr_mask disp_clk_mask = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
-};
-
 static const struct dce_dmcu_registers dmcu_regs = {
 		DMCU_DCE110_COMMON_REG_LIST()
 };
@@ -774,9 +761,6 @@ static void destruct(struct dce110_resource_pool *pool)
 	if (pool->base.dmcu != NULL)
 		dce_dmcu_destroy(&pool->base.dmcu);
 
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
-
 	if (pool->base.irqs != NULL) {
 		dal_irq_service_destroy(&pool->base.irqs);
 	}
@@ -993,7 +977,8 @@ static const struct resource_funcs dce112_res_pool_funcs = {
 	.validate_bandwidth = dce112_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce112_add_stream_to_ctx,
-	.validate_global = dce112_validate_global
+	.validate_global = dce112_validate_global,
+	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
 };
 
 static void bw_calcs_data_update_from_pplib(struct dc *dc)
@@ -1224,16 +1209,6 @@ static bool construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce112_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 6d49c7143c67..54be7ab370df 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 
@@ -46,8 +48,7 @@
 #include "dce110/dce110_hw_sequencer.h"
 #include "dce120/dce120_hw_sequencer.h"
 #include "dce/dce_transform.h"
-
-#include "dce/dce_clk_mgr.h"
+#include "clk_mgr.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_link_encoder.h"
 #include "dce/dce_stream_encoder.h"
@@ -480,7 +481,7 @@ static const struct dc_debug_options debug_defaults = {
 		.disable_clock_gate = true,
 };
 
-struct clock_source *dce120_clock_source_create(
+static struct clock_source *dce120_clock_source_create(
 	struct dc_context *ctx,
 	struct dc_bios *bios,
 	enum clock_source_id id,
@@ -503,14 +504,14 @@ struct clock_source *dce120_clock_source_create(
 	return NULL;
 }
 
-void dce120_clock_source_destroy(struct clock_source **clk_src)
+static void dce120_clock_source_destroy(struct clock_source **clk_src)
 {
 	kfree(TO_DCE110_CLK_SRC(*clk_src));
 	*clk_src = NULL;
 }
 
 
-bool dce120_hw_sequencer_create(struct dc *dc)
+static bool dce120_hw_sequencer_create(struct dc *dc)
 {
 	/* All registers used by dce11.2 match those in dce11 in offset and
 	 * structure
@@ -609,9 +610,6 @@ static void destruct(struct dce110_resource_pool *pool)
 
 	if (pool->base.dmcu != NULL)
 		dce_dmcu_destroy(&pool->base.dmcu);
-
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
 }
 
 static void read_dce_straps(
@@ -837,7 +835,8 @@ static const struct resource_funcs dce120_res_pool_funcs = {
 	.link_enc_create = dce120_link_encoder_create,
 	.validate_bandwidth = dce112_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
-	.add_stream_to_ctx = dce112_add_stream_to_ctx
+	.add_stream_to_ctx = dce112_add_stream_to_ctx,
+	.find_first_free_match_stream_enc_for_link = dce110_find_first_free_match_stream_enc_for_link
 };
 
 static void bw_calcs_data_update_from_pplib(struct dc *dc)
@@ -1047,17 +1046,6 @@ static bool construct(
 		}
 	}
 
-	if (is_vg20)
-		pool->base.clk_mgr = dce121_clk_mgr_create(ctx);
-	else
-		pool->base.clk_mgr = dce120_clk_mgr_create(ctx);
-
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto dccg_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
@@ -1185,7 +1173,7 @@ static bool construct(
 	 * here.
 	 */
 	if (is_vg20 && dce121_xgmi_enabled(dc->hwseq))
-		dce121_clock_patch_xgmi_ss_info(pool->base.clk_mgr);
+		dce121_clock_patch_xgmi_ss_info(dc->clk_mgr);
 
 	/* Create hardware sequencer */
 	if (!dce120_hw_sequencer_create(dc))
@@ -1204,7 +1192,6 @@ static bool construct(
 
 irqs_create_fail:
 controller_create_fail:
-dccg_create_fail:
 clk_src_create_fail:
 res_create_fail:
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 04b866f0fa1f..098e56962f2a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -734,8 +734,13 @@ void dce120_tg_set_overscan_color(struct timing_generator *tg,
 		CRTC_OVERSCAN_COLOR_RED, overscan_color->color_r_cr);
 }
 
-void dce120_tg_program_timing(struct timing_generator *tg,
+static void dce120_tg_program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	if (use_vbios)
@@ -1109,6 +1114,92 @@ static bool dce120_arm_vert_intr(
 	return true;
 }
 
+
+static bool dce120_is_tg_enabled(struct timing_generator *tg)
+{
+	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+	uint32_t value, field;
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CONTROL,
+				  tg110->offsets.crtc);
+	field = get_reg_field_value(value, CRTC0_CRTC_CONTROL,
+				    CRTC_CURRENT_MASTER_EN_STATE);
+
+	return field == 1;
+}
+
+static bool dce120_configure_crc(struct timing_generator *tg,
+				 const struct crc_params *params)
+{
+	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+
+	/* Cannot configure crc on a CRTC that is disabled */
+	if (!dce120_is_tg_enabled(tg))
+		return false;
+
+	/* First, disable CRC before we configure it. */
+	dm_write_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+			   tg110->offsets.crtc, 0);
+
+	if (!params->enable)
+		return true;
+
+	/* Program frame boundaries */
+	/* Window A x axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_X_CONTROL,
+			  CRTC_CRC0_WINDOWA_X_START, params->windowa_x_start,
+			  CRTC_CRC0_WINDOWA_X_END, params->windowa_x_end);
+
+	/* Window A y axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWA_Y_CONTROL,
+			  CRTC_CRC0_WINDOWA_Y_START, params->windowa_y_start,
+			  CRTC_CRC0_WINDOWA_Y_END, params->windowa_y_end);
+
+	/* Window B x axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_X_CONTROL,
+			  CRTC_CRC0_WINDOWB_X_START, params->windowb_x_start,
+			  CRTC_CRC0_WINDOWB_X_END, params->windowb_x_end);
+
+	/* Window B y axis start and end. */
+	CRTC_REG_UPDATE_2(CRTC0_CRTC_CRC0_WINDOWB_Y_CONTROL,
+			  CRTC_CRC0_WINDOWB_Y_START, params->windowb_y_start,
+			  CRTC_CRC0_WINDOWB_Y_END, params->windowb_y_end);
+
+	/* Set crc mode and selection, and enable. Only using CRC0*/
+	CRTC_REG_UPDATE_3(CRTC0_CRTC_CRC_CNTL,
+			  CRTC_CRC_EN, params->continuous_mode ? 1 : 0,
+			  CRTC_CRC0_SELECT, params->selection,
+			  CRTC_CRC_EN, 1);
+
+	return true;
+}
+
+static bool dce120_get_crc(struct timing_generator *tg, uint32_t *r_cr,
+			   uint32_t *g_y, uint32_t *b_cb)
+{
+	struct dce110_timing_generator *tg110 = DCE110TG_FROM_TG(tg);
+	uint32_t value, field;
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC_CNTL,
+				  tg110->offsets.crtc);
+	field = get_reg_field_value(value, CRTC0_CRTC_CRC_CNTL, CRTC_CRC_EN);
+
+	/* Early return if CRC is not enabled for this CRTC */
+	if (!field)
+		return false;
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_RG,
+				  tg110->offsets.crtc);
+	*r_cr = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_R_CR);
+	*g_y = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_RG, CRC0_G_Y);
+
+	value = dm_read_reg_soc15(tg->ctx, mmCRTC0_CRTC_CRC0_DATA_B,
+				  tg110->offsets.crtc);
+	*b_cb = get_reg_field_value(value, CRTC0_CRTC_CRC0_DATA_B, CRC0_B_CB);
+
+	return true;
+}
+
 static const struct timing_generator_funcs dce120_tg_funcs = {
 		.validate_timing = dce120_tg_validate_timing,
 		.program_timing = dce120_tg_program_timing,
@@ -1140,6 +1231,9 @@ static const struct timing_generator_funcs dce120_tg_funcs = {
 		.set_static_screen_control = dce120_timing_generator_set_static_screen_control,
 		.set_test_pattern = dce120_timing_generator_set_test_pattern,
 		.arm_vert_intr = dce120_arm_vert_intr,
+		.is_tg_enabled = dce120_is_tg_enabled,
+		.configure_crc = dce120_configure_crc,
+		.get_crc = dce120_get_crc,
 };
 
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 27d0cc394963..860a524ebcfa 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
@@ -37,7 +39,6 @@
 #include "dce110/dce110_timing_generator.h"
 #include "dce110/dce110_resource.h"
 #include "dce80/dce80_timing_generator.h"
-#include "dce/dce_clk_mgr.h"
 #include "dce/dce_mem_input.h"
 #include "dce/dce_link_encoder.h"
 #include "dce/dce_stream_encoder.h"
@@ -154,19 +155,6 @@ static const struct dce110_timing_generator_offsets dce80_tg_offsets[] = {
 #define SRI(reg_name, block, id)\
 	.reg_name = mm ## block ## id ## _ ## reg_name
 
-
-static const struct clk_mgr_registers disp_clk_regs = {
-		CLK_COMMON_REG_LIST_DCE_BASE()
-};
-
-static const struct clk_mgr_shift disp_clk_shift = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT)
-};
-
-static const struct clk_mgr_mask disp_clk_mask = {
-		CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK)
-};
-
 #define ipp_regs(id)\
 [id] = {\
 		IPP_COMMON_REG_LIST_DCE_BASE(id)\
@@ -802,9 +790,6 @@ static void destruct(struct dce110_resource_pool *pool)
 		}
 	}
 
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
-
 	if (pool->base.irqs != NULL) {
 		dal_irq_service_destroy(&pool->base.irqs);
 	}
@@ -880,7 +865,8 @@ static const struct resource_funcs dce80_res_pool_funcs = {
 	.validate_bandwidth = dce80_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce100_add_stream_to_ctx,
-	.validate_global = dce80_validate_global
+	.validate_global = dce80_validate_global,
+	.find_first_free_match_stream_enc_for_link = dce100_find_first_free_match_stream_enc_for_link
 };
 
 static bool dce80_construct(
@@ -954,16 +940,6 @@ static bool dce80_construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
@@ -1163,16 +1139,6 @@ static bool dce81_construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
@@ -1368,16 +1334,6 @@ static bool dce83_construct(
 		}
 	}
 
-	pool->base.clk_mgr = dce_clk_mgr_create(ctx,
-			&disp_clk_regs,
-			&disp_clk_shift,
-			&disp_clk_mask);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto res_create_fail;
-	}
-
 	pool->base.dmcu = dce_dmcu_create(ctx,
 			&dmcu_regs,
 			&dmcu_shift,
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
index 8b5ce557ee71..397e7f94e1e8 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_timing_generator.c
@@ -107,12 +107,17 @@ static void program_pix_dur(struct timing_generator *tg, uint32_t pix_clk_100hz)
 
 static void program_timing(struct timing_generator *tg,
 	const struct dc_crtc_timing *timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	if (!use_vbios)
 		program_pix_dur(tg, timing->pix_clk_100hz);
 
-	dce110_tg_program_timing(tg, timing, use_vbios);
+	dce110_tg_program_timing(tg, timing, 0, 0, 0, 0, 0, use_vbios);
 }
 
 static void dce80_timing_generator_enable_advanced_request(
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 55f293c8a3c0..032f872be89c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -24,7 +24,7 @@
 
 DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o dcn10_hw_sequencer_debug.o \
 		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
-		dcn10_hubp.o dcn10_mpc.o dcn10_clk_mgr.o \
+		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
 		dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
index 5ae4d69391a5..3b8cd7410498 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h
@@ -38,6 +38,22 @@
 	type exp_resion_start_segment;\
 	type field_region_linear_slope
 
+#define TF_HELPER_REG_LIST \
+	uint32_t start_cntl_b; \
+	uint32_t start_cntl_g; \
+	uint32_t start_cntl_r; \
+	uint32_t start_slope_cntl_b; \
+	uint32_t start_slope_cntl_g; \
+	uint32_t start_slope_cntl_r; \
+	uint32_t start_end_cntl1_b; \
+	uint32_t start_end_cntl2_b; \
+	uint32_t start_end_cntl1_g; \
+	uint32_t start_end_cntl2_g; \
+	uint32_t start_end_cntl1_r; \
+	uint32_t start_end_cntl2_r; \
+	uint32_t region_start; \
+	uint32_t region_end
+
 #define TF_CM_REG_FIELD_LIST(type) \
 	type csc_c11; \
 	type csc_c12
@@ -54,20 +70,7 @@ struct xfer_func_reg {
 	struct xfer_func_shift shifts;
 	struct xfer_func_mask masks;
 
-	uint32_t start_cntl_b;
-	uint32_t start_cntl_g;
-	uint32_t start_cntl_r;
-	uint32_t start_slope_cntl_b;
-	uint32_t start_slope_cntl_g;
-	uint32_t start_slope_cntl_r;
-	uint32_t start_end_cntl1_b;
-	uint32_t start_end_cntl2_b;
-	uint32_t start_end_cntl1_g;
-	uint32_t start_end_cntl2_g;
-	uint32_t start_end_cntl1_r;
-	uint32_t start_end_cntl2_r;
-	uint32_t region_start;
-	uint32_t region_end;
+	TF_HELPER_REG_LIST;
 };
 
 struct cm_color_matrix_shift {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0db2a6e96fc0..a1c824efa686 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/delay.h>
+
 #include "dm_services.h"
 #include "dcn10_hubp.h"
 #include "dcn10_hubbub.h"
@@ -263,20 +265,15 @@ void hubbub1_wm_change_req_wa(struct hubbub *hubbub)
 			DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, 1);
 }
 
-void hubbub1_program_watermarks(
+void hubbub1_program_urgent_watermarks(
 		struct hubbub *hubbub,
 		struct dcn_watermark_set *watermarks,
 		unsigned int refclk_mhz,
 		bool safe_to_lower)
 {
 	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
-	/*
-	 * Need to clamp to max of the register values (i.e. no wrap)
-	 * for dcn1, all wm registers are 21-bit wide
-	 */
 	uint32_t prog_wm_value;
 
-
 	/* Repeat for water mark set A, B, C and D. */
 	/* clock state A */
 	if (safe_to_lower || watermarks->a.urgent_ns > hubbub1->watermarks.a.urgent_ns) {
@@ -291,60 +288,14 @@ void hubbub1_program_watermarks(
 			watermarks->a.urgent_ns, prog_wm_value);
 	}
 
-	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A)) {
-		if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) {
-			hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns;
-			prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
-					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->a.pte_meta_urgent_ns, prog_wm_value);
-		}
-	}
-
-	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A)) {
-		if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
-				> hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
-			hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
-					watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
-					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
-		}
-
-		if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
-				> hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
-			hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
-					watermarks->a.cstate_pstate.cstate_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->a.cstate_pstate.cstate_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
-					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
-		}
-	}
-
-	if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
-			> hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
-		hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
-				watermarks->a.cstate_pstate.pstate_change_ns;
-		prog_wm_value = convert_and_clamp(
-				watermarks->a.cstate_pstate.pstate_change_ns,
+	if (safe_to_lower || watermarks->a.pte_meta_urgent_ns > hubbub1->watermarks.a.pte_meta_urgent_ns) {
+		hubbub1->watermarks.a.pte_meta_urgent_ns = watermarks->a.pte_meta_urgent_ns;
+		prog_wm_value = convert_and_clamp(watermarks->a.pte_meta_urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
-				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
-		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
-			"HW register value = 0x%x\n\n",
-			watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
+		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->a.pte_meta_urgent_ns, prog_wm_value);
 	}
 
 	/* clock state B */
@@ -360,60 +311,14 @@ void hubbub1_program_watermarks(
 			watermarks->b.urgent_ns, prog_wm_value);
 	}
 
-	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B)) {
-		if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) {
-			hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns;
-			prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
-					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->b.pte_meta_urgent_ns, prog_wm_value);
-		}
-	}
-
-	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B)) {
-		if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
-				> hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
-			hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
-					watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
-					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
-		}
-
-		if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
-				> hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
-			hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
-					watermarks->b.cstate_pstate.cstate_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->b.cstate_pstate.cstate_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
-					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
-		}
-	}
-
-	if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
-			> hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
-		hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
-				watermarks->b.cstate_pstate.pstate_change_ns;
-		prog_wm_value = convert_and_clamp(
-				watermarks->b.cstate_pstate.pstate_change_ns,
+	if (safe_to_lower || watermarks->b.pte_meta_urgent_ns > hubbub1->watermarks.b.pte_meta_urgent_ns) {
+		hubbub1->watermarks.b.pte_meta_urgent_ns = watermarks->b.pte_meta_urgent_ns;
+		prog_wm_value = convert_and_clamp(watermarks->b.pte_meta_urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
-				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
-		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
-			"HW register value = 0x%x\n\n",
-			watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
+		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->b.pte_meta_urgent_ns, prog_wm_value);
 	}
 
 	/* clock state C */
@@ -429,60 +334,14 @@ void hubbub1_program_watermarks(
 			watermarks->c.urgent_ns, prog_wm_value);
 	}
 
-	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C)) {
-		if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) {
-			hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns;
-			prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
-					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->c.pte_meta_urgent_ns, prog_wm_value);
-		}
-	}
-
-	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C)) {
-		if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
-				> hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
-			hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
-					watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
-					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
-		}
-
-		if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
-				> hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
-			hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
-					watermarks->c.cstate_pstate.cstate_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->c.cstate_pstate.cstate_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
-					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
-		}
-	}
-
-	if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
-			> hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
-		hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
-				watermarks->c.cstate_pstate.pstate_change_ns;
-		prog_wm_value = convert_and_clamp(
-				watermarks->c.cstate_pstate.pstate_change_ns,
+	if (safe_to_lower || watermarks->c.pte_meta_urgent_ns > hubbub1->watermarks.c.pte_meta_urgent_ns) {
+		hubbub1->watermarks.c.pte_meta_urgent_ns = watermarks->c.pte_meta_urgent_ns;
+		prog_wm_value = convert_and_clamp(watermarks->c.pte_meta_urgent_ns,
 				refclk_mhz, 0x1fffff);
-		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
-				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
-		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
-			"HW register value = 0x%x\n\n",
-			watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
+		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_C calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->c.pte_meta_urgent_ns, prog_wm_value);
 	}
 
 	/* clock state D */
@@ -498,48 +357,199 @@ void hubbub1_program_watermarks(
 			watermarks->d.urgent_ns, prog_wm_value);
 	}
 
-	if (REG(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D)) {
-		if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) {
-			hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns;
-			prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns,
-					refclk_mhz, 0x1fffff);
-			REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->d.pte_meta_urgent_ns, prog_wm_value);
-		}
+	if (safe_to_lower || watermarks->d.pte_meta_urgent_ns > hubbub1->watermarks.d.pte_meta_urgent_ns) {
+		hubbub1->watermarks.d.pte_meta_urgent_ns = watermarks->d.pte_meta_urgent_ns;
+		prog_wm_value = convert_and_clamp(watermarks->d.pte_meta_urgent_ns,
+				refclk_mhz, 0x1fffff);
+		REG_WRITE(DCHUBBUB_ARB_PTE_META_URGENCY_WATERMARK_D, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("PTE_META_URGENCY_WATERMARK_D calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->d.pte_meta_urgent_ns, prog_wm_value);
 	}
+}
 
-	if (REG(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D)) {
-		if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns
-				> hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) {
-			hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
-					watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
-					DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
-		}
+void hubbub1_program_stutter_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+	uint32_t prog_wm_value;
 
-		if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns
-				> hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) {
-			hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns =
-					watermarks->d.cstate_pstate.cstate_exit_ns;
-			prog_wm_value = convert_and_clamp(
-					watermarks->d.cstate_pstate.cstate_exit_ns,
-					refclk_mhz, 0x1fffff);
-			REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
-					DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
-			DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
-				"HW register value = 0x%x\n",
-				watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
-		}
+	/* clock state A */
+	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns
+			> hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns) {
+		hubbub1->watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns =
+				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+	}
+
+	if (safe_to_lower || watermarks->a.cstate_pstate.cstate_exit_ns
+			> hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns) {
+		hubbub1->watermarks.a.cstate_pstate.cstate_exit_ns =
+				watermarks->a.cstate_pstate.cstate_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->a.cstate_pstate.cstate_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->a.cstate_pstate.cstate_exit_ns, prog_wm_value);
+	}
+
+	/* clock state B */
+	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns
+			> hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns) {
+		hubbub1->watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns =
+				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+	}
+
+	if (safe_to_lower || watermarks->b.cstate_pstate.cstate_exit_ns
+			> hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns) {
+		hubbub1->watermarks.b.cstate_pstate.cstate_exit_ns =
+				watermarks->b.cstate_pstate.cstate_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->b.cstate_pstate.cstate_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->b.cstate_pstate.cstate_exit_ns, prog_wm_value);
+	}
+
+	/* clock state C */
+	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns
+			> hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns) {
+		hubbub1->watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns =
+				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+	}
+
+	if (safe_to_lower || watermarks->c.cstate_pstate.cstate_exit_ns
+			> hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns) {
+		hubbub1->watermarks.c.cstate_pstate.cstate_exit_ns =
+				watermarks->c.cstate_pstate.cstate_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->c.cstate_pstate.cstate_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->c.cstate_pstate.cstate_exit_ns, prog_wm_value);
+	}
+
+	/* clock state D */
+	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns
+			> hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns) {
+		hubbub1->watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns =
+				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0,
+				DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, prog_wm_value);
+	}
+
+	if (safe_to_lower || watermarks->d.cstate_pstate.cstate_exit_ns
+			> hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns) {
+		hubbub1->watermarks.d.cstate_pstate.cstate_exit_ns =
+				watermarks->d.cstate_pstate.cstate_exit_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->d.cstate_pstate.cstate_exit_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0,
+				DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n"
+			"HW register value = 0x%x\n",
+			watermarks->d.cstate_pstate.cstate_exit_ns, prog_wm_value);
+	}
+
+}
+
+void hubbub1_program_pstate_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+	uint32_t prog_wm_value;
+
+	/* clock state A */
+	if (safe_to_lower || watermarks->a.cstate_pstate.pstate_change_ns
+			> hubbub1->watermarks.a.cstate_pstate.pstate_change_ns) {
+		hubbub1->watermarks.a.cstate_pstate.pstate_change_ns =
+				watermarks->a.cstate_pstate.pstate_change_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->a.cstate_pstate.pstate_change_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->a.cstate_pstate.pstate_change_ns, prog_wm_value);
 	}
 
+	/* clock state B */
+	if (safe_to_lower || watermarks->b.cstate_pstate.pstate_change_ns
+			> hubbub1->watermarks.b.cstate_pstate.pstate_change_ns) {
+		hubbub1->watermarks.b.cstate_pstate.pstate_change_ns =
+				watermarks->b.cstate_pstate.pstate_change_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->b.cstate_pstate.pstate_change_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->b.cstate_pstate.pstate_change_ns, prog_wm_value);
+	}
+
+	/* clock state C */
+	if (safe_to_lower || watermarks->c.cstate_pstate.pstate_change_ns
+			> hubbub1->watermarks.c.cstate_pstate.pstate_change_ns) {
+		hubbub1->watermarks.c.cstate_pstate.pstate_change_ns =
+				watermarks->c.cstate_pstate.pstate_change_ns;
+		prog_wm_value = convert_and_clamp(
+				watermarks->c.cstate_pstate.pstate_change_ns,
+				refclk_mhz, 0x1fffff);
+		REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0,
+				DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value);
+		DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n"
+			"HW register value = 0x%x\n\n",
+			watermarks->c.cstate_pstate.pstate_change_ns, prog_wm_value);
+	}
+
+	/* clock state D */
 	if (safe_to_lower || watermarks->d.cstate_pstate.pstate_change_ns
 			> hubbub1->watermarks.d.cstate_pstate.pstate_change_ns) {
 		hubbub1->watermarks.d.cstate_pstate.pstate_change_ns =
@@ -553,6 +563,22 @@ void hubbub1_program_watermarks(
 			"HW register value = 0x%x\n\n",
 			watermarks->d.cstate_pstate.pstate_change_ns, prog_wm_value);
 	}
+}
+
+void hubbub1_program_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower)
+{
+	struct dcn10_hubbub *hubbub1 = TO_DCN10_HUBBUB(hubbub);
+	/*
+	 * Need to clamp to max of the register values (i.e. no wrap)
+	 * for dcn1, all wm registers are 21-bit wide
+	 */
+	hubbub1_program_urgent_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+	hubbub1_program_stutter_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
+	hubbub1_program_pstate_watermarks(hubbub, watermarks, refclk_mhz, safe_to_lower);
 
 	REG_UPDATE(DCHUBBUB_ARB_SAT_LEVEL,
 			DCHUBBUB_ARB_SAT_LEVEL, 60 * refclk_mhz);
@@ -903,9 +929,7 @@ void hubbub1_construct(struct hubbub *hubbub,
 	hubbub1->masks = hubbub_mask;
 
 	hubbub1->debug_test_index_pstate = 0x7;
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	if (ctx->dce_version == DCN_VERSION_1_01)
 		hubbub1->debug_test_index_pstate = 0xB;
-#endif
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index 85811b24a497..7c2559c9ae23 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -262,4 +262,20 @@ void hubbub1_construct(struct hubbub *hubbub,
 	const struct dcn_hubbub_shift *hubbub_shift,
 	const struct dcn_hubbub_mask *hubbub_mask);
 
+void hubbub1_program_urgent_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
+void hubbub1_program_stutter_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
+void hubbub1_program_pstate_watermarks(
+		struct hubbub *hubbub,
+		struct dcn_watermark_set *watermarks,
+		unsigned int refclk_mhz,
+		bool safe_to_lower);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 33d311cea28c..821a280eb481 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -45,6 +45,8 @@
 #include "dcn10_cm_common.h"
 #include "dc_link_dp.h"
 #include "dccg.h"
+#include "clk_mgr.h"
+
 
 #define DC_LOGGER_INIT(logger)
 
@@ -658,16 +660,15 @@ static enum dc_status dcn10_enable_stream_timing(
 		BREAK_TO_DEBUGGER();
 		return DC_ERROR_UNEXPECTED;
 	}
-	pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
-	pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
-	pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
-	pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
-
-	pipe_ctx->stream_res.tg->dlg_otg_param.signal =  pipe_ctx->stream->signal;
 
 	pipe_ctx->stream_res.tg->funcs->program_timing(
 			pipe_ctx->stream_res.tg,
 			&stream->timing,
+			pipe_ctx->pipe_dlg_param.vready_offset,
+			pipe_ctx->pipe_dlg_param.vstartup_start,
+			pipe_ctx->pipe_dlg_param.vupdate_offset,
+			pipe_ctx->pipe_dlg_param.vupdate_width,
+			pipe_ctx->stream->signal,
 			true);
 
 #if 0 /* move to after enable_crtc */
@@ -1101,9 +1102,6 @@ static void dcn10_init_hw(struct dc *dc)
 		 */
 		struct dc_link *link = dc->links[i];
 
-		if (link->link_enc->connector.id == CONNECTOR_ID_EDP)
-			dc->hwss.edp_power_control(link, true);
-
 		link->link_enc->funcs->hw_init(link->link_enc);
 
 		/* Check for enabled DIG to identify enabled display */
@@ -1145,6 +1143,9 @@ static void dcn10_init_hw(struct dc *dc)
 	if (dmcu != NULL)
 		dmcu->funcs->dmcu_init(dmcu);
 
+	if (abm != NULL && dmcu != NULL)
+		abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
+
 	/* power AFMT HDMI memory TODO: may move to dis/en output save power*/
 	REG_WRITE(DIO_MEM_PWR_CTRL, 0);
 
@@ -1159,7 +1160,7 @@ static void dcn10_init_hw(struct dc *dc)
 
 	enable_power_gating_plane(dc->hwseq, true);
 
-	memset(&dc->res_pool->clk_mgr->clks, 0, sizeof(dc->res_pool->clk_mgr->clks));
+	memset(&dc->clk_mgr->clks, 0, sizeof(dc->clk_mgr->clks));
 }
 
 static void dcn10_reset_hw_ctx_wrap(
@@ -1756,7 +1757,7 @@ static void dcn10_program_output_csc(struct dc *dc,
 
 bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
 {
-	if (pipe_ctx->plane_state->visible)
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
 		return true;
 	if (pipe_ctx->bottom_pipe && is_lower_pipe_tree_visible(pipe_ctx->bottom_pipe))
 		return true;
@@ -1765,7 +1766,7 @@ bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
 
 bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
 {
-	if (pipe_ctx->plane_state->visible)
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
 		return true;
 	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
 		return true;
@@ -1774,7 +1775,7 @@ bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
 
 bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
 {
-	if (pipe_ctx->plane_state->visible)
+	if (pipe_ctx->plane_state && pipe_ctx->plane_state->visible)
 		return true;
 	if (pipe_ctx->top_pipe && is_upper_pipe_tree_visible(pipe_ctx->top_pipe))
 		return true;
@@ -1920,7 +1921,7 @@ static uint16_t fixed_point_to_int_frac(
 	return result;
 }
 
-void build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
+void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
 		const struct dc_plane_state *plane_state)
 {
 	if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN
@@ -1953,7 +1954,7 @@ static void update_dpp(struct dpp *dpp, struct dc_plane_state *plane_state)
 			plane_state->color_space);
 
 	//set scale and bias registers
-	build_prescale_params(&bns_params, plane_state);
+	dcn10_build_prescale_params(&bns_params, plane_state);
 	if (dpp->funcs->dpp_program_bias_and_scale)
 		dpp->funcs->dpp_program_bias_and_scale(dpp, &bns_params);
 }
@@ -2071,7 +2072,7 @@ void update_dchubp_dpp(
 	 */
 	if (plane_state->update_flags.bits.full_update) {
 		bool should_divided_by_2 = context->bw_ctx.bw.dcn.clk.dppclk_khz <=
-				dc->res_pool->clk_mgr->clks.dispclk_khz / 2;
+				dc->clk_mgr->clks.dispclk_khz / 2;
 
 		dpp->funcs->dpp_dppclk_control(
 				dpp,
@@ -2084,9 +2085,9 @@ void update_dchubp_dpp(
 					dpp->inst,
 					pipe_ctx->plane_res.bw.dppclk_khz);
 		else
-			dc->res_pool->clk_mgr->clks.dppclk_khz = should_divided_by_2 ?
-						dc->res_pool->clk_mgr->clks.dispclk_khz / 2 :
-							dc->res_pool->clk_mgr->clks.dispclk_khz;
+			dc->clk_mgr->clks.dppclk_khz = should_divided_by_2 ?
+						dc->clk_mgr->clks.dispclk_khz / 2 :
+							dc->clk_mgr->clks.dispclk_khz;
 	}
 
 	/* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG
@@ -2279,14 +2280,15 @@ static void program_all_pipe_in_tree(
 	if (pipe_ctx->top_pipe == NULL) {
 		bool blank = !is_pipe_tree_visible(pipe_ctx);
 
-		pipe_ctx->stream_res.tg->dlg_otg_param.vready_offset = pipe_ctx->pipe_dlg_param.vready_offset;
-		pipe_ctx->stream_res.tg->dlg_otg_param.vstartup_start = pipe_ctx->pipe_dlg_param.vstartup_start;
-		pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_offset = pipe_ctx->pipe_dlg_param.vupdate_offset;
-		pipe_ctx->stream_res.tg->dlg_otg_param.vupdate_width = pipe_ctx->pipe_dlg_param.vupdate_width;
-		pipe_ctx->stream_res.tg->dlg_otg_param.signal =  pipe_ctx->stream->signal;
-
 		pipe_ctx->stream_res.tg->funcs->program_global_sync(
-				pipe_ctx->stream_res.tg);
+				pipe_ctx->stream_res.tg,
+				pipe_ctx->pipe_dlg_param.vready_offset,
+				pipe_ctx->pipe_dlg_param.vstartup_start,
+				pipe_ctx->pipe_dlg_param.vupdate_offset,
+				pipe_ctx->pipe_dlg_param.vupdate_width);
+
+		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
+				pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing);
 
 		dc->hwss.blank_pixel_data(dc, pipe_ctx, blank);
 
@@ -2448,8 +2450,8 @@ static void dcn10_prepare_bandwidth(
 		if (context->stream_count == 0)
 			context->bw_ctx.bw.dcn.clk.phyclk_khz = 0;
 
-		dc->res_pool->clk_mgr->funcs->update_clocks(
-				dc->res_pool->clk_mgr,
+		dc->clk_mgr->funcs->update_clocks(
+				dc->clk_mgr,
 				context,
 				false);
 	}
@@ -2480,8 +2482,8 @@ static void dcn10_optimize_bandwidth(
 		if (context->stream_count == 0)
 			context->bw_ctx.bw.dcn.clk.phyclk_khz = 0;
 
-		dc->res_pool->clk_mgr->funcs->update_clocks(
-				dc->res_pool->clk_mgr,
+		dc->clk_mgr->funcs->update_clocks(
+				dc->clk_mgr,
 				context,
 				true);
 	}
@@ -2504,8 +2506,8 @@ static void set_drr(struct pipe_ctx **pipe_ctx,
 {
 	int i = 0;
 	struct drr_params params = {0};
-	// DRR should set trigger event to monitor surface update event
-	unsigned int event_triggers = 0x80;
+	// DRR set trigger event mapped to OTG_TRIG_A (bit 11) for manual control flow
+	unsigned int event_triggers = 0x800;
 
 	params.vertical_total_max = vmax;
 	params.vertical_total_min = vmin;
@@ -2644,9 +2646,6 @@ static void dcn10_wait_for_mpcc_disconnect(
 			res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
 			pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
 			hubp->funcs->set_blank(hubp, true);
-			/*DC_LOG_ERROR(dc->ctx->logger,
-					"[debug_mpo: wait_for_mpcc finished waiting on mpcc %d]\n",
-					i);*/
 		}
 	}
 
@@ -2790,7 +2789,6 @@ static void apply_front_porch_workaround(
 
 int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
 {
-	struct timing_generator *optc = pipe_ctx->stream_res.tg;
 	const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
 	struct dc_crtc_timing patched_crtc_timing;
 	int vesa_sync_start;
@@ -2813,7 +2811,7 @@ int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
 			* interlace_factor;
 
 	vertical_line_start = asic_blank_end -
-			optc->dlg_otg_param.vstartup_start + 1;
+			pipe_ctx->pipe_dlg_param.vstartup_start + 1;
 
 	return vertical_line_start;
 }
@@ -2961,6 +2959,18 @@ static void dcn10_unblank_stream(struct pipe_ctx *pipe_ctx,
 	}
 }
 
+static void dcn10_send_immediate_sdp_message(struct pipe_ctx *pipe_ctx,
+				const uint8_t *custom_sdp_message,
+				unsigned int sdp_message_size)
+{
+	if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+		pipe_ctx->stream_res.stream_enc->funcs->send_immediate_sdp_message(
+				pipe_ctx->stream_res.stream_enc,
+				custom_sdp_message,
+				sdp_message_size);
+	}
+}
+
 static const struct hw_sequencer_funcs dcn10_funcs = {
 	.program_gamut_remap = program_gamut_remap,
 	.init_hw = dcn10_init_hw,
@@ -2980,6 +2990,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
 	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
 	.enable_per_frame_crtc_position_reset = dcn10_enable_per_frame_crtc_position_reset,
 	.update_info_frame = dce110_update_info_frame,
+	.send_immediate_sdp_message = dcn10_send_immediate_sdp_message,
 	.enable_stream = dce110_enable_stream,
 	.disable_stream = dce110_disable_stream,
 	.unblank_stream = dcn10_unblank_stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 4b3b27a5d23b..ef94d6b15843 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -83,6 +83,8 @@ struct pipe_ctx *find_top_pipe_for_stream(
 
 int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
 
+void dcn10_build_prescale_params(struct  dc_bias_and_scale *bias_and_scale,
+		const struct dc_plane_state *plane_state);
 void lock_all_pipes(struct dc *dc,
 	struct dc_state *context,
 	bool lock);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 991622da9ed5..6e47444109d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -43,7 +43,7 @@
 #include "dcn10_hubp.h"
 #include "dcn10_hubbub.h"
 #include "dcn10_cm_common.h"
-#include "dcn10_clk_mgr.h"
+#include "clk_mgr.h"
 
 static unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
index 08db1e6b5166..0e0c6850247d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_ipp.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dcn10_ipp.h"
 #include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
index 0126a44ba012..0a520591fd3a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "reg_helper.h"
 
 #include "core_types.h"
@@ -726,6 +729,8 @@ void dcn10_link_encoder_construct(
 		enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
 				bp_cap_info.DP_HBR3_EN;
 		enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+		enc10->base.features.flags.bits.DP_IS_USB_C =
+				bp_cap_info.DP_IS_USB_C;
 	} else {
 		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
 				__func__,
@@ -1357,5 +1362,5 @@ void dcn10_aux_initialize(struct dcn10_link_encoder *enc10)
 
 	/* 1/4 window (the maximum allowed) */
 	AUX_REG_UPDATE(AUX_DPHY_RX_CONTROL0,
-			AUX_RX_RECEIVE_WINDOW, 1);
+			AUX_RX_RECEIVE_WINDOW, 0);
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
index ab958cff3b76..cec69cecf521 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_opp.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dcn10_opp.h"
 #include "reg_helper.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 0345d51e9d6f..e4b850a2d31f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -46,9 +46,7 @@
 * This is a workaround for a bug that has existed since R5xx and has not been
 * fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive.
 */
-static void optc1_apply_front_porch_workaround(
-	struct timing_generator *optc,
-	struct dc_crtc_timing *timing)
+static void apply_front_porch_workaround(struct dc_crtc_timing *timing)
 {
 	if (timing->flags.INTERLACE == 1) {
 		if (timing->v_front_porch < 2)
@@ -60,24 +58,33 @@ static void optc1_apply_front_porch_workaround(
 }
 
 void optc1_program_global_sync(
-		struct timing_generator *optc)
+		struct timing_generator *optc,
+		int vready_offset,
+		int vstartup_start,
+		int vupdate_offset,
+		int vupdate_width)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
-	if (optc->dlg_otg_param.vstartup_start == 0) {
+	optc1->vready_offset = vready_offset;
+	optc1->vstartup_start = vstartup_start;
+	optc1->vupdate_offset = vupdate_offset;
+	optc1->vupdate_width = vupdate_width;
+
+	if (optc1->vstartup_start == 0) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
 
 	REG_SET(OTG_VSTARTUP_PARAM, 0,
-		VSTARTUP_START, optc->dlg_otg_param.vstartup_start);
+		VSTARTUP_START, optc1->vstartup_start);
 
 	REG_SET_2(OTG_VUPDATE_PARAM, 0,
-			VUPDATE_OFFSET, optc->dlg_otg_param.vupdate_offset,
-			VUPDATE_WIDTH, optc->dlg_otg_param.vupdate_width);
+			VUPDATE_OFFSET, optc1->vupdate_offset,
+			VUPDATE_WIDTH, optc1->vupdate_width);
 
 	REG_SET(OTG_VREADY_PARAM, 0,
-			VREADY_OFFSET, optc->dlg_otg_param.vready_offset);
+			VREADY_OFFSET, optc1->vready_offset);
 }
 
 static void optc1_disable_stereo(struct timing_generator *optc)
@@ -132,25 +139,32 @@ void optc1_setup_vertical_interrupt2(
 void optc1_program_timing(
 	struct timing_generator *optc,
 	const struct dc_crtc_timing *dc_crtc_timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios)
 {
 	struct dc_crtc_timing patched_crtc_timing;
-	uint32_t vesa_sync_start;
 	uint32_t asic_blank_end;
 	uint32_t asic_blank_start;
 	uint32_t v_total;
 	uint32_t v_sync_end;
-	uint32_t v_init, v_fp2;
 	uint32_t h_sync_polarity, v_sync_polarity;
 	uint32_t start_point = 0;
 	uint32_t field_num = 0;
 	uint32_t h_div_2;
-	int32_t vertical_line_start;
 
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
+	optc1->signal = signal;
+	optc1->vready_offset = vready_offset;
+	optc1->vstartup_start = vstartup_start;
+	optc1->vupdate_offset = vupdate_offset;
+	optc1->vupdate_width = vupdate_width;
 	patched_crtc_timing = *dc_crtc_timing;
-	optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
+	apply_front_porch_workaround(&patched_crtc_timing);
 
 	/* Load horizontal timing */
 
@@ -163,24 +177,16 @@ void optc1_program_timing(
 			OTG_H_SYNC_A_START, 0,
 			OTG_H_SYNC_A_END, patched_crtc_timing.h_sync_width);
 
-	/* asic_h_blank_end = HsyncWidth + HbackPorch =
-	 * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart -
-	 * vesa.h_left_border
-	 */
-	vesa_sync_start = patched_crtc_timing.h_addressable +
-			patched_crtc_timing.h_border_right +
+	/* blank_start = line end - front porch */
+	asic_blank_start = patched_crtc_timing.h_total -
 			patched_crtc_timing.h_front_porch;
 
-	asic_blank_end = patched_crtc_timing.h_total -
-			vesa_sync_start -
+	/* blank_end = blank_start - active */
+	asic_blank_end = asic_blank_start -
+			patched_crtc_timing.h_border_right -
+			patched_crtc_timing.h_addressable -
 			patched_crtc_timing.h_border_left;
 
-	/* h_blank_start = v_blank_end + v_active */
-	asic_blank_start = asic_blank_end +
-			patched_crtc_timing.h_border_left +
-			patched_crtc_timing.h_addressable +
-			patched_crtc_timing.h_border_right;
-
 	REG_UPDATE_2(OTG_H_BLANK_START_END,
 			OTG_H_BLANK_START, asic_blank_start,
 			OTG_H_BLANK_END, asic_blank_end);
@@ -212,24 +218,15 @@ void optc1_program_timing(
 			OTG_V_SYNC_A_START, 0,
 			OTG_V_SYNC_A_END, v_sync_end);
 
-	vesa_sync_start = patched_crtc_timing.v_addressable +
-			patched_crtc_timing.v_border_bottom +
+	/* blank_start = frame end - front porch */
+	asic_blank_start = patched_crtc_timing.v_total -
 			patched_crtc_timing.v_front_porch;
 
-	asic_blank_end = (patched_crtc_timing.v_total -
-			vesa_sync_start -
-			patched_crtc_timing.v_border_top);
-
-	/* v_blank_start = v_blank_end + v_active */
-	asic_blank_start = asic_blank_end +
-			(patched_crtc_timing.v_border_top +
-			patched_crtc_timing.v_addressable +
-			patched_crtc_timing.v_border_bottom);
-
-	vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
-	v_fp2 = 0;
-	if (vertical_line_start < 0)
-		v_fp2 = -vertical_line_start;
+	/* blank_end = blank_start - active */
+	asic_blank_end = asic_blank_start -
+			patched_crtc_timing.v_border_bottom -
+			patched_crtc_timing.v_addressable -
+			patched_crtc_timing.v_border_top;
 
 	REG_UPDATE_2(OTG_V_BLANK_START_END,
 			OTG_V_BLANK_START, asic_blank_start,
@@ -242,10 +239,9 @@ void optc1_program_timing(
 	REG_UPDATE(OTG_V_SYNC_A_CNTL,
 		OTG_V_SYNC_A_POL, v_sync_polarity);
 
-	v_init = asic_blank_start;
-	if (optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT ||
-		optc->dlg_otg_param.signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
-		optc->dlg_otg_param.signal == SIGNAL_TYPE_EDP) {
+	if (optc1->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+			optc1->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+			optc1->signal == SIGNAL_TYPE_EDP) {
 		start_point = 1;
 		if (patched_crtc_timing.flags.INTERLACE == 1)
 			field_num = 1;
@@ -253,13 +249,10 @@ void optc1_program_timing(
 
 	/* Interlace */
 	if (REG(OTG_INTERLACE_CONTROL)) {
-		if (patched_crtc_timing.flags.INTERLACE == 1) {
+		if (patched_crtc_timing.flags.INTERLACE == 1)
 			REG_UPDATE(OTG_INTERLACE_CONTROL,
 					OTG_INTERLACE_ENABLE, 1);
-			v_init = v_init / 2;
-			if ((optc->dlg_otg_param.vstartup_start/2)*2 > asic_blank_end)
-				v_fp2 = v_fp2 / 2;
-		} else
+		else
 			REG_UPDATE(OTG_INTERLACE_CONTROL,
 					OTG_INTERLACE_ENABLE, 0);
 	}
@@ -268,16 +261,18 @@ void optc1_program_timing(
 	REG_UPDATE(CONTROL,
 			VTG0_ENABLE, 0);
 
-	REG_UPDATE_2(CONTROL,
-			VTG0_FP2, v_fp2,
-			VTG0_VCOUNT_INIT, v_init);
-
 	/* original code is using VTG offset to address OTG reg, seems wrong */
 	REG_UPDATE_2(OTG_CONTROL,
 			OTG_START_POINT_CNTL, start_point,
 			OTG_FIELD_NUMBER_CNTL, field_num);
 
-	optc1_program_global_sync(optc);
+	optc->funcs->program_global_sync(optc,
+			vready_offset,
+			vstartup_start,
+			vupdate_offset,
+			vupdate_width);
+
+	optc->funcs->set_vtg_params(optc, dc_crtc_timing);
 
 	/* TODO
 	 * patched_crtc_timing.flags.HORZ_COUNT_BY_TWO == 1
@@ -296,6 +291,48 @@ void optc1_program_timing(
 
 }
 
+void optc1_set_vtg_params(struct timing_generator *optc,
+		const struct dc_crtc_timing *dc_crtc_timing)
+{
+	struct dc_crtc_timing patched_crtc_timing;
+	uint32_t asic_blank_end;
+	uint32_t v_init;
+	uint32_t v_fp2 = 0;
+	int32_t vertical_line_start;
+
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	patched_crtc_timing = *dc_crtc_timing;
+	apply_front_porch_workaround(&patched_crtc_timing);
+
+	/* VCOUNT_INIT is the start of blank */
+	v_init = patched_crtc_timing.v_total - patched_crtc_timing.v_front_porch;
+
+	/* end of blank = v_init - active */
+	asic_blank_end = v_init -
+			patched_crtc_timing.v_border_bottom -
+			patched_crtc_timing.v_addressable -
+			patched_crtc_timing.v_border_top;
+
+	/* if VSTARTUP is before VSYNC, FP2 is the offset, otherwise 0 */
+	vertical_line_start = asic_blank_end - optc1->vstartup_start + 1;
+	if (vertical_line_start < 0)
+		v_fp2 = -vertical_line_start;
+
+	/* Interlace */
+	if (REG(OTG_INTERLACE_CONTROL)) {
+		if (patched_crtc_timing.flags.INTERLACE == 1) {
+			v_init = v_init / 2;
+			if ((optc1->vstartup_start/2)*2 > asic_blank_end)
+				v_fp2 = v_fp2 / 2;
+		}
+	}
+
+	REG_UPDATE_2(CONTROL,
+			VTG0_FP2, v_fp2,
+			VTG0_VCOUNT_INIT, v_init);
+}
+
 void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -754,6 +791,32 @@ void optc1_set_static_screen_control(
 			OTG_STATIC_SCREEN_FRAME_COUNT, 2);
 }
 
+void optc1_setup_manual_trigger(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	REG_SET(OTG_GLOBAL_CONTROL2, 0,
+			MANUAL_FLOW_CONTROL_SEL, optc->inst);
+
+	REG_SET_8(OTG_TRIGA_CNTL, 0,
+			OTG_TRIGA_SOURCE_SELECT, 22,
+			OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,
+			OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1,
+			OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0,
+			OTG_TRIGA_POLARITY_SELECT, 0,
+			OTG_TRIGA_FREQUENCY_SELECT, 0,
+			OTG_TRIGA_DELAY, 0,
+			OTG_TRIGA_CLEAR, 1);
+}
+
+void optc1_program_manual_trigger(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	REG_SET(OTG_MANUAL_FLOW_CONTROL, 0,
+			MANUAL_FLOW_CONTROL, 1);
+}
+
 
 /**
  *****************************************************************************
@@ -786,6 +849,10 @@ void optc1_set_drr(
 				OTG_FORCE_LOCK_ON_EVENT, 0,
 				OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
 				OTG_SET_V_TOTAL_MIN_MASK, 0);
+
+		// Setup manual flow control for EOF via TRIG_A
+		optc->funcs->setup_manual_trigger(optc);
+
 	} else {
 		REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
 				OTG_SET_V_TOTAL_MIN_MASK, 0,
@@ -1420,6 +1487,9 @@ static const struct timing_generator_funcs dcn10_tg_funcs = {
 		.clear_optc_underflow = optc1_clear_optc_underflow,
 		.get_crc = optc1_get_crc,
 		.configure_crc = optc1_configure_crc,
+		.set_vtg_params = optc1_set_vtg_params,
+		.program_manual_trigger = optc1_program_manual_trigger,
+		.setup_manual_trigger = optc1_setup_manual_trigger
 };
 
 void dcn10_timing_generator_init(struct optc *optc1)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index 4eb9a898c237..444c56c8104f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -84,13 +84,18 @@
 	SRI(OTG_CRC0_WINDOWA_X_CONTROL, OTG, inst),\
 	SRI(OTG_CRC0_WINDOWA_Y_CONTROL, OTG, inst),\
 	SRI(OTG_CRC0_WINDOWB_X_CONTROL, OTG, inst),\
-	SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst)
+	SRI(OTG_CRC0_WINDOWB_Y_CONTROL, OTG, inst),\
+	SR(GSL_SOURCE_SELECT),\
+	SRI(OTG_GLOBAL_CONTROL2, OTG, inst),\
+	SRI(OTG_TRIGA_MANUAL_TRIG, OTG, inst)
+
 
 #define TG_COMMON_REG_LIST_DCN1_0(inst) \
 	TG_COMMON_REG_LIST_DCN(inst),\
 	SRI(OTG_TEST_PATTERN_PARAMETERS, OTG, inst),\
 	SRI(OTG_TEST_PATTERN_CONTROL, OTG, inst),\
-	SRI(OTG_TEST_PATTERN_COLOR, OTG, inst)
+	SRI(OTG_TEST_PATTERN_COLOR, OTG, inst),\
+	SRI(OTG_MANUAL_FLOW_CONTROL, OTG, inst)
 
 
 struct dcn_optc_registers {
@@ -124,6 +129,8 @@ struct dcn_optc_registers {
 	uint32_t OTG_V_TOTAL_MIN;
 	uint32_t OTG_V_TOTAL_CONTROL;
 	uint32_t OTG_TRIGA_CNTL;
+	uint32_t OTG_TRIGA_MANUAL_TRIG;
+	uint32_t OTG_MANUAL_FLOW_CONTROL;
 	uint32_t OTG_FORCE_COUNT_NOW_CNTL;
 	uint32_t OTG_STATIC_SCREEN_CONTROL;
 	uint32_t OTG_STATUS_FRAME_COUNT;
@@ -156,6 +163,7 @@ struct dcn_optc_registers {
 	uint32_t OTG_CRC0_WINDOWA_Y_CONTROL;
 	uint32_t OTG_CRC0_WINDOWB_X_CONTROL;
 	uint32_t OTG_CRC0_WINDOWB_Y_CONTROL;
+	uint32_t GSL_SOURCE_SELECT;
 };
 
 #define TG_COMMON_MASK_SH_LIST_DCN(mask_sh)\
@@ -213,6 +221,11 @@ struct dcn_optc_registers {
 	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_SOURCE_PIPE_SELECT, mask_sh),\
 	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_RISING_EDGE_DETECT_CNTL, mask_sh),\
 	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, mask_sh),\
+	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_POLARITY_SELECT, mask_sh),\
+	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_FREQUENCY_SELECT, mask_sh),\
+	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_DELAY, mask_sh),\
+	SF(OTG0_OTG_TRIGA_CNTL, OTG_TRIGA_CLEAR, mask_sh),\
+	SF(OTG0_OTG_TRIGA_MANUAL_TRIG, OTG_TRIGA_MANUAL_TRIG, mask_sh),\
 	SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_EVENT_MASK, mask_sh),\
 	SF(OTG0_OTG_STATIC_SCREEN_CONTROL, OTG_STATIC_SCREEN_FRAME_COUNT, mask_sh),\
 	SF(OTG0_OTG_STATUS_FRAME_COUNT, OTG_FRAME_COUNT, mask_sh),\
@@ -266,8 +279,11 @@ struct dcn_optc_registers {
 	SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_START, mask_sh),\
 	SF(OTG0_OTG_CRC0_WINDOWB_X_CONTROL, OTG_CRC0_WINDOWB_X_END, mask_sh),\
 	SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_START, mask_sh),\
-	SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh)
-
+	SF(OTG0_OTG_CRC0_WINDOWB_Y_CONTROL, OTG_CRC0_WINDOWB_Y_END, mask_sh),\
+	SF(GSL_SOURCE_SELECT, GSL0_READY_SOURCE_SEL, mask_sh),\
+	SF(GSL_SOURCE_SELECT, GSL1_READY_SOURCE_SEL, mask_sh),\
+	SF(GSL_SOURCE_SELECT, GSL2_READY_SOURCE_SEL, mask_sh),\
+	SF(OTG0_OTG_GLOBAL_CONTROL2, MANUAL_FLOW_CONTROL_SEL, mask_sh)
 
 #define TG_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\
 	TG_COMMON_MASK_SH_LIST_DCN(mask_sh),\
@@ -282,7 +298,8 @@ struct dcn_optc_registers {
 	SF(OTG0_OTG_TEST_PATTERN_CONTROL, OTG_TEST_PATTERN_COLOR_FORMAT, mask_sh),\
 	SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_MASK, mask_sh),\
 	SF(OTG0_OTG_TEST_PATTERN_COLOR, OTG_TEST_PATTERN_DATA, mask_sh),\
-	SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh)
+	SF(ODM0_OPTC_DATA_SOURCE_SELECT, OPTC_SRC_SEL, mask_sh),\
+	SF(OTG0_OTG_MANUAL_FLOW_CONTROL, MANUAL_FLOW_CONTROL, mask_sh),\
 
 #define TG_REG_FIELD_LIST_DCN1_0(type) \
 	type VSTARTUP_START;\
@@ -338,6 +355,11 @@ struct dcn_optc_registers {
 	type OTG_TRIGA_SOURCE_PIPE_SELECT;\
 	type OTG_TRIGA_RISING_EDGE_DETECT_CNTL;\
 	type OTG_TRIGA_FALLING_EDGE_DETECT_CNTL;\
+	type OTG_TRIGA_POLARITY_SELECT;\
+	type OTG_TRIGA_FREQUENCY_SELECT;\
+	type OTG_TRIGA_DELAY;\
+	type OTG_TRIGA_CLEAR;\
+	type OTG_TRIGA_MANUAL_TRIG;\
 	type OTG_STATIC_SCREEN_EVENT_MASK;\
 	type OTG_STATIC_SCREEN_FRAME_COUNT;\
 	type OTG_FRAME_COUNT;\
@@ -413,7 +435,12 @@ struct dcn_optc_registers {
 	type OTG_CRC0_WINDOWB_X_START;\
 	type OTG_CRC0_WINDOWB_X_END;\
 	type OTG_CRC0_WINDOWB_Y_START;\
-	type OTG_CRC0_WINDOWB_Y_END;
+	type OTG_CRC0_WINDOWB_Y_END;\
+	type GSL0_READY_SOURCE_SEL;\
+	type GSL1_READY_SOURCE_SEL;\
+	type GSL2_READY_SOURCE_SEL;\
+	type MANUAL_FLOW_CONTROL;\
+	type MANUAL_FLOW_CONTROL_SEL;
 
 
 #define TG_REG_FIELD_LIST(type) \
@@ -446,6 +473,12 @@ struct optc {
 	uint32_t min_v_sync_width;
 	uint32_t min_v_blank;
 	uint32_t min_v_blank_interlace;
+
+	int vstartup_start;
+	int vupdate_offset;
+	int vupdate_width;
+	int vready_offset;
+	enum signal_type signal;
 };
 
 void dcn10_timing_generator_init(struct optc *optc);
@@ -481,6 +514,11 @@ bool optc1_validate_timing(
 void optc1_program_timing(
 	struct timing_generator *optc,
 	const struct dc_crtc_timing *dc_crtc_timing,
+	int vready_offset,
+	int vstartup_start,
+	int vupdate_offset,
+	int vupdate_width,
+	const enum signal_type signal,
 	bool use_vbios);
 
 void optc1_setup_vertical_interrupt0(
@@ -495,7 +533,11 @@ void optc1_setup_vertical_interrupt2(
 		uint32_t start_line);
 
 void optc1_program_global_sync(
-		struct timing_generator *optc);
+		struct timing_generator *optc,
+		int vready_offset,
+		int vstartup_start,
+		int vupdate_offset,
+		int vupdate_width);
 
 bool optc1_disable_crtc(struct timing_generator *optc);
 
@@ -582,4 +624,7 @@ bool optc1_get_crc(struct timing_generator *optc,
 
 bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing);
 
+void optc1_set_vtg_params(struct timing_generator *optc,
+		const struct dc_crtc_timing *dc_crtc_timing);
+
 #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 7eccb54c421d..3272030c82c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -23,13 +23,14 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dc.h"
 
 #include "resource.h"
 #include "include/irq_service_interface.h"
 #include "dcn10_resource.h"
-
 #include "dcn10_ipp.h"
 #include "dcn10_mpc.h"
 #include "irq/dcn10/irq_service_dcn10.h"
@@ -40,7 +41,6 @@
 #include "dcn10_opp.h"
 #include "dcn10_link_encoder.h"
 #include "dcn10_stream_encoder.h"
-#include "dcn10_clk_mgr.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
 #include "dce/dce_hwseq.h"
@@ -153,9 +153,7 @@ enum dcn10_clk_src_array_id {
 	DCN10_CLK_SRC_PLL2,
 	DCN10_CLK_SRC_PLL3,
 	DCN10_CLK_SRC_TOTAL,
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	DCN101_CLK_SRC_TOTAL = DCN10_CLK_SRC_PLL3
-#endif
 };
 
 /* begin *********************
@@ -202,6 +200,7 @@ enum dcn10_clk_src_array_id {
 #define MMHUB_SR(reg_name)\
 		.reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) +  \
 					mm ## reg_name
+
 /* macros to expend register list macro defined in HW object header file
  * end *********************/
 
@@ -445,7 +444,6 @@ static const struct bios_registers bios_regs = {
 	HUBP_REG_LIST_DCN10(id)\
 }
 
-
 static const struct dcn_mi_registers hubp_regs[] = {
 	hubp_regs(0),
 	hubp_regs(1),
@@ -461,7 +459,6 @@ static const struct dcn_mi_mask hubp_mask = {
 		HUBP_MASK_SH_LIST_DCN10(_MASK)
 };
 
-
 static const struct dcn_hubbub_registers hubbub_reg = {
 		HUBBUB_REG_LIST_DCN10(0)
 };
@@ -504,7 +501,6 @@ static const struct resource_caps res_cap = {
 		.num_ddc = 4,
 };
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 static const struct resource_caps rv2_res_cap = {
 		.num_timing_generator = 3,
 		.num_opp = 3,
@@ -514,7 +510,6 @@ static const struct resource_caps rv2_res_cap = {
 		.num_pll = 3,
 		.num_ddc = 3,
 };
-#endif
 
 static const struct dc_plane_cap plane_cap = {
 	.type = DC_PLANE_TYPE_DCN_UNIVERSAL,
@@ -966,9 +961,6 @@ static void destruct(struct dcn10_resource_pool *pool)
 	if (pool->base.dmcu != NULL)
 		dce_dmcu_destroy(&pool->base.dmcu);
 
-	if (pool->base.clk_mgr != NULL)
-		dce_clk_mgr_destroy(&pool->base.clk_mgr);
-
 	kfree(pool->base.pp_smu);
 }
 
@@ -1217,6 +1209,38 @@ static enum dc_status dcn10_get_default_swizzle_mode(struct dc_plane_state *plan
 	return result;
 }
 
+struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream)
+{
+	int i;
+	int j = -1;
+	struct dc_link *link = stream->link;
+
+	for (i = 0; i < pool->stream_enc_count; i++) {
+		if (!res_ctx->is_stream_enc_acquired[i] &&
+				pool->stream_enc[i]) {
+			/* Store first available for MST second display
+			 * in daisy chain use case
+			 */
+			j = i;
+			if (pool->stream_enc[i]->id ==
+					link->link_enc->preferred_engine)
+				return pool->stream_enc[i];
+		}
+	}
+
+	/*
+	 * For CZ and later, we can allow DIG FE and BE to differ for all display types
+	 */
+
+	if (j >= 0)
+		return pool->stream_enc[j];
+
+	return NULL;
+}
+
 static const struct dc_cap_funcs cap_funcs = {
 	.get_dcc_compression_cap = dcn10_get_dcc_compression_cap
 };
@@ -1229,7 +1253,8 @@ static const struct resource_funcs dcn10_res_pool_funcs = {
 	.validate_plane = dcn10_validate_plane,
 	.validate_global = dcn10_validate_global,
 	.add_stream_to_ctx = dcn10_add_stream_to_ctx,
-	.get_default_swizzle_mode = dcn10_get_default_swizzle_mode
+	.get_default_swizzle_mode = dcn10_get_default_swizzle_mode,
+	.find_first_free_match_stream_enc_for_link = dcn10_find_first_free_match_stream_enc_for_link
 };
 
 static uint32_t read_pipe_fuses(struct dc_context *ctx)
@@ -1252,11 +1277,9 @@ static bool construct(
 
 	ctx->dc_bios->regs = &bios_regs;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	if (ctx->dce_version == DCN_VERSION_1_01)
 		pool->base.res_cap = &rv2_res_cap;
 	else
-#endif
 		pool->base.res_cap = &res_cap;
 	pool->base.funcs = &dcn10_res_pool_funcs;
 
@@ -1273,10 +1296,8 @@ static bool construct(
 	/* max pipe num for ASIC before check pipe fuses */
 	pool->base.pipe_count = pool->base.res_cap->num_timing_generator;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	if (dc->ctx->dce_version == DCN_VERSION_1_01)
 		pool->base.pipe_count = 3;
-#endif
 	dc->caps.max_video_width = 3840;
 	dc->caps.max_downscale_ratio = 200;
 	dc->caps.i2c_speed_in_khz = 100;
@@ -1309,26 +1330,17 @@ static bool construct(
 				CLOCK_SOURCE_COMBO_PHY_PLL2,
 				&clk_src_regs[2], false);
 
-#ifdef CONFIG_DRM_AMD_DC_DCN1_01
 	if (dc->ctx->dce_version == DCN_VERSION_1_0) {
 		pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
 				dcn10_clock_source_create(ctx, ctx->dc_bios,
 					CLOCK_SOURCE_COMBO_PHY_PLL3,
 					&clk_src_regs[3], false);
 	}
-#else
-	pool->base.clock_sources[DCN10_CLK_SRC_PLL3] =
-			dcn10_clock_source_create(ctx, ctx->dc_bios,
-				CLOCK_SOURCE_COMBO_PHY_PLL3,
-				&clk_src_regs[3], false);
-#endif
 
 	pool->base.clk_src_count = DCN10_CLK_SRC_TOTAL;
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	if (dc->ctx->dce_version == DCN_VERSION_1_01)
 		pool->base.clk_src_count = DCN101_CLK_SRC_TOTAL;
-#endif
 
 	pool->base.dp_clock_source =
 			dcn10_clock_source_create(ctx, ctx->dc_bios,
@@ -1343,12 +1355,6 @@ static bool construct(
 			goto fail;
 		}
 	}
-	pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
-	if (pool->base.clk_mgr == NULL) {
-		dm_error("DC: failed to create display clock!\n");
-		BREAK_TO_DEBUGGER();
-		goto fail;
-	}
 
 	pool->base.dmcu = dcn10_dmcu_create(ctx,
 			&dmcu_regs,
@@ -1374,7 +1380,6 @@ static bool construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
 		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
 		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
@@ -1385,7 +1390,6 @@ static bool construct(
 		dcn_soc->dram_clock_change_latency = 23;
 		dcn_ip->max_num_dpp = 3;
 	}
-#endif
 	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
 		dc->dcn_soc->urgent_latency = 3;
 		dc->debug.disable_dmcu = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
index 999c684a0b36..633025ccb870 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.h
@@ -42,6 +42,11 @@ struct resource_pool *dcn10_create_resource_pool(
 		const struct dc_init_data *init_data,
 		struct dc *dc);
 
+struct stream_encoder *dcn10_find_first_free_match_stream_enc_for_link(
+		struct resource_context *res_ctx,
+		const struct resource_pool *pool,
+		struct dc_stream_state *stream);
+
 
 #endif /* __DC_RESOURCE_DCN10_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
index 8ee9f6dc1d62..b9ffbf6b58ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -23,6 +23,7 @@
  *
  */
 
+#include <linux/delay.h>
 
 #include "dc_bios_types.h"
 #include "dcn10_stream_encoder.h"
@@ -415,6 +416,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
 	case COLOR_SPACE_APPCTRL:
 	case COLOR_SPACE_CUSTOMPOINTS:
 	case COLOR_SPACE_UNKNOWN:
+	case COLOR_SPACE_YCBCR709_BLACK:
 		/* do nothing */
 		break;
 	}
@@ -471,7 +473,7 @@ void enc1_stream_encoder_dp_set_stream_attribute(
 		hw_crtc_timing.v_addressable + hw_crtc_timing.v_border_bottom);
 }
 
-static void enc1_stream_encoder_set_stream_attribute_helper(
+void enc1_stream_encoder_set_stream_attribute_helper(
 		struct dcn10_stream_encoder *enc1,
 		struct dc_crtc_timing *crtc_timing)
 {
@@ -726,11 +728,9 @@ void enc1_stream_encoder_update_dp_info_packets(
 				3,  /* packetIndex */
 				&info_frame->hdrsmd);
 
-	if (info_frame->dpsdp.valid)
-		enc1_update_generic_info_packet(
-				enc1,
-				4,/* packetIndex */
-				&info_frame->dpsdp);
+	/* packetIndex 4 is used for send immediate sdp message, and please
+	 * use other packetIndex (such as 5,6) for other info packet
+	 */
 
 	/* enable/disable transmission of packet(s).
 	 * If enabled, packet transmission begins on the next frame
@@ -738,7 +738,101 @@ void enc1_stream_encoder_update_dp_info_packets(
 	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
 	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
 	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
-	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, info_frame->dpsdp.valid);
+
+
+	/* This bit is the master enable bit.
+	 * When enabling secondary stream engine,
+	 * this master bit must also be set.
+	 * This register shared with audio info frame.
+	 * Therefore we need to enable master bit
+	 * if at least on of the fields is not 0
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_send_immediate_sdp_message(
+	struct stream_encoder *enc,
+	const uint8_t *custom_sdp_message,
+	unsigned int sdp_message_size)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	/* TODOFPGA Figure out a proper number for max_retries polling for lock
+	 * use 50 for now.
+	 */
+	uint32_t max_retries = 50;
+
+	/* check if GSP4 is transmitted */
+	REG_WAIT(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING,
+		0, 10, max_retries);
+
+	/* disable GSP4 transmitting */
+	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 0);
+
+	/* transmit GSP4 at the earliest time in a frame */
+	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, 1);
+
+	/*we need turn on clock before programming AFMT block*/
+	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
+
+	/* check if HW reading GSP memory */
+	REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
+			0, 10, max_retries);
+
+	/* HW does is not reading GSP memory not reading too long ->
+	 * something wrong. clear GPS memory access and notify?
+	 * hw SW is writing to GSP memory
+	 */
+	REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
+
+	/* use generic packet 4 for immediate sdp message */
+	REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
+			AFMT_GENERIC_INDEX, 4);
+
+	/* write generic packet header
+	 * (4th byte is for GENERIC0 only)
+	 */
+	REG_SET_4(AFMT_GENERIC_HDR, 0,
+			AFMT_GENERIC_HB0, custom_sdp_message[0],
+			AFMT_GENERIC_HB1, custom_sdp_message[1],
+			AFMT_GENERIC_HB2, custom_sdp_message[2],
+			AFMT_GENERIC_HB3, custom_sdp_message[3]);
+
+	/* write generic packet contents
+	 * (we never use last 4 bytes)
+	 * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers
+	 */
+	{
+		const uint32_t *content =
+			(const uint32_t *) &custom_sdp_message[4];
+
+		REG_WRITE(AFMT_GENERIC_0, *content++);
+		REG_WRITE(AFMT_GENERIC_1, *content++);
+		REG_WRITE(AFMT_GENERIC_2, *content++);
+		REG_WRITE(AFMT_GENERIC_3, *content++);
+		REG_WRITE(AFMT_GENERIC_4, *content++);
+		REG_WRITE(AFMT_GENERIC_5, *content++);
+		REG_WRITE(AFMT_GENERIC_6, *content++);
+		REG_WRITE(AFMT_GENERIC_7, *content);
+	}
+
+	/* check whether GENERIC4 registers double buffer update in immediate mode
+	 * is pending
+	 */
+	REG_WAIT(AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING,
+			0, 10, max_retries);
+
+	/* atomically update double-buffered GENERIC4 registers in immediate mode
+	 * (update immediately)
+	 */
+	REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+			AFMT_GENERIC4_IMMEDIATE_UPDATE, 1);
+
+	/* enable GSP4 transmitting */
+	REG_UPDATE(DP_SEC_CNTL2, DP_SEC_GSP4_SEND, 1);
 
 	/* This bit is the master enable bit.
 	 * When enabling secondary stream engine,
@@ -998,19 +1092,6 @@ union audio_cea_channels {
 	} channels;
 };
 
-struct audio_clock_info {
-	/* pixel clock frequency*/
-	uint32_t pixel_clock_in_10khz;
-	/* N - 32KHz audio */
-	uint32_t n_32khz;
-	/* CTS - 32KHz audio*/
-	uint32_t cts_32khz;
-	uint32_t n_44khz;
-	uint32_t cts_44khz;
-	uint32_t n_48khz;
-	uint32_t cts_48khz;
-};
-
 /* 25.2MHz/1.001*/
 /* 25.2MHz/1.001*/
 /* 25.2MHz*/
@@ -1113,7 +1194,7 @@ static union audio_cea_channels speakers_to_channels(
 	return cea_channels;
 }
 
-static void get_audio_clock_info(
+void get_audio_clock_info(
 	enum dc_color_depth color_depth,
 	uint32_t crtc_pixel_clock_in_khz,
 	uint32_t actual_pixel_clock_in_khz,
@@ -1317,7 +1398,7 @@ static void enc1_se_setup_dp_audio(
 	REG_UPDATE(AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, 0);
 }
 
-static void enc1_se_enable_audio_clock(
+void enc1_se_enable_audio_clock(
 	struct stream_encoder *enc,
 	bool enable)
 {
@@ -1339,7 +1420,7 @@ static void enc1_se_enable_audio_clock(
 	 */
 }
 
-static void enc1_se_enable_dp_audio(
+void enc1_se_enable_dp_audio(
 	struct stream_encoder *enc)
 {
 	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
@@ -1462,6 +1543,8 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
 		enc1_stream_encoder_stop_hdmi_info_packets,
 	.update_dp_info_packets =
 		enc1_stream_encoder_update_dp_info_packets,
+	.send_immediate_sdp_message =
+		enc1_stream_encoder_send_immediate_sdp_message,
 	.stop_dp_info_packets =
 		enc1_stream_encoder_stop_dp_info_packets,
 	.dp_blank =
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
index e654c2f55971..46c93ffc28d2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -81,6 +81,7 @@
 	SRI(DP_MSE_RATE_UPDATE, DP, id), \
 	SRI(DP_PIXEL_FORMAT, DP, id), \
 	SRI(DP_SEC_CNTL, DP, id), \
+	SRI(DP_SEC_CNTL2, DP, id), \
 	SRI(DP_STEER_FIFO, DP, id), \
 	SRI(DP_VID_M, DP, id), \
 	SRI(DP_VID_N, DP, id), \
@@ -118,10 +119,12 @@ struct dcn10_stream_enc_registers {
 	uint32_t AFMT_60958_1;
 	uint32_t AFMT_60958_2;
 	uint32_t DIG_FE_CNTL;
+	uint32_t DIG_FE_CNTL2;
 	uint32_t DP_MSE_RATE_CNTL;
 	uint32_t DP_MSE_RATE_UPDATE;
 	uint32_t DP_PIXEL_FORMAT;
 	uint32_t DP_SEC_CNTL;
+	uint32_t DP_SEC_CNTL2;
 	uint32_t DP_STEER_FIFO;
 	uint32_t DP_VID_M;
 	uint32_t DP_VID_N;
@@ -191,6 +194,10 @@ struct dcn10_stream_enc_registers {
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_PENDING, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL4, DP_SEC_GSP4_LINE_NUM, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP4_SEND_ANY_LINE, mask_sh),\
 	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
 	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
 	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
@@ -245,6 +252,7 @@ struct dcn10_stream_enc_registers {
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\
@@ -253,6 +261,7 @@ struct dcn10_stream_enc_registers {
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_IMMEDIATE_UPDATE, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\
 	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\
@@ -260,6 +269,7 @@ struct dcn10_stream_enc_registers {
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
 	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_PPS, mask_sh),\
 	SE_SF(DP0_DP_SEC_CNTL2, DP_SEC_GSP7_SEND, mask_sh),\
 	SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
 	SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
@@ -304,6 +314,7 @@ struct dcn10_stream_enc_registers {
 	type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\
 	type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\
 	type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC4_IMMEDIATE_UPDATE_PENDING;\
 	type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\
 	type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\
 	type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\
@@ -312,6 +323,7 @@ struct dcn10_stream_enc_registers {
 	type AFMT_GENERIC2_FRAME_UPDATE;\
 	type AFMT_GENERIC3_FRAME_UPDATE;\
 	type AFMT_GENERIC4_FRAME_UPDATE;\
+	type AFMT_GENERIC4_IMMEDIATE_UPDATE;\
 	type AFMT_GENERIC5_FRAME_UPDATE;\
 	type AFMT_GENERIC6_FRAME_UPDATE;\
 	type AFMT_GENERIC7_FRAME_UPDATE;\
@@ -366,7 +378,12 @@ struct dcn10_stream_enc_registers {
 	type DP_SEC_GSP5_ENABLE;\
 	type DP_SEC_GSP6_ENABLE;\
 	type DP_SEC_GSP7_ENABLE;\
+	type DP_SEC_GSP7_PPS;\
 	type DP_SEC_GSP7_SEND;\
+	type DP_SEC_GSP4_SEND;\
+	type DP_SEC_GSP4_SEND_PENDING;\
+	type DP_SEC_GSP4_LINE_NUM;\
+	type DP_SEC_GSP4_SEND_ANY_LINE;\
 	type DP_SEC_MPG_ENABLE;\
 	type DP_VID_STREAM_DIS_DEFER;\
 	type DP_VID_STREAM_ENABLE;\
@@ -484,6 +501,11 @@ void enc1_stream_encoder_update_dp_info_packets(
 	struct stream_encoder *enc,
 	const struct encoder_info_frame *info_frame);
 
+void enc1_stream_encoder_send_immediate_sdp_message(
+	struct stream_encoder *enc,
+	const uint8_t *custom_sdp_message,
+				unsigned int sdp_message_size);
+
 void enc1_stream_encoder_stop_dp_info_packets(
 	struct stream_encoder *enc);
 
@@ -530,4 +552,21 @@ void enc1_dig_connect_to_otg(
 	struct stream_encoder *enc,
 	int tg_inst);
 
+void enc1_stream_encoder_set_stream_attribute_helper(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_crtc_timing *crtc_timing);
+
+void enc1_se_enable_audio_clock(
+	struct stream_encoder *enc,
+	bool enable);
+
+void enc1_se_enable_dp_audio(
+	struct stream_encoder *enc);
+
+void get_audio_clock_info(
+	enum dc_color_depth color_depth,
+	uint32_t crtc_pixel_clock_in_khz,
+	uint32_t actual_pixel_clock_in_khz,
+	struct audio_clock_info *audio_clock_info);
+
 #endif /* __DC_STREAM_ENCODER_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index 4fc4208d1472..471f3df88c92 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -41,6 +41,7 @@ enum pp_smu_ver {
 	 */
 	PP_SMU_UNSUPPORTED,
 	PP_SMU_VER_RV,
+
 	PP_SMU_VER_MAX
 };
 
@@ -56,12 +57,31 @@ struct pp_smu {
 	const void *dm;
 };
 
+enum pp_smu_status {
+	PP_SMU_RESULT_UNDEFINED = 0,
+	PP_SMU_RESULT_OK = 1,
+	PP_SMU_RESULT_FAIL,
+	PP_SMU_RESULT_UNSUPPORTED
+};
+
+
+#define PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MIN 0x0
+#define PP_SMU_WM_SET_RANGE_CLK_UNCONSTRAINED_MAX 0xFFFF
+
+enum wm_type {
+	WM_TYPE_PSTATE_CHG = 0,
+	WM_TYPE_RETRAINING = 1,
+};
+
+/* This structure is a copy of WatermarkRowGeneric_t defined by smuxx_driver_if.h*/
 struct pp_smu_wm_set_range {
-	unsigned int wm_inst;
-	uint32_t min_fill_clk_mhz;
-	uint32_t max_fill_clk_mhz;
-	uint32_t min_drain_clk_mhz;
-	uint32_t max_drain_clk_mhz;
+	uint16_t min_fill_clk_mhz;
+	uint16_t max_fill_clk_mhz;
+	uint16_t min_drain_clk_mhz;
+	uint16_t max_drain_clk_mhz;
+
+	uint8_t wm_inst;
+	uint8_t wm_type;
 };
 
 #define MAX_WATERMARK_SETS 4
@@ -80,6 +100,7 @@ struct pp_smu_funcs_rv {
 	/* PPSMC_MSG_SetDisplayCount
 	 * 0 triggers S0i2 optimization
 	 */
+
 	void (*set_display_count)(struct pp_smu *pp, int count);
 
 	/* reader and writer WM's are sent together as part of one table*/
@@ -115,13 +136,13 @@ struct pp_smu_funcs_rv {
 
 	/* PME w/a */
 	void (*set_pme_wa_enable)(struct pp_smu *pp);
-
 };
 
 struct pp_smu_funcs {
 	struct pp_smu ctx;
 	union {
 		struct pp_smu_funcs_rv rv_funcs;
+
 	};
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index c59e582c1f40..174c414e0982 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -147,4 +147,10 @@ enum dm_validation_status {
 	DML_FAIL_V_RATIO_PREFETCH,
 };
 
+enum writeback_config {
+	dm_normal,
+	dm_whole_buffer_for_single_stream_no_interleave,
+	dm_whole_buffer_for_single_stream_interleave,
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index c5b791d158a7..6cc59f138095 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -219,6 +219,9 @@ struct _vcs_dpi_display_pipe_source_params_st {
 	unsigned char xfc_enable;
 	unsigned char xfc_slave;
 	struct _vcs_dpi_display_xfc_params_st xfc_params;
+	//for vstartuplines calculation freesync
+	unsigned char v_total_min;
+	unsigned char v_total_max;
 };
 struct writeback_st {
 	int wb_src_height;
@@ -289,6 +292,8 @@ struct _vcs_dpi_display_pipe_dest_params_st {
 	unsigned char otg_inst;
 	unsigned char odm_combine;
 	unsigned char use_maximum_vstartup;
+	unsigned int vtotal_max;
+	unsigned int vtotal_min;
 };
 
 struct _vcs_dpi_display_pipe_params_st {
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
index cf76ea2d9f5a..d03b38e80d9b 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_base.c
@@ -27,6 +27,8 @@
  * Pre-requisites: headers required by header of this unit
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/gpio_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
index 3c63a3c04dbb..a7fab44f66b6 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
@@ -27,6 +27,8 @@
  * Pre-requisites: headers required by header of this unit
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "include/gpio_interface.h"
 #include "include/gpio_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
index 310f48965b27..240cdd8d9689 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/gpio_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index c2028c4744a6..a15aca47342c 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 /*
@@ -84,10 +86,6 @@ bool dal_hw_factory_init(
 		return true;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
-		dal_hw_factory_dcn10_init(factory);
-		return true;
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	case DCN_VERSION_1_01:
 		dal_hw_factory_dcn10_init(factory);
 		return true;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
index 784feccc5853..5e11d748e6f3 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_hpd.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/gpio_types.h"
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 236ca28784a9..77615146b96e 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -84,11 +84,6 @@ bool dal_hw_translate_init(
 		dal_hw_translate_dcn10_init(translate);
 		return true;
 #endif
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
-	case DCN_VERSION_1_01:
-		dal_hw_translate_dcn10_init(translate);
-		return true;
-#endif
 
 	default:
 		BREAK_TO_DEBUGGER();
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 6f5ab05d6467..80709c9343c1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -82,7 +82,6 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option);
 
 void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable);
 /********** DAL Core*********************/
-#include "hw/clk_mgr.h"
 #include "transform.h"
 #include "dpp.h"
 
@@ -123,6 +122,11 @@ struct resource_funcs {
 	enum dc_status (*get_default_swizzle_mode)(
 			struct dc_plane_state *plane_state);
 
+	struct stream_encoder *(*find_first_free_match_stream_enc_for_link)(
+			struct resource_context *res_ctx,
+			const struct resource_pool *pool,
+			struct dc_stream_state *stream);
+
 };
 
 struct audio_support{
@@ -173,7 +177,6 @@ struct resource_pool {
 	unsigned int audio_count;
 	struct audio_support audio_support;
 
-	struct clk_mgr *clk_mgr;
 	struct dccg *dccg;
 	struct irq_service *irqs;
 
@@ -212,6 +215,25 @@ struct plane_resource {
 	struct dcn_fe_bandwidth bw;
 };
 
+union pipe_update_flags {
+	struct {
+		uint32_t enable : 1;
+		uint32_t disable : 1;
+		uint32_t odm : 1;
+		uint32_t global_sync : 1;
+		uint32_t opp_changed : 1;
+		uint32_t tg_changed : 1;
+		uint32_t mpcc : 1;
+		uint32_t dppclk : 1;
+		uint32_t hubp_interdependent : 1;
+		uint32_t hubp_rq_dlg_ttu : 1;
+		uint32_t gamut_remap : 1;
+		uint32_t scaler : 1;
+		uint32_t viewport : 1;
+	} bits;
+	uint32_t raw;
+};
+
 struct pipe_ctx {
 	struct dc_plane_state *plane_state;
 	struct dc_stream_state *stream;
@@ -234,6 +256,7 @@ struct pipe_ctx {
 	struct _vcs_dpi_display_rq_regs_st rq_regs;
 	struct _vcs_dpi_display_pipe_dest_params_st pipe_dlg_param;
 #endif
+	union pipe_update_flags update_flags;
 };
 
 struct resource_context {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
index 263c09630c06..806f3041db14 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h
@@ -32,7 +32,7 @@
 
 #include "bw_fixed.h"
 #include "../dml/display_mode_lib.h"
-#include "hw/clk_mgr.h"
+
 
 struct dc;
 struct dc_state;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
index 86dc39a02408..d607b3191954 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h
@@ -37,7 +37,7 @@ struct abm_backlight_registers {
 struct abm {
 	struct dc_context *ctx;
 	const struct abm_funcs *funcs;
-
+	bool dmcu_is_running;
 	/* registers setting needs to be saved and restored at InitBacklight */
 	struct abm_backlight_registers stored_backlight_registers;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
index 925204f49717..6ed1fb8c9300 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/audio.h
@@ -57,6 +57,7 @@ struct audio {
 	const struct audio_funcs *funcs;
 	struct dc_context *ctx;
 	unsigned int inst;
+	bool enabled;
 };
 
 #endif  /* __DAL_AUDIO__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 31bd6d5183ab..721e13135e76 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -26,17 +26,22 @@
 #ifndef __DAL_CLK_MGR_H__
 #define __DAL_CLK_MGR_H__
 
-#include "dm_services_types.h"
 #include "dc.h"
 
-struct clk_mgr {
-	struct dc_context *ctx;
-	const struct clk_mgr_funcs *funcs;
+/* Public interfaces */
 
-	struct dc_clocks clks;
+struct clk_states {
+	uint32_t dprefclk_khz;
 };
 
 struct clk_mgr_funcs {
+	/*
+	 * This function should set new clocks based on the input "safe_to_lower".
+	 * If safe_to_lower == false, then only clocks which are to be increased
+	 * should changed.
+	 * If safe_to_lower == true, then only clocks which are to be decreased
+	 * should be changed.
+	 */
 	void (*update_clocks)(struct clk_mgr *clk_mgr,
 			struct dc_state *context,
 			bool safe_to_lower);
@@ -44,6 +49,24 @@ struct clk_mgr_funcs {
 	int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr);
 
 	void (*init_clocks)(struct clk_mgr *clk_mgr);
+
+	void (*enable_pme_wa) (struct clk_mgr *clk_mgr);
 };
 
+void dce121_clock_patch_xgmi_ss_info(struct clk_mgr *clk_mgr_base);
+
+struct clk_mgr {
+	struct dc_context *ctx;
+	struct clk_mgr_funcs *funcs;
+	struct dc_clocks clks;
+	int dprefclk_khz; // Used by program pixel clock in clock source funcs, need to figureout where this goes
+};
+
+/* forward declarations */
+struct dccg;
+
+struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *pp_smu, struct dccg *dccg);
+
+void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr);
+
 #endif /* __DAL_CLK_MGR_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
index c8f8c442142a..6e189b1283aa 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012-16 Advanced Micro Devices, Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -23,14 +23,40 @@
  *
  */
 
-
-#ifndef _DCE_CLK_MGR_H_
-#define _DCE_CLK_MGR_H_
+#ifndef __DAL_CLK_MGR_INTERNAL_H__
+#define __DAL_CLK_MGR_INTERNAL_H__
 
 #include "clk_mgr.h"
-#include "dccg.h"
 
-#define MEMORY_TYPE_MULTIPLIER_CZ 4
+/*
+ * only thing needed from here is MEMORY_TYPE_MULTIPLIER_CZ, which is also
+ * used in resource, perhaps this should be defined somewhere more common.
+ */
+#include "resource.h"
+
+/*
+ ***************************************************************************************
+ ****************** Clock Manager Private Macros and Defines ***************************
+ ***************************************************************************************
+ */
+
+#define TO_CLK_MGR_INTERNAL(clk_mgr)\
+	container_of(clk_mgr, struct clk_mgr_internal, base)
+
+#define CTX \
+	clk_mgr->base.ctx
+#define DC_LOGGER \
+	clk_mgr->ctx->logger
+
+
+
+
+#define CLK_BASE(inst) \
+	CLK_BASE_INNER(inst)
+
+#define CLK_SRI(reg_name, block, inst)\
+	.reg_name = CLK_BASE(mm ## block ## _ ## inst ## _ ## reg_name ## _BASE_IDX) + \
+					mm ## block ## _ ## inst ## _ ## reg_name
 
 #define CLK_COMMON_REG_LIST_DCE_BASE() \
 	.DPREFCLK_CNTL = mmDPREFCLK_CNTL, \
@@ -50,12 +76,31 @@
 	CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, mask_sh),\
 	CLK_SF(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, mask_sh)
 
+#define CLK_MASK_SH_LIST_RV1(mask_sh) \
+	CLK_COMMON_MASK_SH_LIST_DCN_COMMON_BASE(mask_sh),\
+	CLK_SF(MP1_SMN_C2PMSG_67, CONTENT, mask_sh),\
+	CLK_SF(MP1_SMN_C2PMSG_83, CONTENT, mask_sh),\
+	CLK_SF(MP1_SMN_C2PMSG_91, CONTENT, mask_sh),
+
+
 #define CLK_REG_FIELD_LIST(type) \
 	type DPREFCLK_SRC_SEL; \
 	type DENTIST_DPREFCLK_WDIVIDER; \
 	type DENTIST_DISPCLK_WDIVIDER; \
 	type DENTIST_DISPCLK_CHG_DONE;
 
+/*
+ ***************************************************************************************
+ ****************** Clock Manager Private Structures ***********************************
+ ***************************************************************************************
+ */
+
+struct clk_mgr_registers {
+	uint32_t DPREFCLK_CNTL;
+	uint32_t DENTIST_DISPCLK_CNTL;
+
+};
+
 struct clk_mgr_shift {
 	CLK_REG_FIELD_LIST(uint8_t)
 };
@@ -64,34 +109,42 @@ struct clk_mgr_mask {
 	CLK_REG_FIELD_LIST(uint32_t)
 };
 
-struct clk_mgr_registers {
-	uint32_t DPREFCLK_CNTL;
-	uint32_t DENTIST_DISPCLK_CNTL;
-};
 
 struct state_dependent_clocks {
 	int display_clk_khz;
 	int pixel_clk_khz;
 };
 
-struct dce_clk_mgr {
+struct clk_mgr_internal {
 	struct clk_mgr base;
+	struct pp_smu_funcs *pp_smu;
+	struct clk_mgr_internal_funcs *funcs;
+
+	struct dccg *dccg;
+
+	/*
+	 * For backwards compatbility with previous implementation
+	 * TODO: remove these after everything transitions to new pattern
+	 * Rationale is that clk registers change a lot across DCE versions
+	 * and a shared data structure doesn't really make sense.
+	 */
 	const struct clk_mgr_registers *regs;
 	const struct clk_mgr_shift *clk_mgr_shift;
 	const struct clk_mgr_mask *clk_mgr_mask;
 
-	struct dccg *dccg;
-
 	struct state_dependent_clocks max_clks_by_state[DM_PP_CLOCKS_MAX_STATES];
 
+	/*TODO: figure out which of the below fields should be here vs in asic specific portion */
 	int dentist_vco_freq_khz;
 
 	/* Cache the status of DFS-bypass feature*/
 	bool dfs_bypass_enabled;
 	/* True if the DFS-bypass feature is enabled and active. */
 	bool dfs_bypass_active;
-	/* Cache the display clock returned by VBIOS if DFS-bypass is enabled.
-	 * This is basically "Crystal Frequency In KHz" (XTALIN) frequency */
+	/*
+	 * Cache the display clock returned by VBIOS if DFS-bypass is enabled.
+	 * This is basically "Crystal Frequency In KHz" (XTALIN) frequency
+	 */
 	int dfs_bypass_disp_clk;
 
 	/**
@@ -126,74 +179,33 @@ struct dce_clk_mgr {
 	 * DPREFCLK SS percentage Divider (100 or 1000).
 	 */
 	int dprefclk_ss_divider;
-	int dprefclk_khz;
 
 	enum dm_pp_clocks_state max_clks_state;
 	enum dm_pp_clocks_state cur_min_clks_state;
 };
 
-/* Starting DID for each range */
-enum dentist_base_divider_id {
-	DENTIST_BASE_DID_1 = 0x08,
-	DENTIST_BASE_DID_2 = 0x40,
-	DENTIST_BASE_DID_3 = 0x60,
-	DENTIST_BASE_DID_4 = 0x7e,
-	DENTIST_MAX_DID = 0x7f
+struct clk_mgr_internal_funcs {
+	int (*set_dispclk)(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
+	int (*set_dprefclk)(struct clk_mgr_internal *clk_mgr);
 };
 
-/* Starting point and step size for each divider range.*/
-enum dentist_divider_range {
-	DENTIST_DIVIDER_RANGE_1_START = 8,   /* 2.00  */
-	DENTIST_DIVIDER_RANGE_1_STEP  = 1,   /* 0.25  */
-	DENTIST_DIVIDER_RANGE_2_START = 64,  /* 16.00 */
-	DENTIST_DIVIDER_RANGE_2_STEP  = 2,   /* 0.50  */
-	DENTIST_DIVIDER_RANGE_3_START = 128, /* 32.00 */
-	DENTIST_DIVIDER_RANGE_3_STEP  = 4,   /* 1.00  */
-	DENTIST_DIVIDER_RANGE_4_START = 248, /* 62.00 */
-	DENTIST_DIVIDER_RANGE_4_STEP  = 264, /* 66.00 */
-	DENTIST_DIVIDER_RANGE_SCALE_FACTOR = 4
-};
+
+/*
+ ***************************************************************************************
+ ****************** Clock Manager Level Helper functions *******************************
+ ***************************************************************************************
+ */
+
 
 static inline bool should_set_clock(bool safe_to_lower, int calc_clk, int cur_clk)
 {
 	return ((safe_to_lower && calc_clk < cur_clk) || calc_clk > cur_clk);
 }
 
-void dce_clock_read_ss_info(struct dce_clk_mgr *dccg_dce);
-
-int dce12_get_dp_ref_freq_khz(struct clk_mgr *dccg);
-
-void dce110_fill_display_configs(
-	const struct dc_state *context,
-	struct dm_pp_display_configuration *pp_display_cfg);
-
-int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz);
-
-struct clk_mgr *dce_clk_mgr_create(
-	struct dc_context *ctx,
-	const struct clk_mgr_registers *regs,
-	const struct clk_mgr_shift *clk_shift,
-	const struct clk_mgr_mask *clk_mask);
-
-struct clk_mgr *dce110_clk_mgr_create(
-	struct dc_context *ctx,
-	const struct clk_mgr_registers *regs,
-	const struct clk_mgr_shift *clk_shift,
-	const struct clk_mgr_mask *clk_mask);
-
-struct clk_mgr *dce112_clk_mgr_create(
-	struct dc_context *ctx,
-	const struct clk_mgr_registers *regs,
-	const struct clk_mgr_shift *clk_shift,
-	const struct clk_mgr_mask *clk_mask);
-
-struct clk_mgr *dce120_clk_mgr_create(struct dc_context *ctx);
-
-struct clk_mgr *dce121_clk_mgr_create(struct dc_context *ctx);
-void dce121_clock_patch_xgmi_ss_info(struct clk_mgr *clk_mgr);
+int clk_mgr_helper_get_active_display_cnt(
+		struct dc *dc,
+		struct dc_state *context);
 
-void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr);
 
-int dentist_get_divider_from_did(int did);
 
-#endif /* _DCE_CLK_MGR_H_ */
+#endif //__DAL_CLK_MGR_INTERNAL_H__
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index fb7967b39edb..b55c5a2e56e2 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -64,7 +64,22 @@ struct dcn_dpp_state {
 	uint32_t gamut_remap_c33_c34;
 };
 
+struct CM_bias_params {
+	uint32_t cm_bias_cr_r;
+	uint32_t cm_bias_y_g;
+	uint32_t cm_bias_cb_b;
+	uint32_t cm_bias_format;
+};
+
 struct dpp_funcs {
+
+	void (*dpp_program_cm_dealpha)(struct dpp *dpp_base,
+		uint32_t enable, uint32_t additive_blending);
+
+	void (*dpp_program_cm_bias)(
+		struct dpp *dpp_base,
+		struct CM_bias_params *bias_params);
+
 	void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s);
 
 	void (*dpp_reset)(struct dpp *dpp);
@@ -155,9 +170,11 @@ struct dpp_funcs {
 			uint32_t width,
 			uint32_t height
 			);
+
 	void (*dpp_set_hdr_multiplier)(
 			struct dpp *dpp_base,
 			uint32_t multiplier);
+
 	void (*set_optional_cursor_attributes)(
 			struct dpp *dpp_base,
 			struct dpp_cursor_attributes *attr);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index c9d3e37e9531..ca162079a41b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -59,6 +59,7 @@ struct encoder_feature_support {
 			uint32_t IS_TPS3_CAPABLE:1;
 			uint32_t IS_TPS4_CAPABLE:1;
 			uint32_t HDMI_6GB_EN:1;
+			uint32_t DP_IS_USB_C:1;
 		} bits;
 		uint32_t raw;
 	} flags;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 49854eb73d1d..74db1d82fa35 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -52,6 +52,19 @@ enum dp_component_depth {
 	DP_COMPONENT_PIXEL_DEPTH_16BPC		= 0x00000004
 };
 
+struct audio_clock_info {
+	/* pixel clock frequency*/
+	uint32_t pixel_clock_in_10khz;
+	/* N - 32KHz audio */
+	uint32_t n_32khz;
+	/* CTS - 32KHz audio*/
+	uint32_t cts_32khz;
+	uint32_t n_44khz;
+	uint32_t cts_44khz;
+	uint32_t n_48khz;
+	uint32_t cts_48khz;
+};
+
 struct encoder_info_frame {
 	/* auxiliary video information */
 	struct dc_info_packet avi;
@@ -63,8 +76,6 @@ struct encoder_info_frame {
 	struct dc_info_packet vsc;
 	/* HDR Static MetaData */
 	struct dc_info_packet hdrsmd;
-	/* custom sdp message */
-	struct dc_info_packet dpsdp;
 };
 
 struct encoder_unblank_param {
@@ -123,6 +134,11 @@ struct stream_encoder_funcs {
 		struct stream_encoder *enc,
 		const struct encoder_info_frame *info_frame);
 
+	void (*send_immediate_sdp_message)(
+				struct stream_encoder *enc,
+				const uint8_t *custom_sdp_message,
+				unsigned int sdp_message_size);
+
 	void (*stop_dp_info_packets)(
 		struct stream_encoder *enc);
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 067d53caf28a..a89d0cf59cca 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -70,14 +70,6 @@ enum crtc_state {
 	CRTC_STATE_VACTIVE
 };
 
-struct _dlg_otg_param {
-	int vstartup_start;
-	int vupdate_offset;
-	int vupdate_width;
-	int vready_offset;
-	enum signal_type signal;
-};
-
 struct vupdate_keepout_params {
 	int start_offset;
 	int end_offset;
@@ -126,7 +118,6 @@ struct timing_generator {
 	const struct timing_generator_funcs *funcs;
 	struct dc_bios *bp;
 	struct dc_context *ctx;
-	struct _dlg_otg_param dlg_otg_param;
 	int inst;
 };
 
@@ -140,7 +131,13 @@ struct timing_generator_funcs {
 							const struct dc_crtc_timing *timing);
 	void (*program_timing)(struct timing_generator *tg,
 							const struct dc_crtc_timing *timing,
-							bool use_vbios);
+							int vready_offset,
+							int vstartup_start,
+							int vupdate_offset,
+							int vupdate_width,
+							const enum signal_type signal,
+							bool use_vbios
+	);
 	void (*setup_vertical_interrupt0)(
 			struct timing_generator *optc,
 			uint32_t start_line,
@@ -210,7 +207,11 @@ struct timing_generator_funcs {
 
 	bool (*arm_vert_intr)(struct timing_generator *tg, uint8_t width);
 
-	void (*program_global_sync)(struct timing_generator *tg);
+	void (*program_global_sync)(struct timing_generator *tg,
+			int vready_offset,
+			int vstartup_start,
+			int vupdate_offset,
+			int vupdate_width);
 	void (*enable_optc_clock)(struct timing_generator *tg, bool enable);
 	void (*program_stereo)(struct timing_generator *tg,
 		const struct dc_crtc_timing *timing, struct crtc_stereo_flags *flags);
@@ -237,6 +238,11 @@ struct timing_generator_funcs {
 	bool (*get_crc)(struct timing_generator *tg,
 			uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb);
 
+	void (*program_manual_trigger)(struct timing_generator *optc);
+	void (*setup_manual_trigger)(struct timing_generator *optc);
+
+	void (*set_vtg_params)(struct timing_generator *optc,
+			const struct dc_crtc_timing *dc_crtc_timing);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index 33905468e2b9..eb1c12ed026a 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -158,6 +158,11 @@ struct hw_sequencer_funcs {
 
 	void (*update_info_frame)(struct pipe_ctx *pipe_ctx);
 
+	void (*send_immediate_sdp_message)(
+				struct pipe_ctx *pipe_ctx,
+				const uint8_t *custom_sdp_message,
+				unsigned int sdp_message_size);
+
 	void (*enable_stream)(struct pipe_ctx *pipe_ctx);
 
 	void (*disable_stream)(struct pipe_ctx *pipe_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 3ce0a4fc5822..08915b737799 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -30,6 +30,8 @@
 #include "dal_asic_id.h"
 #include "dm_pp_smu.h"
 
+#define MEMORY_TYPE_MULTIPLIER_CZ 4
+
 enum dce_version resource_parse_asic_id(
 		struct hw_asic_id asic_id);
 
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
index 86987f5e8bd5..1a581c464345 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce110/irq_service_dce110.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/logger_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
index 750ba0ab4106..15380336cb51 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce120/irq_service_dce120.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/logger_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
index de218fe84a43..281fee8ad1e5 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dce80/irq_service_dce80.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/logger_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
index 10ac6deff5ff..ebf483e3f098 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn10/irq_service_dcn10.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/logger_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 604bea01fc13..0878550a8178 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 
 #include "include/irq_service_interface.h"
diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index c0d9f332baed..30ec80ac6fc8 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -26,11 +26,13 @@
 #ifndef _OS_TYPES_H_
 #define _OS_TYPES_H_
 
-#include <asm/byteorder.h>
+#include <linux/kgdb.h>
+#include <linux/kref.h>
 #include <linux/types.h>
-#include <drm/drmP.h>
 
-#include <linux/kref.h>
+#include <asm/byteorder.h>
+
+#include <drm/drm_print.h>
 
 #include "cgs_common.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
index 1c079ba37c30..3464b2d5b89a 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_encoder.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dm_services_types.h"
 
diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
index fdcf9e66d852..484047155aae 100644
--- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_stream_encoder.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "virtual_stream_encoder.h"
 
diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
index 01bf01a34a08..c30437ae8395 100644
--- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h
+++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
@@ -307,7 +307,8 @@ struct bp_encoder_cap_info {
 	uint32_t DP_HBR2_EN:1;
 	uint32_t DP_HBR3_EN:1;
 	uint32_t HDMI_6GB_EN:1;
-	uint32_t RESERVED:30;
+	uint32_t DP_IS_USB_C:1;
+	uint32_t RESERVED:27;
 };
 
 #endif /*__DAL_BIOS_PARSER_TYPES_H__ */
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 4c8ce7938f01..b302ff3180a4 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -132,20 +132,18 @@
 #define RAVEN_A0 0x01
 #define RAVEN_B0 0x21
 #define PICASSO_A0 0x41
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 /* DCN1_01 */
 #define RAVEN2_A0 0x81
-#endif
-#define RAVEN_UNKNOWN 0xFF
-
-#define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
 #define RAVEN1_F0 0xF0
-#define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
+#define RAVEN_UNKNOWN 0xFF
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
+#define ASICREV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN)
 #define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0))
 #define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0))
-#endif /* DCN1_01 */
+
+
+#define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN))
+
 
 #define FAMILY_RV 142 /* DCN 1*/
 
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index f5bd869d4320..dabdbc0999d4 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -45,9 +45,7 @@ enum dce_version {
 	DCE_VERSION_12_1,
 	DCE_VERSION_MAX,
 	DCN_VERSION_1_0,
-#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
 	DCN_VERSION_1_01,
-#endif /* DCN1_01 */
 	DCN_VERSION_MAX
 };
 
diff --git a/drivers/gpu/drm/amd/display/include/set_mode_types.h b/drivers/gpu/drm/amd/display/include/set_mode_types.h
index 2b836e582c08..845fea8a387f 100644
--- a/drivers/gpu/drm/amd/display/include/set_mode_types.h
+++ b/drivers/gpu/drm/amd/display/include/set_mode_types.h
@@ -84,7 +84,10 @@ union hdmi_info_packet {
 		uint16_t bar_left;
 		uint16_t bar_right;
 
-		uint8_t reserved[14];
+		uint8_t F140_F143:4;
+		uint8_t ACE0_ACE3:4;
+
+		uint8_t reserved[13];
 	} bits;
 
 	struct info_packet_raw_data packet_raw_data;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index a1055413bade..b31af9be41eb 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -23,6 +23,9 @@
  *
  */
 
+#include <linux/mm.h>
+#include <linux/slab.h>
+
 #include "dc.h"
 #include "opp.h"
 #include "color_gamma.h"
@@ -240,16 +243,27 @@ struct dividers {
 	struct fixed31_32 divider3;
 };
 
-static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4)
+enum gamma_type_index {
+	gamma_type_index_2_4,
+	gamma_type_index_2_2,
+	gamma_type_index_2_2_flat
+};
+
+static void build_coefficients(struct gamma_coefficients *coefficients, enum gamma_type_index type)
 {
-	static const int32_t numerator01[] = { 31308, 180000};
-	static const int32_t numerator02[] = { 12920, 4500};
-	static const int32_t numerator03[] = { 55, 99};
-	static const int32_t numerator04[] = { 55, 99};
-	static const int32_t numerator05[] = { 2400, 2200};
+	static const int32_t numerator01[] = { 31308,	180000,	0};
+	static const int32_t numerator02[] = { 12920,	4500,	0};
+	static const int32_t numerator03[] = { 55,		99,		0};
+	static const int32_t numerator04[] = { 55,		99,		0};
+	static const int32_t numerator05[] = { 2400,	2200, 2200};
 
 	uint32_t i = 0;
-	uint32_t index = is_2_4 == true ? 0:1;
+	uint32_t index = 0;
+
+	if (type == gamma_type_index_2_2)
+		index = 1;
+	else if (type == gamma_type_index_2_2_flat)
+		index = 2;
 
 	do {
 		coefficients->a0[i] = dc_fixpt_from_fraction(
@@ -697,7 +711,7 @@ static void build_de_pq(struct pwl_float_data_ex *de_pq,
 
 static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
 		uint32_t hw_points_num,
-		const struct hw_x_point *coordinate_x, bool is_2_4)
+		const struct hw_x_point *coordinate_x, enum gamma_type_index type)
 {
 	uint32_t i;
 
@@ -705,7 +719,7 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma,
 	struct pwl_float_data_ex *rgb = rgb_regamma;
 	const struct hw_x_point *coord_x = coordinate_x;
 
-	build_coefficients(&coeff, is_2_4);
+	build_coefficients(&coeff, type);
 
 	i = 0;
 
@@ -892,13 +906,13 @@ static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma,
 
 static void build_degamma(struct pwl_float_data_ex *curve,
 		uint32_t hw_points_num,
-		const struct hw_x_point *coordinate_x, bool is_2_4)
+		const struct hw_x_point *coordinate_x, enum gamma_type_index type)
 {
 	uint32_t i;
 	struct gamma_coefficients coeff;
 	uint32_t begin_index, end_index;
 
-	build_coefficients(&coeff, is_2_4);
+	build_coefficients(&coeff, type);
 	i = 0;
 
 	/* X points is 2^-25 to 2^7
@@ -1558,13 +1572,15 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
 			output_tf->tf == TRANSFER_FUNCTION_SRGB) {
 		if (ramp == NULL)
 			return true;
-		if (ramp->is_identity || (!mapUserRamp && ramp->type == GAMMA_RGB_256))
+		if ((ramp->is_identity && ramp->type != GAMMA_CS_TFM_1D) ||
+				(!mapUserRamp && ramp->type == GAMMA_RGB_256))
 			return true;
 	}
 
 	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
 
-	if (ramp && (mapUserRamp || ramp->type != GAMMA_RGB_256)) {
+	if (ramp && ramp->type != GAMMA_CS_TFM_1D &&
+			(mapUserRamp || ramp->type != GAMMA_RGB_256)) {
 		rgb_user = kvcalloc(ramp->num_entries + _EXTRA_POINTS,
 			    sizeof(*rgb_user),
 			    GFP_KERNEL);
@@ -1614,7 +1630,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
 				coordinates_x,
 				output_tf->sdr_ref_white_level);
 	} else if (tf == TRANSFER_FUNCTION_GAMMA22 &&
-			fs_params != NULL) {
+			fs_params != NULL && fs_params->skip_tm == 0) {
 		build_freesync_hdr(rgb_regamma,
 				MAX_HW_POINTS,
 				coordinates_x,
@@ -1627,7 +1643,9 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf,
 
 		build_regamma(rgb_regamma,
 				MAX_HW_POINTS,
-				coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? true:false);
+				coordinates_x, tf == TRANSFER_FUNCTION_SRGB ? gamma_type_index_2_4 :
+					tf == TRANSFER_FUNCTION_GAMMA22 ?
+					gamma_type_index_2_2_flat : gamma_type_index_2_2);
 	}
 	map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
 			coordinates_x, axis_x, rgb_regamma,
@@ -1832,7 +1850,9 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 		build_degamma(curve,
 				MAX_HW_POINTS,
 				coordinates_x,
-				tf == TRANSFER_FUNCTION_SRGB ? true : false);
+				tf == TRANSFER_FUNCTION_SRGB ?
+				gamma_type_index_2_4 : tf == TRANSFER_FUNCTION_GAMMA22 ?
+				gamma_type_index_2_2_flat : gamma_type_index_2_2);
 	else if (tf == TRANSFER_FUNCTION_LINEAR) {
 		// just copy coordinates_x into curve
 		i = 0;
@@ -1932,7 +1952,10 @@ bool  mod_color_calculate_curve(enum dc_transfer_func_predefined trans,
 
 		build_regamma(rgb_regamma,
 				MAX_HW_POINTS,
-				coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false);
+				coordinates_x,
+				trans == TRANSFER_FUNCTION_SRGB ?
+				gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
+				gamma_type_index_2_2_flat : gamma_type_index_2_2);
 		for (i = 0; i <= MAX_HW_POINTS ; i++) {
 			points->red[i]    = rgb_regamma[i].r;
 			points->green[i]  = rgb_regamma[i].g;
@@ -2002,7 +2025,8 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
 
 		kvfree(rgb_degamma);
 	} else if (trans == TRANSFER_FUNCTION_SRGB ||
-			  trans == TRANSFER_FUNCTION_BT709) {
+			  trans == TRANSFER_FUNCTION_BT709 ||
+			  trans == TRANSFER_FUNCTION_GAMMA22) {
 		rgb_degamma = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS,
 				       sizeof(*rgb_degamma),
 				       GFP_KERNEL);
@@ -2011,7 +2035,10 @@ bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
 
 		build_degamma(rgb_degamma,
 				MAX_HW_POINTS,
-				coordinates_x, trans == TRANSFER_FUNCTION_SRGB ? true:false);
+				coordinates_x,
+				trans == TRANSFER_FUNCTION_SRGB ?
+				gamma_type_index_2_4 : trans == TRANSFER_FUNCTION_GAMMA22 ?
+				gamma_type_index_2_2_flat : gamma_type_index_2_2);
 		for (i = 0; i <= MAX_HW_POINTS ; i++) {
 			points->red[i]    = rgb_degamma[i].r;
 			points->green[i]  = rgb_degamma[i].g;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index a6e164df090a..369953fafadf 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -79,6 +79,7 @@ struct freesync_hdr_tf_params {
 	unsigned int max_content; // luminance in nits
 	unsigned int min_display; // luminance in 1/10000 nits
 	unsigned int max_display; // luminance in nits
+	unsigned int skip_tm; // skip tm
 };
 
 void setup_x_points_distribution(void);
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 19b1eaebe484..7c20171a3b6d 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/slab.h>
+
 #include "dm_services.h"
 #include "dc.h"
 #include "mod_freesync.h"
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index db06fab2ad5c..bc13c552797f 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -63,7 +63,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	if (stream->psr_version != 0)
 		vscPacketRevision = 2;
 
-	if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+	/* Update to revision 5 for extended colorimetry support for DPCD 1.4+ */
+	if (stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+			stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
 		vscPacketRevision = 5;
 
 	/* VSC packet not needed based on the features
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
index a9575db8d7aa..6efcaa93e17b 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -30,4 +30,22 @@
 #define mmDF_CS_UMC_AON0_DramBaseAddress0								0x0044
 #define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX							0
 
+#define smnPerfMonCtlLo0					0x01d440UL
+#define smnPerfMonCtlHi0					0x01d444UL
+#define smnPerfMonCtlLo1					0x01d450UL
+#define smnPerfMonCtlHi1					0x01d454UL
+#define smnPerfMonCtlLo2					0x01d460UL
+#define smnPerfMonCtlHi2					0x01d464UL
+#define smnPerfMonCtlLo3					0x01d470UL
+#define smnPerfMonCtlHi3					0x01d474UL
+
+#define smnPerfMonCtrLo0					0x01d448UL
+#define smnPerfMonCtrHi0					0x01d44cUL
+#define smnPerfMonCtrLo1					0x01d458UL
+#define smnPerfMonCtrHi1					0x01d45cUL
+#define smnPerfMonCtrLo2					0x01d468UL
+#define smnPerfMonCtrHi2					0x01d46cUL
+#define smnPerfMonCtrLo3					0x01d478UL
+#define smnPerfMonCtrHi3					0x01d47cUL
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
index 529b37db274c..f1d048e0ed2c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
@@ -829,6 +829,8 @@
 #define mmTD_CNTL_BASE_IDX                                                                             0
 #define mmTD_STATUS                                                                                    0x0526
 #define mmTD_STATUS_BASE_IDX                                                                           0
+#define mmTD_EDC_CNT                                                                                   0x052e
+#define mmTD_EDC_CNT_BASE_IDX                                                                          0
 #define mmTD_DSM_CNTL                                                                                  0x052f
 #define mmTD_DSM_CNTL_BASE_IDX                                                                         0
 #define mmTD_DSM_CNTL2                                                                                 0x0530
@@ -845,6 +847,8 @@
 #define mmTA_STATUS_BASE_IDX                                                                           0
 #define mmTA_SCRATCH                                                                                   0x0564
 #define mmTA_SCRATCH_BASE_IDX                                                                          0
+#define mmTA_EDC_CNT                                                                                   0x0586
+#define mmTA_EDC_CNT_BASE_IDX                                                                          0
 
 
 // addressBlock: gc_gdsdec
@@ -1051,6 +1055,13 @@
 #define mmGC_USER_RB_BACKEND_DISABLE_BASE_IDX                                                          0
 
 
+// addressBlock: gc_ea_gceadec2
+// base address: 0x9c00
+#define mmGCEA_EDC_CNT                                                                                 0x0706
+#define mmGCEA_EDC_CNT_BASE_IDX                                                                        0
+#define mmGCEA_EDC_CNT2                                                                                0x0707
+#define mmGCEA_EDC_CNT2_BASE_IDX                                                                       0
+
 // addressBlock: gc_rmi_rmidec
 // base address: 0x9e00
 #define mmRMI_GENERAL_CNTL                                                                             0x0780
@@ -1709,6 +1720,8 @@
 #define mmTC_CFG_L1_VOLATILE_BASE_IDX                                                                  0
 #define mmTC_CFG_L2_VOLATILE                                                                           0x0b23
 #define mmTC_CFG_L2_VOLATILE_BASE_IDX                                                                  0
+#define mmTCI_EDC_CNT                                                                                  0x0b60
+#define mmTCI_EDC_CNT_BASE_IDX                                                                         0
 #define mmTCI_STATUS                                                                                   0x0b61
 #define mmTCI_STATUS_BASE_IDX                                                                          0
 #define mmTCI_CNTL_1                                                                                   0x0b62
@@ -2594,6 +2607,24 @@
 #define mmCP_RB_DOORBELL_CONTROL_SCH_7_BASE_IDX                                                        0
 #define mmCP_RB_DOORBELL_CLEAR                                                                         0x1188
 #define mmCP_RB_DOORBELL_CLEAR_BASE_IDX                                                                0
+#define mmCPF_EDC_TAG_CNT                                                                              0x1189
+#define mmCPF_EDC_TAG_CNT_BASE_IDX                                                                     0
+#define mmCPF_EDC_ROQ_CNT                                                                              0x118a
+#define mmCPF_EDC_ROQ_CNT_BASE_IDX                                                                     0
+#define mmCPG_EDC_TAG_CNT                                                                              0x118b
+#define mmCPG_EDC_TAG_CNT_BASE_IDX                                                                     0
+#define mmCPG_EDC_DMA_CNT                                                                              0x118d
+#define mmCPG_EDC_DMA_CNT_BASE_IDX                                                                     0
+#define mmCPC_EDC_SCRATCH_CNT                                                                          0x118e
+#define mmCPC_EDC_SCRATCH_CNT_BASE_IDX                                                                 0
+#define mmCPC_EDC_UCODE_CNT                                                                            0x118f
+#define mmCPC_EDC_UCODE_CNT_BASE_IDX                                                                   0
+#define mmDC_EDC_STATE_CNT                                                                             0x1191
+#define mmDC_EDC_STATE_CNT_BASE_IDX                                                                    0
+#define mmDC_EDC_CSINVOC_CNT                                                                           0x1192
+#define mmDC_EDC_CSINVOC_CNT_BASE_IDX                                                                  0
+#define mmDC_EDC_RESTORE_CNT                                                                           0x1193
+#define mmDC_EDC_RESTORE_CNT_BASE_IDX                                                                  0
 #define mmCP_GFX_MQD_CONTROL                                                                           0x11a0
 #define mmCP_GFX_MQD_CONTROL_BASE_IDX                                                                  0
 #define mmCP_GFX_MQD_BASE_ADDR                                                                         0x11a1
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
index 8c75669eb500..9470ec5e0f42 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_6_1_smn.h
@@ -54,5 +54,8 @@
 #define smnPCIE_PERF_COUNT0_TXCLK2			0x11180258
 #define smnPCIE_PERF_COUNT1_TXCLK2			0x1118025c
 
+#define smnPCIE_RX_NUM_NAK				0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c
+
 #endif	// _nbio_6_1_SMN_HEADER
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
index 5563f0715896..caf5ffdc130a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_0_smn.h
@@ -51,4 +51,7 @@
 #define smnPCIE_PERF_COUNT0_TXCLK2			0x11180258
 #define smnPCIE_PERF_COUNT1_TXCLK2			0x1118025c
 
+#define smnPCIE_RX_NUM_NAK				0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c
+
 #endif	// _nbio_7_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
index c1457d880c4d..4bcacf529852 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/nbio/nbio_7_4_0_smn.h
@@ -50,4 +50,7 @@
 #define smnPCIE_PERF_CNTL_EVENT_LC_PORT_SEL		0x1118024c
 #define smnPCIE_PERF_CNTL_EVENT_CI_PORT_SEL		0x11180250
 
+#define smnPCIE_RX_NUM_NAK				0x11180038
+#define smnPCIE_RX_NUM_NAK_GENERATED			0x1118003c
+
 #endif	// _nbio_7_4_0_SMN_HEADER
diff --git a/drivers/gpu/drm/amd/include/cik_structs.h b/drivers/gpu/drm/amd/include/cik_structs.h
index 749eab94e335..699e658c3cec 100644
--- a/drivers/gpu/drm/amd/include/cik_structs.h
+++ b/drivers/gpu/drm/amd/include/cik_structs.h
@@ -282,8 +282,7 @@ struct cik_sdma_rlc_registers {
 	uint32_t reserved_123;
 	uint32_t reserved_124;
 	uint32_t reserved_125;
-	uint32_t reserved_126;
-	uint32_t reserved_127;
+	/* reserved_126,127: repurposed for driver-internal use */
 	uint32_t sdma_engine_id;
 	uint32_t sdma_queue_id;
 };
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b897aca9b4c9..98b9533e672b 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -174,6 +174,7 @@ struct tile_config {
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
 #define ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
 #define ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
+#define ALLOC_MEM_FLAGS_MMIO_REMAP	(1 << 4)
 
 /*
  * Allocation flags attributes/access options.
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 2b579ba9b685..9f661bf96ed0 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -109,8 +109,12 @@ enum amd_pp_sensors {
 	AMDGPU_PP_SENSOR_UVD_DCLK,
 	AMDGPU_PP_SENSOR_VCE_ECCLK,
 	AMDGPU_PP_SENSOR_GPU_LOAD,
+	AMDGPU_PP_SENSOR_MEM_LOAD,
 	AMDGPU_PP_SENSOR_GFX_MCLK,
 	AMDGPU_PP_SENSOR_GPU_TEMP,
+	AMDGPU_PP_SENSOR_EDGE_TEMP = AMDGPU_PP_SENSOR_GPU_TEMP,
+	AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
+	AMDGPU_PP_SENSOR_MEM_TEMP,
 	AMDGPU_PP_SENSOR_VCE_POWER,
 	AMDGPU_PP_SENSOR_UVD_POWER,
 	AMDGPU_PP_SENSOR_GPU_POWER,
@@ -159,6 +163,13 @@ struct pp_states_info {
 	uint32_t states[16];
 };
 
+enum PP_HWMON_TEMP {
+	PP_TEMP_EDGE = 0,
+	PP_TEMP_JUNCTION,
+	PP_TEMP_MEM,
+	PP_TEMP_MAX
+};
+
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index ceaf4932258d..8b383dbe1cda 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -151,8 +151,7 @@ struct v9_sdma_mqd {
 	uint32_t reserved_123;
 	uint32_t reserved_124;
 	uint32_t reserved_125;
-	uint32_t reserved_126;
-	uint32_t reserved_127;
+	/* reserved_126,127: repurposed for driver-internal use */
 	uint32_t sdma_engine_id;
 	uint32_t sdma_queue_id;
 };
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index 717fbae1d362..c17613287cd0 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -151,8 +151,7 @@ struct vi_sdma_mqd {
 	uint32_t reserved_123;
 	uint32_t reserved_124;
 	uint32_t reserved_125;
-	uint32_t reserved_126;
-	uint32_t reserved_127;
+	/* reserved_126,127: repurposed for driver-internal use */
 	uint32_t sdma_engine_id;
 	uint32_t sdma_queue_id;
 };
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index eec329ab6037..9c67adee2c9e 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -20,9 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "pp_debug.h"
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
+#include "pp_debug.h"
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
 #include "soc15_common.h"
@@ -30,6 +30,36 @@
 #include "atom.h"
 #include "amd_pcie.h"
 
+int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version)
+{
+	int ret = 0;
+
+	if (!if_version && !smu_version)
+		return -EINVAL;
+
+	if (if_version) {
+		ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion);
+		if (ret)
+			return ret;
+
+		ret = smu_read_smc_arg(smu, if_version);
+		if (ret)
+			return ret;
+	}
+
+	if (smu_version) {
+		ret = smu_send_smc_msg(smu, SMU_MSG_GetSmuVersion);
+		if (ret)
+			return ret;
+
+		ret = smu_read_smc_arg(smu, smu_version);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
+}
+
 int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type,
 			   bool gate)
 {
@@ -168,6 +198,8 @@ int smu_sys_set_pp_table(struct smu_context *smu,  void *buf, size_t size)
 	ATOM_COMMON_TABLE_HEADER *header = (ATOM_COMMON_TABLE_HEADER *)buf;
 	int ret = 0;
 
+	if (!smu->pm_enabled)
+		return -EINVAL;
 	if (header->usStructureSize != size) {
 		pr_err("pp table size not matched !\n");
 		return -EIO;
@@ -203,6 +235,8 @@ int smu_feature_init_dpm(struct smu_context *smu)
 	int ret = 0;
 	uint32_t unallowed_feature_mask[SMU_FEATURE_MAX/32];
 
+	if (!smu->pm_enabled)
+		return ret;
 	mutex_lock(&feature->mutex);
 	bitmap_fill(feature->allowed, SMU_FEATURE_MAX);
 	mutex_unlock(&feature->mutex);
@@ -314,6 +348,7 @@ static int smu_early_init(void *handle)
 	struct smu_context *smu = &adev->smu;
 
 	smu->adev = adev;
+	smu->pm_enabled = !!amdgpu_dpm;
 	mutex_init(&smu->mutex);
 
 	return smu_set_funcs(adev);
@@ -323,6 +358,9 @@ static int smu_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
+
+	if (!smu->pm_enabled)
+		return 0;
 	mutex_lock(&smu->mutex);
 	smu_handle_task(&adev->smu,
 			smu->smu_dpm.dpm_level,
@@ -406,9 +444,6 @@ static int smu_sw_init(void *handle)
 	struct smu_context *smu = &adev->smu;
 	int ret;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	smu->pool_size = adev->pm.smu_prv_buffer_size;
 	smu->smu_feature.feature_num = SMU_FEATURE_MAX;
 	mutex_init(&smu->smu_feature.mutex);
@@ -460,9 +495,6 @@ static int smu_sw_fini(void *handle)
 	struct smu_context *smu = &adev->smu;
 	int ret;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	ret = smu_smc_table_sw_fini(smu);
 	if (ret) {
 		pr_err("Failed to sw fini smc table!\n");
@@ -612,10 +644,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 		 * check if the format_revision in vbios is up to pptable header
 		 * version, and the structure size is not 0.
 		 */
-		ret = smu_get_clk_info_from_vbios(smu);
-		if (ret)
-			return ret;
-
 		ret = smu_check_pptable(smu);
 		if (ret)
 			return ret;
@@ -716,6 +744,9 @@ static int smu_smc_table_hw_init(struct smu_context *smu,
 	 */
 	ret = smu_set_tool_table_location(smu);
 
+	if (!smu_is_dpm_running(smu))
+		pr_info("dpm has been disabled\n");
+
 	return ret;
 }
 
@@ -788,9 +819,6 @@ static int smu_hw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
 		ret = smu_load_microcode(smu);
 		if (ret)
@@ -831,7 +859,10 @@ static int smu_hw_init(void *handle)
 
 	mutex_unlock(&smu->mutex);
 
-	adev->pm.dpm_enabled = true;
+	if (!smu->pm_enabled)
+		adev->pm.dpm_enabled = false;
+	else
+		adev->pm.dpm_enabled = true;
 
 	pr_info("SMU is initialized successfully!\n");
 
@@ -849,9 +880,6 @@ static int smu_hw_fini(void *handle)
 	struct smu_table_context *table_context = &smu->smu_table;
 	int ret = 0;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	kfree(table_context->driver_pptable);
 	table_context->driver_pptable = NULL;
 
@@ -906,9 +934,6 @@ static int smu_suspend(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	ret = smu_system_features_control(smu, false);
 	if (ret)
 		return ret;
@@ -924,9 +949,6 @@ static int smu_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct smu_context *smu = &adev->smu;
 
-	if (!is_support_sw_smu(adev))
-		return -EINVAL;
-
 	pr_info("SMU is resuming...\n");
 
 	mutex_lock(&smu->mutex);
@@ -955,7 +977,7 @@ int smu_display_configuration_change(struct smu_context *smu,
 	int index = 0;
 	int num_of_active_display = 0;
 
-	if (!is_support_sw_smu(smu->adev))
+	if (!smu->pm_enabled || !is_support_sw_smu(smu->adev))
 		return -EINVAL;
 
 	if (!display_config)
@@ -1083,7 +1105,7 @@ static int smu_enable_umd_pstate(void *handle,
 
 	struct smu_context *smu = (struct smu_context*)(handle);
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
-	if (!smu_dpm_ctx->dpm_context)
+	if (!smu->pm_enabled || !smu_dpm_ctx->dpm_context)
 		return -EINVAL;
 
 	if (!(smu_dpm_ctx->dpm_level & profile_mode_mask)) {
@@ -1126,6 +1148,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 	long workload;
 	struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
 
+	if (!smu->pm_enabled)
+		return -EINVAL;
 	if (!skip_display_settings) {
 		ret = smu_display_config_changed(smu);
 		if (ret) {
@@ -1134,6 +1158,8 @@ int smu_adjust_power_state_dynamic(struct smu_context *smu,
 		}
 	}
 
+	if (!smu->pm_enabled)
+		return -EINVAL;
 	ret = smu_apply_clocks_adjust_rules(smu);
 	if (ret) {
 		pr_err("Failed to apply clocks adjust rules!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 70f7f47a2fcf..cc57fb953e62 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -225,7 +225,16 @@ int phm_register_irq_handlers(struct pp_hwmgr *hwmgr)
 int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
-	struct PP_TemperatureRange range = {TEMP_RANGE_MIN, TEMP_RANGE_MAX};
+	struct PP_TemperatureRange range = {
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (hwmgr->hwmgr_func->get_thermal_temperature_range)
@@ -239,6 +248,13 @@ int phm_start_thermal_controller(struct pp_hwmgr *hwmgr)
 
 	adev->pm.dpm.thermal.min_temp = range.min;
 	adev->pm.dpm.thermal.max_temp = range.max;
+	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
+	adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
+	adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
+	adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 048757e8f494..c5986d28fbf1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -24,6 +24,7 @@
 #include <linux/delay.h>
 #include <linux/fb.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
 #include <drm/amdgpu_drm.h>
@@ -3532,9 +3533,12 @@ static int smu7_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*size = 4;
 		return 0;
 	case AMDGPU_PP_SENSOR_GPU_LOAD:
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
 		offset = data->soft_regs_start + smum_get_offsetof(hwmgr,
 								SMU_SoftRegisters,
-								AverageGraphicsActivity);
+								(idx == AMDGPU_PP_SENSOR_GPU_LOAD) ?
+								AverageGraphicsActivity:
+								AverageMemoryActivity);
 
 		activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
 		activity_percent += 0x80;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 101c09b212ad..d09690fca452 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -20,6 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
+
+#include <linux/pci.h>
+
 #include "hwmgr.h"
 #include "pp_debug.h"
 #include "ppatomctrl.h"
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 384c37875cd0..3be8eb21fd6e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -24,6 +24,7 @@
 #include <linux/delay.h>
 #include <linux/fb.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 
 #include "hwmgr.h"
@@ -356,6 +357,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 	struct vega10_hwmgr *data = hwmgr->backend;
 	int i;
 	uint32_t sub_vendor_id, hw_revision;
+	uint32_t top32, bottom32;
 	struct amdgpu_device *adev = hwmgr->adev;
 
 	vega10_initialize_power_tune_defaults(hwmgr);
@@ -499,6 +501,14 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 		(hw_revision == 0) &&
 		(sub_vendor_id != 0x1002))
 		data->smu_features[GNLD_PCC_LIMIT].supported = true;
+
+	/* Get the SN to turn into a Unique ID */
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+	top32 = smum_get_argument(hwmgr);
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+	bottom32 = smum_get_argument(hwmgr);
+
+	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
 }
 
 #ifdef PPLIB_VEGA10_EVV_SUPPORT
@@ -2267,8 +2277,8 @@ static int vega10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 			pp_table->AcgAvfsGb.m1                   = avfs_params.ulAcgGbFuseTableM1;
 			pp_table->AcgAvfsGb.m2                   = avfs_params.ulAcgGbFuseTableM2;
 			pp_table->AcgAvfsGb.b                    = avfs_params.ulAcgGbFuseTableB;
-			pp_table->AcgAvfsGb.m1_shift             = 0;
-			pp_table->AcgAvfsGb.m2_shift             = 0;
+			pp_table->AcgAvfsGb.m1_shift             = 24;
+			pp_table->AcgAvfsGb.m2_shift             = 12;
 			pp_table->AcgAvfsGb.b_shift              = 0;
 
 		} else {
@@ -2364,6 +2374,10 @@ static int vega10_avfs_enable(struct pp_hwmgr *hwmgr, bool enable)
 	struct vega10_hwmgr *data = hwmgr->backend;
 
 	if (data->smu_features[GNLD_AVFS].supported) {
+		/* Already enabled or disabled */
+		if (!(enable ^ data->smu_features[GNLD_AVFS].enabled))
+			return 0;
+
 		if (enable) {
 			PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
 					true,
@@ -2466,11 +2480,6 @@ static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
 			return;
 		}
 	}
-
-	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
-		data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
-	}
 }
 
 /**
@@ -3683,6 +3692,10 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr,
 
 	vega10_update_avfs(hwmgr);
 
+	/*
+	 * Clear all OD flags except DPMTABLE_OD_UPDATE_VDDC.
+	 * That will help to keep AVFS disabled.
+	 */
 	data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
 
 	return 0;
@@ -3785,6 +3798,18 @@ static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*((uint32_t *)value) = vega10_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHotspot);
+		*((uint32_t *)value) = smum_get_argument(hwmgr) *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetTemperatureHBM);
+		*((uint32_t *)value) = smum_get_argument(hwmgr) *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
@@ -4852,12 +4877,22 @@ static int vega10_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
 static int vega10_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 		struct PP_TemperatureRange *thermal_data)
 {
-	struct phm_ppt_v2_information *table_info =
-			(struct phm_ppt_v2_information *)hwmgr->pptable;
+	struct vega10_hwmgr *data = hwmgr->backend;
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = table_info->tdp_table->usSoftwareShutdownTemp *
+	thermal_data->max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
@@ -4988,13 +5023,70 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
 	return true;
 }
 
+static void vega10_odn_update_power_state(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct pp_power_state *ps = hwmgr->request_ps;
+	struct vega10_power_state *vega10_ps;
+	struct vega10_single_dpm_table *gfx_dpm_table =
+		&data->dpm_table.gfx_table;
+	struct vega10_single_dpm_table *soc_dpm_table =
+		&data->dpm_table.soc_table;
+	struct vega10_single_dpm_table *mem_dpm_table =
+		&data->dpm_table.mem_table;
+	int max_level;
+
+	if (!ps)
+		return;
+
+	vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+	max_level = vega10_ps->performance_level_count - 1;
+
+	if (vega10_ps->performance_levels[max_level].gfx_clock !=
+	    gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].gfx_clock =
+			gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
+
+	if (vega10_ps->performance_levels[max_level].soc_clock !=
+	    soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].soc_clock =
+			soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
+
+	if (vega10_ps->performance_levels[max_level].mem_clock !=
+	    mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].mem_clock =
+			mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
+
+	if (!hwmgr->ps)
+		return;
+
+	ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1));
+	vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
+	max_level = vega10_ps->performance_level_count - 1;
+
+	if (vega10_ps->performance_levels[max_level].gfx_clock !=
+	    gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].gfx_clock =
+			gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
+
+	if (vega10_ps->performance_levels[max_level].soc_clock !=
+	    soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].soc_clock =
+			soc_dpm_table->dpm_levels[soc_dpm_table->count - 1].value;
+
+	if (vega10_ps->performance_levels[max_level].mem_clock !=
+	    mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value)
+		vega10_ps->performance_levels[max_level].mem_clock =
+			mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
+}
+
 static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
 						enum PP_OD_DPM_TABLE_COMMAND type)
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct phm_ppt_v2_information *table_info = hwmgr->pptable;
 	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk;
-	struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table;
+	struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.mem_table;
 
 	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =
 							&data->odn_dpm_table.vdd_dep_on_socclk;
@@ -5018,7 +5110,8 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
 					break;
 			}
 			if (j == od_vddc_lookup_table->count) {
-				od_vddc_lookup_table->entries[j-1].us_vdd =
+				j = od_vddc_lookup_table->count - 1;
+				od_vddc_lookup_table->entries[j].us_vdd =
 					podn_vdd_dep->entries[i].vddc;
 				data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
 			}
@@ -5026,25 +5119,38 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
 		}
 		dpm_table = &data->dpm_table.soc_table;
 		for (i = 0; i < dep_table->count; i++) {
-			if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd &&
-					dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) {
+			if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[podn_vdd_dep->count-1].vddInd &&
+					dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count-1].clk) {
 				data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
-				podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
-				dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+				for (; (i < dep_table->count) &&
+				       (dep_table->entries[i].clk < podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk); i++) {
+					podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[podn_vdd_dep->count-1].clk;
+					dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+				}
+				break;
+			} else {
+				dpm_table->dpm_levels[i].value = dep_table->entries[i].clk;
+				podn_vdd_dep_on_socclk->entries[i].vddc = dep_table->entries[i].vddc;
+				podn_vdd_dep_on_socclk->entries[i].vddInd = dep_table->entries[i].vddInd;
+				podn_vdd_dep_on_socclk->entries[i].clk = dep_table->entries[i].clk;
 			}
 		}
 		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
-					podn_vdd_dep->entries[dep_table->count-1].clk) {
+					podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk) {
 			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
-			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
-			dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk;
+			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk =
+				podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
+			dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value =
+				podn_vdd_dep->entries[podn_vdd_dep->count - 1].clk;
 		}
 		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
-					podn_vdd_dep->entries[dep_table->count-1].vddInd) {
+					podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd) {
 			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
-			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd;
+			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd =
+				podn_vdd_dep->entries[podn_vdd_dep->count - 1].vddInd;
 		}
 	}
+	vega10_odn_update_power_state(hwmgr);
 }
 
 static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
@@ -5079,6 +5185,11 @@ static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
 	} else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
 		memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));
 		vega10_odn_initial_default_setting(hwmgr);
+		vega10_odn_update_power_state(hwmgr);
+		/* force to update all clock tables */
+		data->need_update_dpm_table = DPMTABLE_UPDATE_SCLK |
+					      DPMTABLE_UPDATE_MCLK |
+					      DPMTABLE_UPDATE_SOCCLK;
 		return 0;
 	} else if (PP_OD_COMMIT_DPM_TABLE == type) {
 		vega10_check_dpm_table_updated(hwmgr);
@@ -5201,8 +5312,12 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 
 int vega10_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
+
 	hwmgr->hwmgr_func = &vega10_hwmgr_funcs;
 	hwmgr->pptable_func = &vega10_pptable_funcs;
+	if (amdgpu_passthrough(adev))
+		return vega10_baco_set_cap(hwmgr);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b6767d74dc85..f29af5ca0aa0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -21,6 +21,7 @@
  *
  */
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/fb.h>
 
@@ -1371,3 +1372,27 @@ int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr,
 
 	return result;
 }
+
+int vega10_baco_set_cap(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+
+	const ATOM_Vega10_POWERPLAYTABLE *powerplay_table;
+
+	powerplay_table = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE((powerplay_table != NULL),
+		"Missing PowerPlay Table!", return -1);
+
+	result = check_powerplay_tables(hwmgr, powerplay_table);
+
+	PP_ASSERT_WITH_CODE((result == 0),
+			    "check_powerplay_tables failed", return result);
+
+	set_hw_cap(
+			hwmgr,
+			0 != (le32_to_cpu(powerplay_table->ulPlatformCaps) & ATOM_VEGA10_PP_PLATFORM_CAP_BACO),
+			PHM_PlatformCaps_BACO);
+	return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index d83ed2af7aa3..da5fbec9b0cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -59,4 +59,5 @@ extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
 extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
 		struct pp_power_state *power_state, int (*call_back_func)(struct pp_hwmgr *, void *,
 				struct pp_power_state *, void *, uint32_t));
+extern int vega10_baco_set_cap(struct pp_hwmgr *hwmgr);
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 707cd4b0357f..efb6d3762feb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -289,6 +289,8 @@ static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
 	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t top32, bottom32;
 	int i;
 
 	data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -353,6 +355,14 @@ static void vega12_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 			((data->registry_data.disallowed_features >> i) & 1) ?
 			false : true;
 	}
+
+	/* Get the SN to turn into a Unique ID */
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+	top32 = smum_get_argument(hwmgr);
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+	bottom32 = smum_get_argument(hwmgr);
+
+	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
 }
 
 static int vega12_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -1237,21 +1247,39 @@ static uint32_t vega12_dpm_get_mclk(struct pp_hwmgr *hwmgr, bool low)
 	return (mem_clk * 100);
 }
 
+static int vega12_get_metrics_table(struct pp_hwmgr *hwmgr, SmuMetrics_t *metrics_table)
+{
+	struct vega12_hwmgr *data =
+			(struct vega12_hwmgr *)(hwmgr->backend);
+	int ret = 0;
+
+	if (!data->metrics_time || time_after(jiffies, data->metrics_time + HZ / 2)) {
+		ret = smum_smc_table_manager(hwmgr, (uint8_t *)metrics_table,
+				TABLE_SMU_METRICS, true);
+		if (ret) {
+			pr_info("Failed to export SMU metrics table!\n");
+			return ret;
+		}
+		memcpy(&data->metrics_table, metrics_table, sizeof(SmuMetrics_t));
+		data->metrics_time = jiffies;
+	} else
+		memcpy(metrics_table, &data->metrics_table, sizeof(SmuMetrics_t));
+
+	return ret;
+}
+
 static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
 {
-#if 0
-	uint32_t value;
+	SmuMetrics_t metrics_table;
+	int ret = 0;
 
-	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
-			PPSMC_MSG_GetCurrPkgPwr),
-			"Failed to get current package power!",
-			return -EINVAL);
+	ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+	if (ret)
+		return ret;
 
-	value = smum_get_argument(hwmgr);
-	/* power value is an integer */
-	*query = value << 8;
-#endif
-	return 0;
+	*query = metrics_table.CurrSocketPower << 8;
+
+	return ret;
 }
 
 static int vega12_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
@@ -1290,25 +1318,27 @@ static int vega12_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_f
 
 static int vega12_get_current_activity_percent(
 		struct pp_hwmgr *hwmgr,
+		int idx,
 		uint32_t *activity_percent)
 {
+	SmuMetrics_t metrics_table;
 	int ret = 0;
-	uint32_t current_activity = 50;
 
-#if 0
-	ret = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetAverageGfxActivity, 0);
-	if (!ret) {
-		current_activity = smum_get_argument(hwmgr);
-		if (current_activity > 100) {
-			PP_ASSERT(false,
-				  "[GetCurrentActivityPercent] Activity Percentage Exceeds 100!");
-			current_activity = 100;
-		}
-	} else
-		PP_ASSERT(false,
-			"[GetCurrentActivityPercent] Attempt To Send Get Average Graphics Activity to SMU Failed!");
-#endif
-	*activity_percent = current_activity;
+	ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+	if (ret)
+		return ret;
+
+	switch (idx) {
+	case AMDGPU_PP_SENSOR_GPU_LOAD:
+		*activity_percent = metrics_table.AverageGfxActivity;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
+		*activity_percent = metrics_table.AverageUclkActivity;
+		break;
+	default:
+		pr_err("Invalid index for retrieving clock activity\n");
+		return -EINVAL;
+	}
 
 	return ret;
 }
@@ -1317,6 +1347,7 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 			      void *value, int *size)
 {
 	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
+	SmuMetrics_t metrics_table;
 	int ret = 0;
 
 	switch (idx) {
@@ -1331,7 +1362,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 			*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GPU_LOAD:
-		ret = vega12_get_current_activity_percent(hwmgr, (uint32_t *)value);
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
+		ret = vega12_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
 		if (!ret)
 			*size = 4;
 		break;
@@ -1339,6 +1371,24 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		*((uint32_t *)value) = vega12_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+		ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureHotspot *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		ret = vega12_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureHBM *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
@@ -1349,6 +1399,8 @@ static int vega12_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 		break;
 	case AMDGPU_PP_SENSOR_GPU_POWER:
 		ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
+		if (!ret)
+			*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK:
 		ret = vega12_get_enabled_smc_features(hwmgr, (uint64_t *)value);
@@ -2526,12 +2578,23 @@ static int vega12_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
 static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 		struct PP_TemperatureRange *thermal_data)
 {
-	struct phm_ppt_v3_information *pptable_information =
-		(struct phm_ppt_v3_information *)hwmgr->pptable;
+	struct vega12_hwmgr *data =
+			(struct vega12_hwmgr *)(hwmgr->backend);
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = pptable_information->us_software_shutdown_temp *
+	thermal_data->max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index b3e424d28994..73875399666a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -396,6 +396,9 @@ struct vega12_hwmgr {
 
 	/* ---- Gfxoff ---- */
 	bool                           gfxoff_controlled_by_driver;
+
+	unsigned long                  metrics_time;
+	SmuMetrics_t                   metrics_table;
 };
 
 #define VEGA12_DPM2_NEAR_TDP_DEC                      10
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 9b9f87b84910..f27c6fbb192e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -97,6 +97,27 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	if (hwmgr->smu_version < 0x282100)
 		data->registry_data.disallowed_features |= FEATURE_ECC_MASK;
 
+	if (!(hwmgr->feature_mask & PP_PCIE_DPM_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DPM_LINK_MASK;
+
+	if (!(hwmgr->feature_mask & PP_SCLK_DPM_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DPM_GFXCLK_MASK;
+
+	if (!(hwmgr->feature_mask & PP_SOCCLK_DPM_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DPM_SOCCLK_MASK;
+
+	if (!(hwmgr->feature_mask & PP_MCLK_DPM_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DPM_UCLK_MASK;
+
+	if (!(hwmgr->feature_mask & PP_DCEFCLK_DPM_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DPM_DCEFCLK_MASK;
+
+	if (!(hwmgr->feature_mask & PP_ULV_MASK))
+		data->registry_data.disallowed_features |= FEATURE_ULV_MASK;
+
+	if (!(hwmgr->feature_mask & PP_SCLK_DEEP_SLEEP_MASK))
+		data->registry_data.disallowed_features |= FEATURE_DS_GFXCLK_MASK;
+
 	data->registry_data.od_state_in_dc_support = 0;
 	data->registry_data.thermal_support = 1;
 	data->registry_data.skip_baco_hardware = 0;
@@ -303,6 +324,8 @@ static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
 	struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t top32, bottom32;
 	int i;
 
 	data->smu_features[GNLD_DPM_PREFETCHER].smu_feature_id =
@@ -372,6 +395,14 @@ static void vega20_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 			((data->registry_data.disallowed_features >> i) & 1) ?
 			false : true;
 	}
+
+	/* Get the SN to turn into a Unique ID */
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumTop32);
+	top32 = smum_get_argument(hwmgr);
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ReadSerialNumBottom32);
+	bottom32 = smum_get_argument(hwmgr);
+
+	adev->unique_id = ((uint64_t)bottom32 << 32) | top32;
 }
 
 static int vega20_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
@@ -2094,6 +2125,7 @@ static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
 }
 
 static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
+		int idx,
 		uint32_t *activity_percent)
 {
 	int ret = 0;
@@ -2103,7 +2135,17 @@ static int vega20_get_current_activity_percent(struct pp_hwmgr *hwmgr,
 	if (ret)
 		return ret;
 
-	*activity_percent = metrics_table.AverageGfxActivity;
+	switch (idx) {
+	case AMDGPU_PP_SENSOR_GPU_LOAD:
+		*activity_percent = metrics_table.AverageGfxActivity;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
+		*activity_percent = metrics_table.AverageUclkActivity;
+		break;
+	default:
+		pr_err("Invalid index for retrieving clock activity\n");
+		return -EINVAL;
+	}
 
 	return ret;
 }
@@ -2134,14 +2176,33 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 			*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GPU_LOAD:
-		ret = vega20_get_current_activity_percent(hwmgr, (uint32_t *)value);
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
+		ret = vega20_get_current_activity_percent(hwmgr, idx, (uint32_t *)value);
 		if (!ret)
 			*size = 4;
 		break;
-	case AMDGPU_PP_SENSOR_GPU_TEMP:
+	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
 		*((uint32_t *)value) = vega20_thermal_get_temperature(hwmgr);
 		*size = 4;
 		break;
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+		ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureEdge *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		ret = vega20_get_metrics_table(hwmgr, &metrics_table);
+		if (ret)
+			return ret;
+
+		*((uint32_t *)value) = metrics_table.TemperatureHBM *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		*size = 4;
+		break;
 	case AMDGPU_PP_SENSOR_UVD_POWER:
 		*((uint32_t *)value) = data->uvd_power_gated ? 0 : 1;
 		*size = 4;
@@ -3974,12 +4035,23 @@ static int vega20_notify_cac_buffer_info(struct pp_hwmgr *hwmgr,
 static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
 		struct PP_TemperatureRange *thermal_data)
 {
-	struct phm_ppt_v3_information *pptable_information =
-		(struct phm_ppt_v3_information *)hwmgr->pptable;
+	struct vega20_hwmgr *data =
+			(struct vega20_hwmgr *)(hwmgr->backend);
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 
 	memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	thermal_data->max = pptable_information->us_software_shutdown_temp *
+	thermal_data->max = pp_table->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_crit_max = pp_table->ThotspotLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_crit_max = pp_table->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index c8b168b3413b..3eb1de9ecf73 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -401,8 +401,12 @@ struct smu_context
 	uint32_t workload_setting[WORKLOAD_POLICY_MAX];
 	uint32_t power_profile_mode;
 	uint32_t default_power_profile_mode;
+	bool pm_enabled;
 
 	uint32_t smc_if_version;
+
+	unsigned long metrics_time;
+	void *metrics_table;
 };
 
 struct pptable_funcs {
@@ -458,6 +462,8 @@ struct pptable_funcs {
 				      uint32_t *mclk_mask,
 				      uint32_t *soc_mask);
 	int (*set_cpu_power_state)(struct smu_context *smu);
+	int (*set_ppfeature_status)(struct smu_context *smu, uint64_t ppfeatures);
+	int (*get_ppfeature_status)(struct smu_context *smu, char *buf);
 };
 
 struct smu_funcs
@@ -727,7 +733,10 @@ struct smu_funcs
 	((smu)->funcs->get_mclk ? (smu)->funcs->get_mclk((smu), (low)) : 0)
 #define smu_set_xgmi_pstate(smu, pstate) \
 		((smu)->funcs->set_xgmi_pstate ? (smu)->funcs->set_xgmi_pstate((smu), (pstate)) : 0)
-
+#define smu_set_ppfeature_status(smu, ppfeatures) \
+	((smu)->ppt_funcs->set_ppfeature_status ? (smu)->ppt_funcs->set_ppfeature_status((smu), (ppfeatures)) : -EINVAL)
+#define smu_get_ppfeature_status(smu, buf) \
+	((smu)->ppt_funcs->get_ppfeature_status ? (smu)->ppt_funcs->get_ppfeature_status((smu), (buf)) : -EINVAL)
 
 extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
 				   uint16_t *size, uint8_t *frev, uint8_t *crev,
@@ -767,4 +776,5 @@ extern int smu_dpm_set_power_gate(struct smu_context *smu,uint32_t block_type, b
 extern int smu_handle_task(struct smu_context *smu,
 			   enum amd_dpm_forced_level level,
 			   enum amd_pp_task task_id);
+int smu_get_smc_version(struct smu_context *smu, uint32_t *if_version, uint32_t *smu_version);
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/power_state.h b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
index a99b5cbb113e..a5f2227a3971 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/power_state.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/power_state.h
@@ -124,6 +124,13 @@ struct PP_StateSoftwareAlgorithmBlock {
 struct PP_TemperatureRange {
 	int min;
 	int max;
+	int edge_emergency_max;
+	int hotspot_min;
+	int hotspot_crit_max;
+	int hotspot_emergency_max;
+	int mem_min;
+	int mem_crit_max;
+	int mem_emergency_max;
 };
 
 struct PP_StateValidationBlock {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
index 201d2b6329ab..3e30768f9e1c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
@@ -27,14 +27,18 @@
 
 static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] =
 {
-	{-273150,  99000},
-	{ 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
 static const struct PP_TemperatureRange SMU7ThermalPolicy[] =
 {
-	{-273150,  99000},
-	{ 120000, 120000},
+	{-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
+	{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
 };
 
+#define CTF_OFFSET_EDGE			5
+#define CTF_OFFSET_HOTSPOT		5
+#define CTF_OFFSET_HBM			5
+
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
index aa8d81f4111e..02c965d64256 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h
@@ -36,6 +36,9 @@
 #define smnMP0_FW_INTF			0x30101c0
 #define smnMP1_PUB_CTRL			0x3010b14
 
+#define TEMP_RANGE_MIN			(0)
+#define TEMP_RANGE_MAX			(80 * 1000)
+
 struct smu_11_0_max_sustainable_clocks {
 	uint32_t display_clock;
 	uint32_t phy_clock;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 82550a8a3a3f..c5288831aa15 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -41,6 +41,7 @@ enum SMU_MEMBER {
 	HandshakeDisables = 0,
 	VoltageChangeTimeout,
 	AverageGraphicsActivity,
+	AverageMemoryActivity,
 	PreVBlankGap,
 	VBlankTimeout,
 	UcodeLoadStatus,
diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
index 92903a4cc4d8..463275f88e89 100644
--- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c
@@ -20,8 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include "pp_debug.h"
 #include <linux/firmware.h>
+#include <linux/module.h>
+
+#include "pp_debug.h"
 #include "amdgpu.h"
 #include "amdgpu_smu.h"
 #include "atomfirmware.h"
@@ -223,20 +225,27 @@ static int smu_v11_0_check_fw_status(struct smu_context *smu)
 
 static int smu_v11_0_check_fw_version(struct smu_context *smu)
 {
-	uint32_t smu_version = 0xff;
+	uint32_t if_version = 0xff, smu_version = 0xff;
+	uint16_t smu_major;
+	uint8_t smu_minor, smu_debug;
 	int ret = 0;
 
-	ret = smu_send_smc_msg(smu, SMU_MSG_GetDriverIfVersion);
+	ret = smu_get_smc_version(smu, &if_version, &smu_version);
 	if (ret)
-		goto err;
+		return ret;
 
-	ret = smu_read_smc_arg(smu, &smu_version);
-	if (ret)
-		goto err;
+	smu_major = (smu_version >> 16) & 0xffff;
+	smu_minor = (smu_version >> 8) & 0xff;
+	smu_debug = (smu_version >> 0) & 0xff;
 
-	if (smu_version != smu->smc_if_version)
+	pr_info("SMU Driver IF Version = 0x%08x, SMU FW Version = 0x%08x (%d.%d.%d)\n",
+		if_version, smu_version, smu_major, smu_minor, smu_debug);
+
+	if (if_version != smu->smc_if_version) {
+		pr_err("SMU driver if version not matched\n");
 		ret = -EINVAL;
-err:
+	}
+
 	return ret;
 }
 
@@ -353,6 +362,8 @@ static int smu_v11_0_init_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (smu_power->power_context || smu_power->power_context_size != 0)
 		return -EINVAL;
 
@@ -362,6 +373,13 @@ static int smu_v11_0_init_power(struct smu_context *smu)
 		return -ENOMEM;
 	smu_power->power_context_size = sizeof(struct smu_11_0_dpm_context);
 
+	smu->metrics_time = 0;
+	smu->metrics_table = kzalloc(sizeof(SmuMetrics_t), GFP_KERNEL);
+	if (!smu->metrics_table) {
+		kfree(smu_power->power_context);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -369,10 +387,14 @@ static int smu_v11_0_fini_power(struct smu_context *smu)
 {
 	struct smu_power_context *smu_power = &smu->smu_power;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (!smu_power->power_context || smu_power->power_context_size == 0)
 		return -EINVAL;
 
+	kfree(smu->metrics_table);
 	kfree(smu_power->power_context);
+	smu->metrics_table = NULL;
 	smu_power->power_context = NULL;
 	smu_power->power_context_size = 0;
 
@@ -634,6 +656,8 @@ static int smu_v11_0_set_min_dcef_deep_sleep(struct smu_context *smu)
 {
 	struct smu_table_context *table_context = &smu->smu_table;
 
+	if (!smu->pm_enabled)
+		return 0;
 	if (!table_context)
 		return -EINVAL;
 
@@ -662,6 +686,9 @@ static int smu_v11_0_set_tool_table_location(struct smu_context *smu)
 static int smu_v11_0_init_display(struct smu_context *smu)
 {
 	int ret = 0;
+
+	if (!smu->pm_enabled)
+		return ret;
 	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, 0);
 	return ret;
 }
@@ -671,6 +698,8 @@ static int smu_v11_0_update_feature_enable_state(struct smu_context *smu, uint32
 	uint32_t feature_low = 0, feature_high = 0;
 	int ret = 0;
 
+	if (!smu->pm_enabled)
+		return ret;
 	if (feature_id >= 0 && feature_id < 31)
 		feature_low = (1 << feature_id);
 	else if (feature_id > 31 && feature_id < 63)
@@ -777,10 +806,13 @@ static int smu_v11_0_system_features_control(struct smu_context *smu,
 	uint32_t feature_mask[2];
 	int ret = 0;
 
-	ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
-				     SMU_MSG_DisableAllSmuFeatures));
-	if (ret)
-		return ret;
+	if (smu->pm_enabled) {
+		ret = smu_send_smc_msg(smu, (en ? SMU_MSG_EnableAllSmuFeatures :
+					     SMU_MSG_DisableAllSmuFeatures));
+		if (ret)
+			return ret;
+	}
+
 	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
 	if (ret)
 		return ret;
@@ -797,6 +829,8 @@ static int smu_v11_0_notify_display_change(struct smu_context *smu)
 {
 	int ret = 0;
 
+	if (!smu->pm_enabled)
+		return ret;
 	if (smu_feature_is_enabled(smu, FEATURE_DPM_UCLK_BIT))
 	    ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1);
 
@@ -809,6 +843,8 @@ smu_v11_0_get_max_sustainable_clock(struct smu_context *smu, uint32_t *clock,
 {
 	int ret = 0;
 
+	if (!smu->pm_enabled)
+		return ret;
 	ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDcModeMaxDpmFreq,
 					  clock_select << 16);
 	if (ret) {
@@ -995,9 +1031,20 @@ static int smu_v11_0_get_current_clk_freq(struct smu_context *smu, uint32_t clk_
 static int smu_v11_0_get_thermal_range(struct smu_context *smu,
 				struct PP_TemperatureRange *range)
 {
+	PPTable_t *pptable = smu->smu_table.driver_pptable;
 	memcpy(range, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
 
-	range->max = smu->smu_table.software_shutdown_temp *
+	range->max = pptable->TedgeLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	range->edge_emergency_max = (pptable->TedgeLimit + CTF_OFFSET_EDGE) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	range->hotspot_crit_max = pptable->ThotspotLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	range->hotspot_emergency_max = (pptable->ThotspotLimit + CTF_OFFSET_HOTSPOT) *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	range->mem_crit_max = pptable->ThbmLimit *
+		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	range->mem_emergency_max = (pptable->ThbmLimit + CTF_OFFSET_HBM)*
 		PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 
 	return 0;
@@ -1062,9 +1109,20 @@ static int smu_v11_0_set_thermal_fan_table(struct smu_context *smu)
 static int smu_v11_0_start_thermal_control(struct smu_context *smu)
 {
 	int ret = 0;
-	struct PP_TemperatureRange range;
+	struct PP_TemperatureRange range = {
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MIN,
+		TEMP_RANGE_MAX,
+		TEMP_RANGE_MAX};
 	struct amdgpu_device *adev = smu->adev;
 
+	if (!smu->pm_enabled)
+		return ret;
 	smu_v11_0_get_thermal_range(smu, &range);
 
 	if (smu->smu_table.thermal_controller_type) {
@@ -1082,11 +1140,39 @@ static int smu_v11_0_start_thermal_control(struct smu_context *smu)
 
 	adev->pm.dpm.thermal.min_temp = range.min;
 	adev->pm.dpm.thermal.max_temp = range.max;
+	adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max;
+	adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min;
+	adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max;
+	adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max;
+	adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+	adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+	adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
+
+	return ret;
+}
+
+static int smu_v11_0_get_metrics_table(struct smu_context *smu,
+		SmuMetrics_t *metrics_table)
+{
+	int ret = 0;
+
+	if (!smu->metrics_time || time_after(jiffies, smu->metrics_time + HZ / 1000)) {
+		ret = smu_update_table(smu, TABLE_SMU_METRICS,
+				(void *)metrics_table, false);
+		if (ret) {
+			pr_info("Failed to export SMU metrics table!\n");
+			return ret;
+		}
+		memcpy(smu->metrics_table, metrics_table, sizeof(SmuMetrics_t));
+		smu->metrics_time = jiffies;
+	} else
+		memcpy(metrics_table, smu->metrics_table, sizeof(SmuMetrics_t));
 
 	return ret;
 }
 
 static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
+						  enum amd_pp_sensors sensor,
 						  uint32_t *value)
 {
 	int ret = 0;
@@ -1095,31 +1181,64 @@ static int smu_v11_0_get_current_activity_percent(struct smu_context *smu,
 	if (!value)
 		return -EINVAL;
 
-	ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false);
+	ret = smu_v11_0_get_metrics_table(smu, &metrics);
 	if (ret)
 		return ret;
 
-	*value = metrics.AverageGfxActivity;
+	switch (sensor) {
+	case AMDGPU_PP_SENSOR_GPU_LOAD:
+		*value = metrics.AverageGfxActivity;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
+		*value = metrics.AverageUclkActivity;
+		break;
+	default:
+		pr_err("Invalid sensor for retrieving clock activity\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
 
-static int smu_v11_0_thermal_get_temperature(struct smu_context *smu, uint32_t *value)
+static int smu_v11_0_thermal_get_temperature(struct smu_context *smu,
+					     enum amd_pp_sensors sensor,
+					     uint32_t *value)
 {
 	struct amdgpu_device *adev = smu->adev;
+	SmuMetrics_t metrics;
 	uint32_t temp = 0;
+	int ret = 0;
 
 	if (!value)
 		return -EINVAL;
 
-	temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
-	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
-			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
+	ret = smu_v11_0_get_metrics_table(smu, &metrics);
+	if (ret)
+		return ret;
 
-	temp = temp & 0x1ff;
-	temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES;
+	switch (sensor) {
+	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+		temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
+		temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
+				CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
 
-	*value = temp;
+		temp = temp & 0x1ff;
+		temp *= SMU11_TEMPERATURE_UNITS_PER_CENTIGRADES;
+
+		*value = temp;
+		break;
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+		*value = metrics.TemperatureEdge *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		*value = metrics.TemperatureHBM *
+			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+		break;
+	default:
+		pr_err("Invalid sensor for retrieving temp\n");
+		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1132,7 +1251,7 @@ static int smu_v11_0_get_gpu_power(struct smu_context *smu, uint32_t *value)
 	if (!value)
 		return -EINVAL;
 
-	ret = smu_update_table(smu, TABLE_SMU_METRICS, (void *)&metrics, false);
+	ret = smu_v11_0_get_metrics_table(smu, &metrics);
 	if (ret)
 		return ret;
 
@@ -1174,7 +1293,9 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
 	int ret = 0;
 	switch (sensor) {
 	case AMDGPU_PP_SENSOR_GPU_LOAD:
+	case AMDGPU_PP_SENSOR_MEM_LOAD:
 		ret = smu_v11_0_get_current_activity_percent(smu,
+							     sensor,
 							     (uint32_t *)data);
 		*size = 4;
 		break;
@@ -1186,8 +1307,10 @@ static int smu_v11_0_read_sensor(struct smu_context *smu,
 		ret = smu_get_current_clk_freq(smu, PPCLK_GFXCLK, (uint32_t *)data);
 		*size = 4;
 		break;
-	case AMDGPU_PP_SENSOR_GPU_TEMP:
-		ret = smu_v11_0_thermal_get_temperature(smu, (uint32_t *)data);
+	case AMDGPU_PP_SENSOR_HOTSPOT_TEMP:
+	case AMDGPU_PP_SENSOR_EDGE_TEMP:
+	case AMDGPU_PP_SENSOR_MEM_TEMP:
+		ret = smu_v11_0_thermal_get_temperature(smu, sensor, (uint32_t *)data);
 		*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GPU_POWER:
@@ -1235,6 +1358,8 @@ smu_v11_0_display_clock_voltage_request(struct smu_context *smu,
 	PPCLK_e clk_select = 0;
 	uint32_t clk_freq = clock_req->clock_freq_in_khz / 1000;
 
+	if (!smu->pm_enabled)
+		return -EINVAL;
 	if (smu_feature_is_enabled(smu, FEATURE_DPM_DCEFCLK_BIT)) {
 		switch (clk_type) {
 		case amd_pp_dcef_clock:
@@ -1518,7 +1643,7 @@ static int smu_v11_0_get_power_profile_mode(struct smu_context *smu, char *buf)
 			"PD_Data_error_rate_coeff"};
 	int result = 0;
 
-	if (!buf)
+	if (!smu->pm_enabled || !buf)
 		return -EINVAL;
 
 	size += sprintf(buf + size, "%16s %s %s %s %s %s %s %s %s %s %s\n",
@@ -1605,6 +1730,8 @@ static int smu_v11_0_set_power_profile_mode(struct smu_context *smu, long *input
 
 	smu->power_profile_mode = input[size];
 
+	if (!smu->pm_enabled)
+		return ret;
 	if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
 		pr_err("Invalid power profile mode %d\n", smu->power_profile_mode);
 		return -EINVAL;
@@ -1710,24 +1837,24 @@ static int smu_v11_0_update_od8_settings(struct smu_context *smu,
 
 static int smu_v11_0_dpm_set_uvd_enable(struct smu_context *smu, bool enable)
 {
-	if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))
+	if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))
 		return 0;
 
-	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))
+	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))
 		return 0;
 
-	return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);
+	return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);
 }
 
 static int smu_v11_0_dpm_set_vce_enable(struct smu_context *smu, bool enable)
 {
-	if (!smu_feature_is_supported(smu, FEATURE_DPM_UVD_BIT))
+	if (!smu_feature_is_supported(smu, FEATURE_DPM_VCE_BIT))
 		return 0;
 
-	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_UVD_BIT))
+	if (enable == smu_feature_is_enabled(smu, FEATURE_DPM_VCE_BIT))
 		return 0;
 
-	return smu_feature_set_enabled(smu, FEATURE_DPM_UVD_BIT, enable);
+	return smu_feature_set_enabled(smu, FEATURE_DPM_VCE_BIT, enable);
 }
 
 static int smu_v11_0_get_current_rpm(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 669bd0c2a16c..7184d39dcbee 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -25,6 +25,7 @@
 #include <linux/fb.h>
 #include "linux/delay.h"
 #include <linux/types.h>
+#include <linux/pci.h>
 
 #include "smumgr.h"
 #include "pp_debug.h"
@@ -2254,6 +2255,8 @@ static uint32_t ci_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU7_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU7_SoftRegisters, AverageGraphicsA);
+		case AverageMemoryActivity:
+			return offsetof(SMU7_SoftRegisters, AverageMemoryA);
 		case PreVBlankGap:
 			return offsetof(SMU7_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index bc8375cbf297..0ce85b73338e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -2304,6 +2304,8 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU73_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU73_SoftRegisters, AverageGraphicsActivity);
+		case AverageMemoryActivity:
+			return offsetof(SMU73_SoftRegisters, AverageMemoryActivity);
 		case PreVBlankGap:
 			return offsetof(SMU73_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index 375ccf6ff5f2..73091ac0b647 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -25,6 +25,7 @@
 #include "pp_debug.h"
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/gfp.h>
 
@@ -2219,6 +2220,8 @@ static uint32_t iceland_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU71_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU71_SoftRegisters, AverageGraphicsActivity);
+		case AverageMemoryActivity:
+			return offsetof(SMU71_SoftRegisters, AverageMemoryActivity);
 		case PreVBlankGap:
 			return offsetof(SMU71_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2d4cfe14f72e..d6052e6daef2 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include <linux/pci.h>
+
 #include "pp_debug.h"
 #include "smumgr.h"
 #include "smu74.h"
@@ -2313,6 +2315,8 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU74_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU74_SoftRegisters, AverageGraphicsActivity);
+		case AverageMemoryActivity:
+			return offsetof(SMU74_SoftRegisters, AverageMemoryActivity);
 		case PreVBlankGap:
 			return offsetof(SMU74_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index 6d11076a79ba..d409925d1f7d 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include <linux/pci.h>
+
 #include "smumgr.h"
 #include "smu10_inc.h"
 #include "soc15_common.h"
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 3ed6c5f1e5cf..e4e976b9d64e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -23,6 +23,7 @@
 #include "pp_debug.h"
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/gfp.h>
 
@@ -2611,6 +2612,8 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU72_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
+		case AverageMemoryActivity:
+			return offsetof(SMU72_SoftRegisters, AverageMemoryActivity);
 		case PreVBlankGap:
 			return offsetof(SMU72_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index c81acc3192ad..672986e9eecb 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -21,6 +21,8 @@
  *
  */
 
+#include <linux/pci.h>
+
 #include "smumgr.h"
 #include "vega10_inc.h"
 #include "soc15_common.h"
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index ddb801517667..1eaf0fa28ef7 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -287,8 +287,26 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
 	priv->smu_tables.entry[TABLE_OVERDRIVE].version = 0x01;
 	priv->smu_tables.entry[TABLE_OVERDRIVE].size = sizeof(OverDriveTable_t);
 
+	/* allocate space for SMU_METRICS table */
+	ret = amdgpu_bo_create_kernel((struct amdgpu_device *)hwmgr->adev,
+				      sizeof(SmuMetrics_t),
+				      PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
+	if (ret)
+		goto err4;
+
+	priv->smu_tables.entry[TABLE_SMU_METRICS].version = 0x01;
+	priv->smu_tables.entry[TABLE_SMU_METRICS].size = sizeof(SmuMetrics_t);
+
 	return 0;
 
+err4:
+	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
+				&priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
+				&priv->smu_tables.entry[TABLE_OVERDRIVE].table);
 err3:
 	amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].handle,
 				&priv->smu_tables.entry[TABLE_AVFS_FUSE_OVERRIDE].mc_addr,
@@ -334,6 +352,9 @@ static int vega12_smu_fini(struct pp_hwmgr *hwmgr)
 		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_OVERDRIVE].handle,
 				      &priv->smu_tables.entry[TABLE_OVERDRIVE].mc_addr,
 				      &priv->smu_tables.entry[TABLE_OVERDRIVE].table);
+		amdgpu_bo_free_kernel(&priv->smu_tables.entry[TABLE_SMU_METRICS].handle,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].mc_addr,
+				      &priv->smu_tables.entry[TABLE_SMU_METRICS].table);
 		kfree(hwmgr->smu_backend);
 		hwmgr->smu_backend = NULL;
 	}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index 1e69300f6175..d499204b2184 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -2167,6 +2167,8 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
 			return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
 		case AverageGraphicsActivity:
 			return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
+		case AverageMemoryActivity:
+			return offsetof(SMU75_SoftRegisters, AverageMemoryActivity);
 		case PreVBlankGap:
 			return offsetof(SMU75_SoftRegisters, PreVBlankGap);
 		case VBlankTimeout:
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 8fafcbdb1dfd..4aa8f5a69c4c 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -2374,6 +2374,157 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu,
 	return ret;
 }
 
+static int vega20_get_enabled_smc_features(struct smu_context *smu,
+		uint64_t *features_enabled)
+{
+	uint32_t feature_mask[2] = {0, 0};
+	int ret = 0;
+
+	ret = smu_feature_get_enabled_mask(smu, feature_mask, 2);
+	if (ret)
+		return ret;
+
+	*features_enabled = ((((uint64_t)feature_mask[0] << SMU_FEATURES_LOW_SHIFT) & SMU_FEATURES_LOW_MASK) |
+			(((uint64_t)feature_mask[1] << SMU_FEATURES_HIGH_SHIFT) & SMU_FEATURES_HIGH_MASK));
+
+	return ret;
+}
+
+static int vega20_enable_smc_features(struct smu_context *smu,
+		bool enable, uint64_t feature_mask)
+{
+	uint32_t smu_features_low, smu_features_high;
+	int ret = 0;
+
+	smu_features_low = (uint32_t)((feature_mask & SMU_FEATURES_LOW_MASK) >> SMU_FEATURES_LOW_SHIFT);
+	smu_features_high = (uint32_t)((feature_mask & SMU_FEATURES_HIGH_MASK) >> SMU_FEATURES_HIGH_SHIFT);
+
+	if (enable) {
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesLow,
+						  smu_features_low);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_EnableSmuFeaturesHigh,
+						  smu_features_high);
+		if (ret)
+			return ret;
+	} else {
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesLow,
+						  smu_features_low);
+		if (ret)
+			return ret;
+		ret = smu_send_smc_msg_with_param(smu, SMU_MSG_DisableSmuFeaturesHigh,
+						  smu_features_high);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+
+}
+
+static int vega20_get_ppfeature_status(struct smu_context *smu, char *buf)
+{
+	static const char *ppfeature_name[] = {
+				"DPM_PREFETCHER",
+				"GFXCLK_DPM",
+				"UCLK_DPM",
+				"SOCCLK_DPM",
+				"UVD_DPM",
+				"VCE_DPM",
+				"ULV",
+				"MP0CLK_DPM",
+				"LINK_DPM",
+				"DCEFCLK_DPM",
+				"GFXCLK_DS",
+				"SOCCLK_DS",
+				"LCLK_DS",
+				"PPT",
+				"TDC",
+				"THERMAL",
+				"GFX_PER_CU_CG",
+				"RM",
+				"DCEFCLK_DS",
+				"ACDC",
+				"VR0HOT",
+				"VR1HOT",
+				"FW_CTF",
+				"LED_DISPLAY",
+				"FAN_CONTROL",
+				"GFX_EDC",
+				"GFXOFF",
+				"CG",
+				"FCLK_DPM",
+				"FCLK_DS",
+				"MP1CLK_DS",
+				"MP0CLK_DS",
+				"XGMI",
+				"ECC"};
+	static const char *output_title[] = {
+				"FEATURES",
+				"BITMASK",
+				"ENABLEMENT"};
+	uint64_t features_enabled;
+	int i;
+	int ret = 0;
+	int size = 0;
+
+	ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+	if (ret)
+		return ret;
+
+	size += sprintf(buf + size, "Current ppfeatures: 0x%016llx\n", features_enabled);
+	size += sprintf(buf + size, "%-19s %-22s %s\n",
+				output_title[0],
+				output_title[1],
+				output_title[2]);
+	for (i = 0; i < GNLD_FEATURES_MAX; i++) {
+		size += sprintf(buf + size, "%-19s 0x%016llx %6s\n",
+					ppfeature_name[i],
+					1ULL << i,
+					(features_enabled & (1ULL << i)) ? "Y" : "N");
+	}
+
+	return size;
+}
+
+static int vega20_set_ppfeature_status(struct smu_context *smu, uint64_t new_ppfeature_masks)
+{
+	uint64_t features_enabled;
+	uint64_t features_to_enable;
+	uint64_t features_to_disable;
+	int ret = 0;
+
+	if (new_ppfeature_masks >= (1ULL << GNLD_FEATURES_MAX))
+		return -EINVAL;
+
+	ret = vega20_get_enabled_smc_features(smu, &features_enabled);
+	if (ret)
+		return ret;
+
+	features_to_disable =
+		features_enabled & ~new_ppfeature_masks;
+	features_to_enable =
+		~features_enabled & new_ppfeature_masks;
+
+	pr_debug("features_to_disable 0x%llx\n", features_to_disable);
+	pr_debug("features_to_enable 0x%llx\n", features_to_enable);
+
+	if (features_to_disable) {
+		ret = vega20_enable_smc_features(smu, false, features_to_disable);
+		if (ret)
+			return ret;
+	}
+
+	if (features_to_enable) {
+		ret = vega20_enable_smc_features(smu, true, features_to_enable);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static const struct pptable_funcs vega20_ppt_funcs = {
 	.alloc_dpm_context = vega20_allocate_dpm_context,
 	.store_powerplay_table = vega20_store_powerplay_table,
@@ -2404,6 +2555,8 @@ static const struct pptable_funcs vega20_ppt_funcs = {
 	.unforce_dpm_levels = vega20_unforce_dpm_levels,
 	.upload_dpm_level = vega20_upload_dpm_level,
 	.get_profiling_clk_mask = vega20_get_profiling_clk_mask,
+	.set_ppfeature_status = vega20_set_ppfeature_status,
+	.get_ppfeature_status = vega20_get_ppfeature_status,
 };
 
 void vega20_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
index 5a0d2af63173..87f3a8303645 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.h
@@ -36,6 +36,50 @@
 #define AVFS_CURVE 0
 #define OD8_HOTCURVE_TEMPERATURE 85
 
+#define SMU_FEATURES_LOW_MASK        0x00000000FFFFFFFF
+#define SMU_FEATURES_LOW_SHIFT       0
+#define SMU_FEATURES_HIGH_MASK       0xFFFFFFFF00000000
+#define SMU_FEATURES_HIGH_SHIFT      32
+
+enum {
+	GNLD_DPM_PREFETCHER = 0,
+	GNLD_DPM_GFXCLK,
+	GNLD_DPM_UCLK,
+	GNLD_DPM_SOCCLK,
+	GNLD_DPM_UVD,
+	GNLD_DPM_VCE,
+	GNLD_ULV,
+	GNLD_DPM_MP0CLK,
+	GNLD_DPM_LINK,
+	GNLD_DPM_DCEFCLK,
+	GNLD_DS_GFXCLK,
+	GNLD_DS_SOCCLK,
+	GNLD_DS_LCLK,
+	GNLD_PPT,
+	GNLD_TDC,
+	GNLD_THERMAL,
+	GNLD_GFX_PER_CU_CG,
+	GNLD_RM,
+	GNLD_DS_DCEFCLK,
+	GNLD_ACDC,
+	GNLD_VR0HOT,
+	GNLD_VR1HOT,
+	GNLD_FW_CTF,
+	GNLD_LED_DISPLAY,
+	GNLD_FAN_CONTROL,
+	GNLD_DIDT,
+	GNLD_GFXOFF,
+	GNLD_CG,
+	GNLD_DPM_FCLK,
+	GNLD_DS_FCLK,
+	GNLD_DS_MP1CLK,
+	GNLD_DS_MP0CLK,
+	GNLD_XGMI,
+	GNLD_ECC,
+
+	GNLD_FEATURES_MAX
+};
+
 struct vega20_dpm_level {
         bool            enabled;
         uint32_t        value;
diff --git a/drivers/gpu/drm/arm/display/include/malidp_io.h b/drivers/gpu/drm/arm/display/include/malidp_io.h
index 4fb3caf864ce..9440dff94212 100644
--- a/drivers/gpu/drm/arm/display/include/malidp_io.h
+++ b/drivers/gpu/drm/arm/display/include/malidp_io.h
@@ -22,6 +22,13 @@ malidp_write32(u32 __iomem *base, u32 offset, u32 v)
 }
 
 static inline void
+malidp_write64(u32 __iomem *base, u32 offset, u64 v)
+{
+	writel(lower_32_bits(v), (base + (offset >> 2)));
+	writel(upper_32_bits(v), (base + (offset >> 2) + 1));
+}
+
+static inline void
 malidp_write32_mask(u32 __iomem *base, u32 offset, u32 m, u32 v)
 {
 	u32 tmp = malidp_read32(base, offset);
diff --git a/drivers/gpu/drm/arm/display/include/malidp_utils.h b/drivers/gpu/drm/arm/display/include/malidp_utils.h
index 8cfd91196e15..3bc383d5bf73 100644
--- a/drivers/gpu/drm/arm/display/include/malidp_utils.h
+++ b/drivers/gpu/drm/arm/display/include/malidp_utils.h
@@ -8,6 +8,7 @@
 #define _MALIDP_UTILS_
 
 #include <linux/delay.h>
+#include <linux/errno.h>
 
 #define has_bit(nr, mask)	(BIT(nr) & (mask))
 #define has_bits(bits, mask)	(((bits) & (mask)) == (bits))
@@ -20,11 +21,9 @@
 	int num_tries = __tries;			\
 	while (!__cond && (num_tries > 0)) {		\
 		usleep_range(__min_range, __max_range);	\
-		if (__cond)				\
-			break;				\
 		num_tries--;				\
 	}						\
-	num_tries;					\
+	(__cond) ? 0 : -ETIMEDOUT;			\
 })
 
 /* the restriction of range is [start, end] */
diff --git a/drivers/gpu/drm/arm/display/komeda/Makefile b/drivers/gpu/drm/arm/display/komeda/Makefile
index 412eeba8c39f..5c3900c2e764 100644
--- a/drivers/gpu/drm/arm/display/komeda/Makefile
+++ b/drivers/gpu/drm/arm/display/komeda/Makefile
@@ -8,12 +8,14 @@ komeda-y := \
 	komeda_drv.o \
 	komeda_dev.o \
 	komeda_format_caps.o \
+	komeda_color_mgmt.o \
 	komeda_pipeline.o \
 	komeda_pipeline_state.o \
 	komeda_framebuffer.o \
 	komeda_kms.o \
 	komeda_crtc.o \
 	komeda_plane.o \
+	komeda_wb_connector.o \
 	komeda_private_obj.o
 
 komeda-y += \
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
index 6bab816ed8e7..4073a452e24a 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_component.c
@@ -10,6 +10,7 @@
 #include "komeda_kms.h"
 #include "malidp_io.h"
 #include "komeda_framebuffer.h"
+#include "komeda_color_mgmt.h"
 
 static void get_resources_id(u32 hw_id, u32 *pipe_id, u32 *comp_id)
 {
@@ -134,11 +135,60 @@ static u32 to_rot_ctrl(u32 rot)
 	return lr_ctrl;
 }
 
-static inline u32 to_d71_input_id(struct komeda_component_output *output)
+static u32 to_ad_ctrl(u64 modifier)
 {
-	struct komeda_component *comp = output->component;
+	u32 afbc_ctrl = AD_AEN;
 
-	return comp ? (comp->hw_id + output->output_port) : 0;
+	if (!modifier)
+		return 0;
+
+	if ((modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) ==
+	    AFBC_FORMAT_MOD_BLOCK_SIZE_32x8)
+		afbc_ctrl |= AD_WB;
+
+	if (modifier & AFBC_FORMAT_MOD_YTR)
+		afbc_ctrl |= AD_YT;
+	if (modifier & AFBC_FORMAT_MOD_SPLIT)
+		afbc_ctrl |= AD_BS;
+	if (modifier & AFBC_FORMAT_MOD_TILED)
+		afbc_ctrl |= AD_TH;
+
+	return afbc_ctrl;
+}
+
+static inline u32 to_d71_input_id(struct komeda_component_state *st, int idx)
+{
+	struct komeda_component_output *input = &st->inputs[idx];
+
+	/* if input is not active, set hw input_id(0) to disable it */
+	if (has_bit(idx, st->active_inputs))
+		return input->component->hw_id + input->output_port;
+	else
+		return 0;
+}
+
+static void d71_layer_update_fb(struct komeda_component *c,
+				struct komeda_fb *kfb,
+				dma_addr_t *addr)
+{
+	struct drm_framebuffer *fb = &kfb->base;
+	const struct drm_format_info *info = fb->format;
+	u32 __iomem *reg = c->reg;
+	int block_h;
+
+	if (info->num_planes > 2)
+		malidp_write64(reg, BLK_P2_PTR_LOW, addr[2]);
+
+	if (info->num_planes > 1) {
+		block_h = drm_format_info_block_height(info, 1);
+		malidp_write32(reg, BLK_P1_STRIDE, fb->pitches[1] * block_h);
+		malidp_write64(reg, BLK_P1_PTR_LOW, addr[1]);
+	}
+
+	block_h = drm_format_info_block_height(info, 0);
+	malidp_write32(reg, BLK_P0_STRIDE, fb->pitches[0] * block_h);
+	malidp_write64(reg, BLK_P0_PTR_LOW, addr[0]);
+	malidp_write32(reg, LAYER_FMT, kfb->format_caps->hw_id);
 }
 
 static void d71_layer_disable(struct komeda_component *c)
@@ -156,26 +206,65 @@ static void d71_layer_update(struct komeda_component *c,
 	u32 __iomem *reg = c->reg;
 	u32 ctrl_mask = L_EN | L_ROT(L_ROT_R270) | L_HFLIP | L_VFLIP | L_TBU_EN;
 	u32 ctrl = L_EN | to_rot_ctrl(st->rot);
-	int i;
 
-	for (i = 0; i < fb->format->num_planes; i++) {
-		malidp_write32(reg,
-			       BLK_P0_PTR_LOW + i * LAYER_PER_PLANE_REGS * 4,
-			       lower_32_bits(st->addr[i]));
-		malidp_write32(reg,
-			       BLK_P0_PTR_HIGH + i * LAYER_PER_PLANE_REGS * 4,
-			       upper_32_bits(st->addr[i]));
-		if (i >= 2)
+	d71_layer_update_fb(c, kfb, st->addr);
+
+	malidp_write32(reg, AD_CONTROL, to_ad_ctrl(fb->modifier));
+	if (fb->modifier) {
+		u64 addr;
+
+		malidp_write32(reg, LAYER_AD_H_CROP, HV_CROP(st->afbc_crop_l,
+							     st->afbc_crop_r));
+		malidp_write32(reg, LAYER_AD_V_CROP, HV_CROP(st->afbc_crop_t,
+							     st->afbc_crop_b));
+		/* afbc 1.2 wants payload, afbc 1.0/1.1 wants end_addr */
+		if (fb->modifier & AFBC_FORMAT_MOD_TILED)
+			addr = st->addr[0] + kfb->offset_payload;
+		else
+			addr = st->addr[0] + kfb->afbc_size - 1;
+
+		malidp_write32(reg, BLK_P1_PTR_LOW, lower_32_bits(addr));
+		malidp_write32(reg, BLK_P1_PTR_HIGH, upper_32_bits(addr));
+	}
+
+	if (fb->format->is_yuv) {
+		u32 upsampling = 0;
+
+		switch (kfb->format_caps->fourcc) {
+		case DRM_FORMAT_YUYV:
+			upsampling = fb->modifier ? LR_CHI422_BILINEAR :
+				     LR_CHI422_REPLICATION;
+			break;
+		case DRM_FORMAT_UYVY:
+			upsampling = LR_CHI422_REPLICATION;
 			break;
+		case DRM_FORMAT_NV12:
+		case DRM_FORMAT_YUV420_8BIT:
+		case DRM_FORMAT_YUV420_10BIT:
+		case DRM_FORMAT_YUV420:
+		case DRM_FORMAT_P010:
+		/* these fmt support MPGE/JPEG both, here perfer JPEG*/
+			upsampling = LR_CHI420_JPEG;
+			break;
+		case DRM_FORMAT_X0L2:
+			upsampling = LR_CHI420_JPEG;
+			break;
+		default:
+			break;
+		}
 
-		malidp_write32(reg,
-			       BLK_P0_STRIDE + i * LAYER_PER_PLANE_REGS * 4,
-			       fb->pitches[i] & 0xFFFF);
+		malidp_write32(reg, LAYER_R_CONTROL, upsampling);
+		malidp_write_group(reg, LAYER_YUV_RGB_COEFF0,
+				   KOMEDA_N_YUV2RGB_COEFFS,
+				   komeda_select_yuv2rgb_coeffs(
+					plane_st->color_encoding,
+					plane_st->color_range));
 	}
 
-	malidp_write32(reg, LAYER_FMT, kfb->format_caps->hw_id);
 	malidp_write32(reg, BLK_IN_SIZE, HV_SIZE(st->hsize, st->vsize));
 
+	if (kfb->is_va)
+		ctrl |= L_TBU_EN;
 	malidp_write32_mask(reg, BLK_CONTROL, ctrl_mask, ctrl);
 }
 
@@ -288,10 +377,90 @@ static int d71_layer_init(struct d71_dev *d71,
 	return 0;
 }
 
+static void d71_wb_layer_update(struct komeda_component *c,
+				struct komeda_component_state *state)
+{
+	struct komeda_layer_state *st = to_layer_st(state);
+	struct drm_connector_state *conn_st = state->wb_conn->state;
+	struct komeda_fb *kfb = to_kfb(conn_st->writeback_job->fb);
+	u32 ctrl = L_EN | LW_OFM, mask = L_EN | LW_OFM | LW_TBU_EN;
+	u32 __iomem *reg = c->reg;
+
+	d71_layer_update_fb(c, kfb, st->addr);
+
+	if (kfb->is_va)
+		ctrl |= LW_TBU_EN;
+
+	malidp_write32(reg, BLK_IN_SIZE, HV_SIZE(st->hsize, st->vsize));
+	malidp_write32(reg, BLK_INPUT_ID0, to_d71_input_id(state, 0));
+	malidp_write32_mask(reg, BLK_CONTROL, mask, ctrl);
+}
+
+static void d71_wb_layer_dump(struct komeda_component *c, struct seq_file *sf)
+{
+	u32 v[12], i;
+
+	dump_block_header(sf, c->reg);
+
+	get_values_from_reg(c->reg, 0x80, 1, v);
+	seq_printf(sf, "LW_INPUT_ID0:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(c->reg, 0xD0, 3, v);
+	seq_printf(sf, "LW_CONTROL:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "LW_PROG_LINE:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "LW_FORMAT:\t\t0x%X\n", v[2]);
+
+	get_values_from_reg(c->reg, 0xE0, 1, v);
+	seq_printf(sf, "LW_IN_SIZE:\t\t0x%X\n", v[0]);
+
+	for (i = 0; i < 2; i++) {
+		get_values_from_reg(c->reg, 0x100 + i * 0x10, 3, v);
+		seq_printf(sf, "LW_P%u_PTR_LOW:\t\t0x%X\n", i, v[0]);
+		seq_printf(sf, "LW_P%u_PTR_HIGH:\t\t0x%X\n", i, v[1]);
+		seq_printf(sf, "LW_P%u_STRIDE:\t\t0x%X\n", i, v[2]);
+	}
+
+	get_values_from_reg(c->reg, 0x130, 12, v);
+	for (i = 0; i < 12; i++)
+		seq_printf(sf, "LW_RGB_YUV_COEFF%u:\t0x%X\n", i, v[i]);
+}
+
+static void d71_wb_layer_disable(struct komeda_component *c)
+{
+	malidp_write32(c->reg, BLK_INPUT_ID0, 0);
+	malidp_write32_mask(c->reg, BLK_CONTROL, L_EN, 0);
+}
+
+static const struct komeda_component_funcs d71_wb_layer_funcs = {
+	.update		= d71_wb_layer_update,
+	.disable	= d71_wb_layer_disable,
+	.dump_register	= d71_wb_layer_dump,
+};
+
 static int d71_wb_layer_init(struct d71_dev *d71,
 			     struct block_header *blk, u32 __iomem *reg)
 {
-	DRM_DEBUG("Detect D71_Wb_Layer.\n");
+	struct komeda_component *c;
+	struct komeda_layer *wb_layer;
+	u32 pipe_id, layer_id;
+
+	get_resources_id(blk->block_info, &pipe_id, &layer_id);
+
+	c = komeda_component_add(&d71->pipes[pipe_id]->base, sizeof(*wb_layer),
+				 layer_id, BLOCK_INFO_INPUT_ID(blk->block_info),
+				 &d71_wb_layer_funcs,
+				 1, get_valid_inputs(blk), 0, reg,
+				 "LPU%d_LAYER_WR", pipe_id);
+	if (IS_ERR(c)) {
+		DRM_ERROR("Failed to add wb_layer component\n");
+		return PTR_ERR(c);
+	}
+
+	wb_layer = to_layer(c);
+	wb_layer->layer_type = KOMEDA_FMT_WB_LAYER;
+
+	set_range(&wb_layer->hsize_in, D71_MIN_LINE_SIZE, d71->max_line_size);
+	set_range(&wb_layer->vsize_in, D71_MIN_VERTICAL_SIZE, d71->max_vsize);
 
 	return 0;
 }
@@ -303,8 +472,18 @@ static void d71_component_disable(struct komeda_component *c)
 
 	malidp_write32(reg, BLK_CONTROL, 0);
 
-	for (i = 0; i < c->max_active_inputs; i++)
+	for (i = 0; i < c->max_active_inputs; i++) {
 		malidp_write32(reg, BLK_INPUT_ID0 + (i << 2), 0);
+
+		/* Besides clearing the input ID to zero, D71 compiz also has
+		 * input enable bit in CU_INPUTx_CONTROL which need to be
+		 * cleared.
+		 */
+		if (has_bit(c->id, KOMEDA_PIPELINE_COMPIZS))
+			malidp_write32(reg, CU_INPUT0_CONTROL +
+				       i * CU_PER_INPUT_REGS * 4,
+				       CU_INPUT_CTRL_ALPHA(0xFF));
+	}
 }
 
 static void compiz_enable_input(u32 __iomem *id_reg,
@@ -337,15 +516,15 @@ static void d71_compiz_update(struct komeda_component *c,
 	struct komeda_compiz_state *st = to_compiz_st(state);
 	u32 __iomem *reg = c->reg;
 	u32 __iomem *id_reg, *cfg_reg;
-	u32 index, input_hw_id;
+	u32 index;
 
 	for_each_changed_input(state, index) {
 		id_reg = reg + index;
 		cfg_reg = reg + index * CU_PER_INPUT_REGS;
-		input_hw_id = to_d71_input_id(&state->inputs[index]);
 		if (state->active_inputs & BIT(index)) {
 			compiz_enable_input(id_reg, cfg_reg,
-					    input_hw_id, &st->cins[index]);
+					    to_d71_input_id(state, index),
+					    &st->cins[index]);
 		} else {
 			malidp_write32(id_reg, BLK_INPUT_ID0, 0);
 			malidp_write32(cfg_reg, CU_INPUT0_CONTROL, 0);
@@ -424,18 +603,354 @@ static int d71_compiz_init(struct d71_dev *d71,
 	return 0;
 }
 
+static void d71_scaler_update_filter_lut(u32 __iomem *reg, u32 hsize_in,
+					 u32 vsize_in, u32 hsize_out,
+					 u32 vsize_out)
+{
+	u32 val = 0;
+
+	if (hsize_in <= hsize_out)
+		val  |= 0x62;
+	else if (hsize_in <= (hsize_out + hsize_out / 2))
+		val |= 0x63;
+	else if (hsize_in <= hsize_out * 2)
+		val |= 0x64;
+	else if (hsize_in <= hsize_out * 2 + (hsize_out * 3) / 4)
+		val |= 0x65;
+	else
+		val |= 0x66;
+
+	if (vsize_in <= vsize_out)
+		val  |= SC_VTSEL(0x6A);
+	else if (vsize_in <= (vsize_out + vsize_out / 2))
+		val |= SC_VTSEL(0x6B);
+	else if (vsize_in <= vsize_out * 2)
+		val |= SC_VTSEL(0x6C);
+	else if (vsize_in <= vsize_out * 2 + vsize_out * 3 / 4)
+		val |= SC_VTSEL(0x6D);
+	else
+		val |= SC_VTSEL(0x6E);
+
+	malidp_write32(reg, SC_COEFFTAB, val);
+}
+
+static void d71_scaler_update(struct komeda_component *c,
+			      struct komeda_component_state *state)
+{
+	struct komeda_scaler_state *st = to_scaler_st(state);
+	u32 __iomem *reg = c->reg;
+	u32 init_ph, delta_ph, ctrl;
+
+	d71_scaler_update_filter_lut(reg, st->hsize_in, st->vsize_in,
+				     st->hsize_out, st->vsize_out);
+
+	malidp_write32(reg, BLK_IN_SIZE, HV_SIZE(st->hsize_in, st->vsize_in));
+	malidp_write32(reg, SC_OUT_SIZE, HV_SIZE(st->hsize_out, st->vsize_out));
+	malidp_write32(reg, SC_H_CROP, HV_CROP(st->left_crop, st->right_crop));
+
+	/* for right part, HW only sample the valid pixel which means the pixels
+	 * in left_crop will be jumpped, and the first sample pixel is:
+	 *
+	 * dst_a = st->total_hsize_out - st->hsize_out + st->left_crop + 0.5;
+	 *
+	 * Then the corresponding texel in src is:
+	 *
+	 * h_delta_phase = st->total_hsize_in / st->total_hsize_out;
+	 * src_a = dst_A * h_delta_phase;
+	 *
+	 * and h_init_phase is src_a deduct the real source start src_S;
+	 *
+	 * src_S = st->total_hsize_in - st->hsize_in;
+	 * h_init_phase = src_a - src_S;
+	 *
+	 * And HW precision for the initial/delta_phase is 16:16 fixed point,
+	 * the following is the simplified formula
+	 */
+	if (st->right_part) {
+		u32 dst_a = st->total_hsize_out - st->hsize_out + st->left_crop;
+
+		if (st->en_img_enhancement)
+			dst_a -= 1;
+
+		init_ph = ((st->total_hsize_in * (2 * dst_a + 1) -
+			    2 * st->total_hsize_out * (st->total_hsize_in -
+			    st->hsize_in)) << 15) / st->total_hsize_out;
+	} else {
+		init_ph = (st->total_hsize_in << 15) / st->total_hsize_out;
+	}
+
+	malidp_write32(reg, SC_H_INIT_PH, init_ph);
+
+	delta_ph = (st->total_hsize_in << 16) / st->total_hsize_out;
+	malidp_write32(reg, SC_H_DELTA_PH, delta_ph);
+
+	init_ph = (st->total_vsize_in << 15) / st->vsize_out;
+	malidp_write32(reg, SC_V_INIT_PH, init_ph);
+
+	delta_ph = (st->total_vsize_in << 16) / st->vsize_out;
+	malidp_write32(reg, SC_V_DELTA_PH, delta_ph);
+
+	ctrl = 0;
+	ctrl |= st->en_scaling ? SC_CTRL_SCL : 0;
+	ctrl |= st->en_alpha ? SC_CTRL_AP : 0;
+	ctrl |= st->en_img_enhancement ? SC_CTRL_IENH : 0;
+	/* If we use the hardware splitter we shouldn't set SC_CTRL_LS */
+	if (st->en_split &&
+	    state->inputs[0].component->id != KOMEDA_COMPONENT_SPLITTER)
+		ctrl |= SC_CTRL_LS;
+
+	malidp_write32(reg, BLK_CONTROL, ctrl);
+	malidp_write32(reg, BLK_INPUT_ID0, to_d71_input_id(state, 0));
+}
+
+static void d71_scaler_dump(struct komeda_component *c, struct seq_file *sf)
+{
+	u32 v[9];
+
+	dump_block_header(sf, c->reg);
+
+	get_values_from_reg(c->reg, 0x80, 1, v);
+	seq_printf(sf, "SC_INPUT_ID0:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(c->reg, 0xD0, 1, v);
+	seq_printf(sf, "SC_CONTROL:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(c->reg, 0xDC, 9, v);
+	seq_printf(sf, "SC_COEFFTAB:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "SC_IN_SIZE:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "SC_OUT_SIZE:\t\t0x%X\n", v[2]);
+	seq_printf(sf, "SC_H_CROP:\t\t0x%X\n", v[3]);
+	seq_printf(sf, "SC_V_CROP:\t\t0x%X\n", v[4]);
+	seq_printf(sf, "SC_H_INIT_PH:\t\t0x%X\n", v[5]);
+	seq_printf(sf, "SC_H_DELTA_PH:\t\t0x%X\n", v[6]);
+	seq_printf(sf, "SC_V_INIT_PH:\t\t0x%X\n", v[7]);
+	seq_printf(sf, "SC_V_DELTA_PH:\t\t0x%X\n", v[8]);
+}
+
+static const struct komeda_component_funcs d71_scaler_funcs = {
+	.update		= d71_scaler_update,
+	.disable	= d71_component_disable,
+	.dump_register	= d71_scaler_dump,
+};
+
+static int d71_scaler_init(struct d71_dev *d71,
+			   struct block_header *blk, u32 __iomem *reg)
+{
+	struct komeda_component *c;
+	struct komeda_scaler *scaler;
+	u32 pipe_id, comp_id;
+
+	get_resources_id(blk->block_info, &pipe_id, &comp_id);
+
+	c = komeda_component_add(&d71->pipes[pipe_id]->base, sizeof(*scaler),
+				 comp_id, BLOCK_INFO_INPUT_ID(blk->block_info),
+				 &d71_scaler_funcs,
+				 1, get_valid_inputs(blk), 1, reg,
+				 "CU%d_SCALER%d",
+				 pipe_id, BLOCK_INFO_BLK_ID(blk->block_info));
+
+	if (IS_ERR(c)) {
+		DRM_ERROR("Failed to initialize scaler");
+		return PTR_ERR(c);
+	}
+
+	scaler = to_scaler(c);
+	set_range(&scaler->hsize, 4, 2048);
+	set_range(&scaler->vsize, 4, 4096);
+	scaler->max_downscaling = 6;
+	scaler->max_upscaling = 64;
+	scaler->scaling_split_overlap = 8;
+	scaler->enh_split_overlap = 1;
+
+	malidp_write32(c->reg, BLK_CONTROL, 0);
+
+	return 0;
+}
+
+static int d71_downscaling_clk_check(struct komeda_pipeline *pipe,
+				     struct drm_display_mode *mode,
+				     unsigned long aclk_rate,
+				     struct komeda_data_flow_cfg *dflow)
+{
+	u32 h_in = dflow->in_w;
+	u32 v_in = dflow->in_h;
+	u32 v_out = dflow->out_h;
+	u64 fraction, denominator;
+
+	/* D71 downscaling must satisfy the following equation
+	 *
+	 *   ACLK                   h_in * v_in
+	 * ------- >= ---------------------------------------------
+	 *  PXLCLK     (h_total - (1 + 2 * v_in / v_out)) * v_out
+	 *
+	 * In only horizontal downscaling situation, the right side should be
+	 * multiplied by (h_total - 3) / (h_active - 3), then equation becomes
+	 *
+	 *   ACLK          h_in
+	 * ------- >= ----------------
+	 *  PXLCLK     (h_active - 3)
+	 *
+	 * To avoid precision lost the equation 1 will be convert to:
+	 *
+	 *   ACLK             h_in * v_in
+	 * ------- >= -----------------------------------
+	 *  PXLCLK     (h_total -1 ) * v_out -  2 * v_in
+	 */
+	if (v_in == v_out) {
+		fraction = h_in;
+		denominator = mode->hdisplay - 3;
+	} else {
+		fraction = h_in * v_in;
+		denominator = (mode->htotal - 1) * v_out -  2 * v_in;
+	}
+
+	return aclk_rate * denominator >= mode->clock * 1000 * fraction ?
+	       0 : -EINVAL;
+}
+
+static void d71_splitter_update(struct komeda_component *c,
+				struct komeda_component_state *state)
+{
+	struct komeda_splitter_state *st = to_splitter_st(state);
+	u32 __iomem *reg = c->reg;
+
+	malidp_write32(reg, BLK_INPUT_ID0, to_d71_input_id(state, 0));
+	malidp_write32(reg, BLK_SIZE, HV_SIZE(st->hsize, st->vsize));
+	malidp_write32(reg, SP_OVERLAP_SIZE, st->overlap & 0x1FFF);
+	malidp_write32(reg, BLK_CONTROL, BLK_CTRL_EN);
+}
+
+static void d71_splitter_dump(struct komeda_component *c, struct seq_file *sf)
+{
+	u32 v[3];
+
+	dump_block_header(sf, c->reg);
+
+	get_values_from_reg(c->reg, BLK_INPUT_ID0, 1, v);
+	seq_printf(sf, "SP_INPUT_ID0:\t\t0x%X\n", v[0]);
+
+	get_values_from_reg(c->reg, BLK_CONTROL, 3, v);
+	seq_printf(sf, "SP_CONTROL:\t\t0x%X\n", v[0]);
+	seq_printf(sf, "SP_SIZE:\t\t0x%X\n", v[1]);
+	seq_printf(sf, "SP_OVERLAP_SIZE:\t0x%X\n", v[2]);
+}
+
+static const struct komeda_component_funcs d71_splitter_funcs = {
+	.update		= d71_splitter_update,
+	.disable	= d71_component_disable,
+	.dump_register	= d71_splitter_dump,
+};
+
+static int d71_splitter_init(struct d71_dev *d71,
+			     struct block_header *blk, u32 __iomem *reg)
+{
+	struct komeda_component *c;
+	struct komeda_splitter *splitter;
+	u32 pipe_id, comp_id;
+
+	get_resources_id(blk->block_info, &pipe_id, &comp_id);
+
+	c = komeda_component_add(&d71->pipes[pipe_id]->base, sizeof(*splitter),
+				 comp_id,
+				 BLOCK_INFO_INPUT_ID(blk->block_info),
+				 &d71_splitter_funcs,
+				 1, get_valid_inputs(blk), 2, reg,
+				 "CU%d_SPLITTER", pipe_id);
+
+	if (IS_ERR(c)) {
+		DRM_ERROR("Failed to initialize splitter");
+		return -1;
+	}
+
+	splitter = to_splitter(c);
+
+	set_range(&splitter->hsize, 4, d71->max_line_size);
+	set_range(&splitter->vsize, 4, d71->max_vsize);
+
+	return 0;
+}
+
+static void d71_merger_update(struct komeda_component *c,
+			      struct komeda_component_state *state)
+{
+	struct komeda_merger_state *st = to_merger_st(state);
+	u32 __iomem *reg = c->reg;
+	u32 index;
+
+	for_each_changed_input(state, index)
+		malidp_write32(reg, MG_INPUT_ID0 + index * 4,
+			       to_d71_input_id(state, index));
+
+	malidp_write32(reg, MG_SIZE, HV_SIZE(st->hsize_merged,
+					     st->vsize_merged));
+	malidp_write32(reg, BLK_CONTROL, BLK_CTRL_EN);
+}
+
+static void d71_merger_dump(struct komeda_component *c, struct seq_file *sf)
+{
+	u32 v;
+
+	dump_block_header(sf, c->reg);
+
+	get_values_from_reg(c->reg, MG_INPUT_ID0, 1, &v);
+	seq_printf(sf, "MG_INPUT_ID0:\t\t0x%X\n", v);
+
+	get_values_from_reg(c->reg, MG_INPUT_ID1, 1, &v);
+	seq_printf(sf, "MG_INPUT_ID1:\t\t0x%X\n", v);
+
+	get_values_from_reg(c->reg, BLK_CONTROL, 1, &v);
+	seq_printf(sf, "MG_CONTROL:\t\t0x%X\n", v);
+
+	get_values_from_reg(c->reg, MG_SIZE, 1, &v);
+	seq_printf(sf, "MG_SIZE:\t\t0x%X\n", v);
+}
+
+static const struct komeda_component_funcs d71_merger_funcs = {
+	.update		= d71_merger_update,
+	.disable	= d71_component_disable,
+	.dump_register	= d71_merger_dump,
+};
+
+static int d71_merger_init(struct d71_dev *d71,
+			   struct block_header *blk, u32 __iomem *reg)
+{
+	struct komeda_component *c;
+	struct komeda_merger *merger;
+	u32 pipe_id, comp_id;
+
+	get_resources_id(blk->block_info, &pipe_id, &comp_id);
+
+	c = komeda_component_add(&d71->pipes[pipe_id]->base, sizeof(*merger),
+				 comp_id,
+				 BLOCK_INFO_INPUT_ID(blk->block_info),
+				 &d71_merger_funcs,
+				 MG_NUM_INPUTS_IDS, get_valid_inputs(blk),
+				 MG_NUM_OUTPUTS_IDS, reg,
+				 "CU%d_MERGER", pipe_id);
+
+	if (IS_ERR(c)) {
+		DRM_ERROR("Failed to initialize merger.\n");
+		return PTR_ERR(c);
+	}
+
+	merger = to_merger(c);
+
+	set_range(&merger->hsize_merged, 4, 4032);
+	set_range(&merger->vsize_merged, 4, 4096);
+
+	return 0;
+}
+
 static void d71_improc_update(struct komeda_component *c,
 			      struct komeda_component_state *state)
 {
 	struct komeda_improc_state *st = to_improc_st(state);
 	u32 __iomem *reg = c->reg;
-	u32 index, input_hw_id;
+	u32 index;
 
-	for_each_changed_input(state, index) {
-		input_hw_id = state->active_inputs & BIT(index) ?
-			      to_d71_input_id(&state->inputs[index]) : 0;
-		malidp_write32(reg, BLK_INPUT_ID0 + index * 4, input_hw_id);
-	}
+	for_each_changed_input(state, index)
+		malidp_write32(reg, BLK_INPUT_ID0 + index * 4,
+			       to_d71_input_id(state, index));
 
 	malidp_write32(reg, BLK_SIZE, HV_SIZE(st->hsize, st->vsize));
 }
@@ -644,9 +1159,16 @@ int d71_probe_block(struct d71_dev *d71,
 		err = d71_compiz_init(d71, blk, reg);
 		break;
 
-	case D71_BLK_TYPE_CU_SPLITTER:
 	case D71_BLK_TYPE_CU_SCALER:
+		err = d71_scaler_init(d71, blk, reg);
+		break;
+
+	case D71_BLK_TYPE_CU_SPLITTER:
+		err = d71_splitter_init(d71, blk, reg);
+		break;
+
 	case D71_BLK_TYPE_CU_MERGER:
+		err = d71_merger_init(d71, blk, reg);
 		break;
 
 	case D71_BLK_TYPE_DOU:
@@ -683,3 +1205,7 @@ int d71_probe_block(struct d71_dev *d71,
 
 	return err;
 }
+
+const struct komeda_pipeline_funcs d71_pipeline_funcs = {
+	.downscaling_clk_check = d71_downscaling_clk_check,
+};
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
index 3a7248d42376..d567ab7ed314 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.c
@@ -280,7 +280,7 @@ static int d71_change_opmode(struct komeda_dev *mdev, int new_mode)
 	ret = dp_wait_cond(((malidp_read32(d71->gcu_addr, BLK_CONTROL) & 0x7) == opmode),
 			   100, 1000, 10000);
 
-	return ret > 0 ? 0 : -ETIMEDOUT;
+	return ret;
 }
 
 static void d71_flush(struct komeda_dev *mdev,
@@ -304,7 +304,7 @@ static int d71_reset(struct d71_dev *d71)
 	ret = dp_wait_cond(!(malidp_read32(gcu, BLK_CONTROL) & GCU_CONTROL_SRST),
 			   100, 1000, 10000);
 
-	return ret > 0 ? 0 : -ETIMEDOUT;
+	return ret;
 }
 
 void d71_read_block_header(u32 __iomem *reg, struct block_header *blk)
@@ -390,7 +390,7 @@ static int d71_enum_resources(struct komeda_dev *mdev)
 
 	for (i = 0; i < d71->num_pipelines; i++) {
 		pipe = komeda_pipeline_add(mdev, sizeof(struct d71_pipeline),
-					   NULL);
+					   &d71_pipeline_funcs);
 		if (IS_ERR(pipe)) {
 			err = PTR_ERR(pipe);
 			goto err_cleanup;
@@ -447,61 +447,119 @@ err_cleanup:
 #define AFB_TH_SC_YTR_BS AFBC(_TILED | _SC | _SPARSE | _YTR | _SPLIT)
 
 static struct komeda_format_caps d71_format_caps_table[] = {
-	/*   HW_ID    |        fourcc        | tile_sz |   layer_types |   rots    | afbc_layouts | afbc_features */
+	/*   HW_ID    |        fourcc         |   layer_types |   rots    | afbc_layouts | afbc_features */
 	/* ABGR_2101010*/
-	{__HW_ID(0, 0),	DRM_FORMAT_ARGB2101010,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(0, 1),	DRM_FORMAT_ABGR2101010,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(0, 1),	DRM_FORMAT_ABGR2101010,	1,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
-	{__HW_ID(0, 2),	DRM_FORMAT_RGBA1010102,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(0, 3),	DRM_FORMAT_BGRA1010102,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(0, 0),	DRM_FORMAT_ARGB2101010,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(0, 1),	DRM_FORMAT_ABGR2101010,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(0, 1),	DRM_FORMAT_ABGR2101010,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
+	{__HW_ID(0, 2),	DRM_FORMAT_RGBA1010102,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(0, 3),	DRM_FORMAT_BGRA1010102,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
 	/* ABGR_8888*/
-	{__HW_ID(1, 0),	DRM_FORMAT_ARGB8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(1, 1),	DRM_FORMAT_ABGR8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(1, 1),	DRM_FORMAT_ABGR8888,	1,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
-	{__HW_ID(1, 2),	DRM_FORMAT_RGBA8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(1, 3),	DRM_FORMAT_BGRA8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(1, 0),	DRM_FORMAT_ARGB8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(1, 1),	DRM_FORMAT_ABGR8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(1, 1),	DRM_FORMAT_ABGR8888,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
+	{__HW_ID(1, 2),	DRM_FORMAT_RGBA8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(1, 3),	DRM_FORMAT_BGRA8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
 	/* XBGB_8888 */
-	{__HW_ID(2, 0),	DRM_FORMAT_XRGB8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(2, 1),	DRM_FORMAT_XBGR8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(2, 2),	DRM_FORMAT_RGBX8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
-	{__HW_ID(2, 3),	DRM_FORMAT_BGRX8888,	1,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(2, 0),	DRM_FORMAT_XRGB8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(2, 1),	DRM_FORMAT_XBGR8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(2, 2),	DRM_FORMAT_RGBX8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
+	{__HW_ID(2, 3),	DRM_FORMAT_BGRX8888,	RICH_SIMPLE_WB,	Flip_H_V,		0, 0},
 	/* BGR_888 */ /* none-afbc RGB888 doesn't support rotation and flip */
-	{__HW_ID(3, 0),	DRM_FORMAT_RGB888,	1,	RICH_SIMPLE_WB,	Rot_0,			0, 0},
-	{__HW_ID(3, 1),	DRM_FORMAT_BGR888,	1,	RICH_SIMPLE_WB,	Rot_0,			0, 0},
-	{__HW_ID(3, 1),	DRM_FORMAT_BGR888,	1,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
+	{__HW_ID(3, 0),	DRM_FORMAT_RGB888,	RICH_SIMPLE_WB,	Rot_0,			0, 0},
+	{__HW_ID(3, 1),	DRM_FORMAT_BGR888,	RICH_SIMPLE_WB,	Rot_0,			0, 0},
+	{__HW_ID(3, 1),	DRM_FORMAT_BGR888,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR_BS}, /* afbc */
 	/* BGR 16bpp */
-	{__HW_ID(4, 0),	DRM_FORMAT_RGBA5551,	1,	RICH_SIMPLE,	Flip_H_V,		0, 0},
-	{__HW_ID(4, 1),	DRM_FORMAT_ABGR1555,	1,	RICH_SIMPLE,	Flip_H_V,		0, 0},
-	{__HW_ID(4, 1),	DRM_FORMAT_ABGR1555,	1,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */
-	{__HW_ID(4, 2),	DRM_FORMAT_RGB565,	1,	RICH_SIMPLE,	Flip_H_V,		0, 0},
-	{__HW_ID(4, 3),	DRM_FORMAT_BGR565,	1,	RICH_SIMPLE,	Flip_H_V,		0, 0},
-	{__HW_ID(4, 3),	DRM_FORMAT_BGR565,	1,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */
-	{__HW_ID(4, 4), DRM_FORMAT_R8,		1,	SIMPLE,		Rot_0,			0, 0},
+	{__HW_ID(4, 0),	DRM_FORMAT_RGBA5551,	RICH_SIMPLE,	Flip_H_V,		0, 0},
+	{__HW_ID(4, 1),	DRM_FORMAT_ABGR1555,	RICH_SIMPLE,	Flip_H_V,		0, 0},
+	{__HW_ID(4, 1),	DRM_FORMAT_ABGR1555,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */
+	{__HW_ID(4, 2),	DRM_FORMAT_RGB565,	RICH_SIMPLE,	Flip_H_V,		0, 0},
+	{__HW_ID(4, 3),	DRM_FORMAT_BGR565,	RICH_SIMPLE,	Flip_H_V,		0, 0},
+	{__HW_ID(4, 3),	DRM_FORMAT_BGR565,	RICH_SIMPLE,	Rot_ALL_H_V,	LYT_NM_WB, AFB_TH_SC_YTR}, /* afbc */
+	{__HW_ID(4, 4), DRM_FORMAT_R8,		SIMPLE,		Rot_0,			0, 0},
 	/* YUV 444/422/420 8bit  */
-	{__HW_ID(5, 0),	0 /*XYUV8888*/,		1,	0,		0,			0, 0},
-	/* XYUV unsupported*/
-	{__HW_ID(5, 1),	DRM_FORMAT_YUYV,	1,	RICH,		Rot_ALL_H_V,	LYT_NM, AFB_TH}, /* afbc */
-	{__HW_ID(5, 2),	DRM_FORMAT_YUYV,	1,	RICH,		Flip_H_V,		0, 0},
-	{__HW_ID(5, 3),	DRM_FORMAT_UYVY,	1,	RICH,		Flip_H_V,		0, 0},
-	{__HW_ID(5, 4),	0, /*X0L0 */		2,		0,			0, 0}, /* Y0L0 unsupported */
-	{__HW_ID(5, 6),	DRM_FORMAT_NV12,	1,	RICH,		Flip_H_V,		0, 0},
-	{__HW_ID(5, 6),	0/*DRM_FORMAT_YUV420_8BIT*/,	1,	RICH,	Rot_ALL_H_V,	LYT_NM, AFB_TH}, /* afbc */
-	{__HW_ID(5, 7),	DRM_FORMAT_YUV420,	1,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(5, 1),	DRM_FORMAT_YUYV,	RICH,		Rot_ALL_H_V,	LYT_NM, AFB_TH}, /* afbc */
+	{__HW_ID(5, 2),	DRM_FORMAT_YUYV,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(5, 3),	DRM_FORMAT_UYVY,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(5, 6),	DRM_FORMAT_NV12,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(5, 6),	DRM_FORMAT_YUV420_8BIT,	RICH,		Rot_ALL_H_V,	LYT_NM, AFB_TH}, /* afbc */
+	{__HW_ID(5, 7),	DRM_FORMAT_YUV420,	RICH,		Flip_H_V,		0, 0},
 	/* YUV 10bit*/
-	{__HW_ID(6, 0),	0,/*XVYU2101010*/	1,	0,		0,			0, 0},/* VYV30 unsupported */
-	{__HW_ID(6, 6),	0/*DRM_FORMAT_X0L2*/,	2,	RICH,		Flip_H_V,		0, 0},
-	{__HW_ID(6, 7),	0/*DRM_FORMAT_P010*/,	1,	RICH,		Flip_H_V,		0, 0},
-	{__HW_ID(6, 7),	0/*DRM_FORMAT_YUV420_10BIT*/, 1,	RICH,	Rot_ALL_H_V,	LYT_NM, AFB_TH},
+	{__HW_ID(6, 6),	DRM_FORMAT_X0L2,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(6, 7),	DRM_FORMAT_P010,	RICH,		Flip_H_V,		0, 0},
+	{__HW_ID(6, 7),	DRM_FORMAT_YUV420_10BIT, RICH,		Rot_ALL_H_V,	LYT_NM, AFB_TH},
 };
 
+static bool d71_format_mod_supported(const struct komeda_format_caps *caps,
+				     u32 layer_type, u64 modifier, u32 rot)
+{
+	uint64_t layout = modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK;
+
+	if ((layout == AFBC_FORMAT_MOD_BLOCK_SIZE_32x8) &&
+	    drm_rotation_90_or_270(rot)) {
+		DRM_DEBUG_ATOMIC("D71 doesn't support ROT90 for WB-AFBC.\n");
+		return false;
+	}
+
+	return true;
+}
+
 static void d71_init_fmt_tbl(struct komeda_dev *mdev)
 {
 	struct komeda_format_caps_table *table = &mdev->fmt_tbl;
 
 	table->format_caps = d71_format_caps_table;
+	table->format_mod_supported = d71_format_mod_supported;
 	table->n_formats = ARRAY_SIZE(d71_format_caps_table);
 }
 
+static int d71_connect_iommu(struct komeda_dev *mdev)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+	u32 __iomem *reg = d71->gcu_addr;
+	u32 check_bits = (d71->num_pipelines == 2) ?
+			 GCU_STATUS_TCS0 | GCU_STATUS_TCS1 : GCU_STATUS_TCS0;
+	int i, ret;
+
+	if (!d71->integrates_tbu)
+		return -1;
+
+	malidp_write32_mask(reg, BLK_CONTROL, 0x7, TBU_CONNECT_MODE);
+
+	ret = dp_wait_cond(has_bits(check_bits, malidp_read32(reg, BLK_STATUS)),
+			100, 1000, 1000);
+	if (ret < 0) {
+		DRM_ERROR("timed out connecting to TCU!\n");
+		malidp_write32_mask(reg, BLK_CONTROL, 0x7, INACTIVE_MODE);
+		return ret;
+	}
+
+	for (i = 0; i < d71->num_pipelines; i++)
+		malidp_write32_mask(d71->pipes[i]->lpu_addr, LPU_TBU_CONTROL,
+				    LPU_TBU_CTRL_TLBPEN, LPU_TBU_CTRL_TLBPEN);
+	return 0;
+}
+
+static int d71_disconnect_iommu(struct komeda_dev *mdev)
+{
+	struct d71_dev *d71 = mdev->chip_data;
+	u32 __iomem *reg = d71->gcu_addr;
+	u32 check_bits = (d71->num_pipelines == 2) ?
+			 GCU_STATUS_TCS0 | GCU_STATUS_TCS1 : GCU_STATUS_TCS0;
+	int ret;
+
+	malidp_write32_mask(reg, BLK_CONTROL, 0x7, TBU_DISCONNECT_MODE);
+
+	ret = dp_wait_cond(((malidp_read32(reg, BLK_STATUS) & check_bits) == 0),
+			100, 1000, 1000);
+	if (ret < 0) {
+		DRM_ERROR("timed out disconnecting from TCU!\n");
+		malidp_write32_mask(reg, BLK_CONTROL, 0x7, INACTIVE_MODE);
+	}
+
+	return ret;
+}
+
 static const struct komeda_dev_funcs d71_chip_funcs = {
 	.init_format_table = d71_init_fmt_tbl,
 	.enum_resources	= d71_enum_resources,
@@ -512,6 +570,8 @@ static const struct komeda_dev_funcs d71_chip_funcs = {
 	.on_off_vblank	= d71_on_off_vblank,
 	.change_opmode	= d71_change_opmode,
 	.flush		= d71_flush,
+	.connect_iommu	= d71_connect_iommu,
+	.disconnect_iommu = d71_disconnect_iommu,
 };
 
 const struct komeda_dev_funcs *
diff --git a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
index 7465c57d9774..84f1878b647d 100644
--- a/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
+++ b/drivers/gpu/drm/arm/display/komeda/d71/d71_dev.h
@@ -43,6 +43,8 @@ struct d71_dev {
 
 #define to_d71_pipeline(x)	container_of(x, struct d71_pipeline, base)
 
+extern const struct komeda_pipeline_funcs d71_pipeline_funcs;
+
 int d71_probe_block(struct d71_dev *d71,
 		    struct block_header *blk, u32 __iomem *reg);
 void d71_read_block_header(u32 __iomem *reg, struct block_header *blk);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
new file mode 100644
index 000000000000..9d14a92dbb17
--- /dev/null
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * Author: James.Qian.Wang <james.qian.wang@arm.com>
+ *
+ */
+
+#include "komeda_color_mgmt.h"
+
+/* 10bit precision YUV2RGB matrix */
+static const s32 yuv2rgb_bt601_narrow[KOMEDA_N_YUV2RGB_COEFFS] = {
+	1192,    0, 1634,
+	1192, -401, -832,
+	1192, 2066,    0,
+	  64,  512,  512
+};
+
+static const s32 yuv2rgb_bt601_wide[KOMEDA_N_YUV2RGB_COEFFS] = {
+	1024,    0, 1436,
+	1024, -352, -731,
+	1024, 1815,    0,
+	   0,  512,  512
+};
+
+static const s32 yuv2rgb_bt709_narrow[KOMEDA_N_YUV2RGB_COEFFS] = {
+	1192,    0, 1836,
+	1192, -218, -546,
+	1192, 2163,    0,
+	  64,  512,  512
+};
+
+static const s32 yuv2rgb_bt709_wide[KOMEDA_N_YUV2RGB_COEFFS] = {
+	1024,    0, 1613,
+	1024, -192, -479,
+	1024, 1900,    0,
+	   0,  512,  512
+};
+
+static const s32 yuv2rgb_bt2020[KOMEDA_N_YUV2RGB_COEFFS] = {
+	1024,    0, 1476,
+	1024, -165, -572,
+	1024, 1884,    0,
+	   0,  512,  512
+};
+
+const s32 *komeda_select_yuv2rgb_coeffs(u32 color_encoding, u32 color_range)
+{
+	bool narrow = color_range == DRM_COLOR_YCBCR_LIMITED_RANGE;
+	const s32 *coeffs;
+
+	switch (color_encoding) {
+	case DRM_COLOR_YCBCR_BT709:
+		coeffs = narrow ? yuv2rgb_bt709_narrow : yuv2rgb_bt709_wide;
+		break;
+	case DRM_COLOR_YCBCR_BT601:
+		coeffs = narrow ? yuv2rgb_bt601_narrow : yuv2rgb_bt601_wide;
+		break;
+	case DRM_COLOR_YCBCR_BT2020:
+		coeffs = yuv2rgb_bt2020;
+		break;
+	default:
+		coeffs = NULL;
+		break;
+	}
+
+	return coeffs;
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h
new file mode 100644
index 000000000000..a2df218f58e7
--- /dev/null
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * Author: James.Qian.Wang <james.qian.wang@arm.com>
+ *
+ */
+
+#ifndef _KOMEDA_COLOR_MGMT_H_
+#define _KOMEDA_COLOR_MGMT_H_
+
+#include <drm/drm_color_mgmt.h>
+
+#define KOMEDA_N_YUV2RGB_COEFFS		12
+
+const s32 *komeda_select_yuv2rgb_coeffs(u32 color_encoding, u32 color_range);
+
+#endif
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
index 284ce079d8c4..3f222f464eb2 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
@@ -18,6 +18,21 @@
 #include "komeda_dev.h"
 #include "komeda_kms.h"
 
+static void komeda_crtc_update_clock_ratio(struct komeda_crtc_state *kcrtc_st)
+{
+	u64 pxlclk, aclk;
+
+	if (!kcrtc_st->base.active) {
+		kcrtc_st->clock_ratio = 0;
+		return;
+	}
+
+	pxlclk = kcrtc_st->base.adjusted_mode.clock * 1000;
+	aclk = komeda_calc_aclk(kcrtc_st);
+
+	kcrtc_st->clock_ratio = div64_u64(aclk << 32, pxlclk);
+}
+
 /**
  * komeda_crtc_atomic_check - build display output data flow
  * @crtc: DRM crtc
@@ -38,6 +53,9 @@ komeda_crtc_atomic_check(struct drm_crtc *crtc,
 	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(state);
 	int err;
 
+	if (drm_atomic_crtc_needs_modeset(state))
+		komeda_crtc_update_clock_ratio(kcrtc_st);
+
 	if (state->active) {
 		err = komeda_build_display_data_flow(kcrtc, kcrtc_st);
 		if (err)
@@ -45,6 +63,10 @@ komeda_crtc_atomic_check(struct drm_crtc *crtc,
 	}
 
 	/* release unclaimed pipeline resources */
+	err = komeda_release_unclaimed_resources(kcrtc->slave, kcrtc_st);
+	if (err)
+		return err;
+
 	err = komeda_release_unclaimed_resources(kcrtc->master, kcrtc_st);
 	if (err)
 		return err;
@@ -52,11 +74,12 @@ komeda_crtc_atomic_check(struct drm_crtc *crtc,
 	return 0;
 }
 
-static u32 komeda_calc_mclk(struct komeda_crtc_state *kcrtc_st)
+unsigned long komeda_calc_aclk(struct komeda_crtc_state *kcrtc_st)
 {
-	unsigned long mclk = kcrtc_st->base.adjusted_mode.clock * 1000;
+	struct komeda_dev *mdev = kcrtc_st->base.crtc->dev->dev_private;
+	unsigned long pxlclk = kcrtc_st->base.adjusted_mode.clock;
 
-	return mclk;
+	return clk_round_rate(mdev->aclk, pxlclk * 1000);
 }
 
 /* For active a crtc, mainly need two parts of preparation
@@ -89,23 +112,20 @@ komeda_crtc_prepare(struct komeda_crtc *kcrtc)
 	}
 
 	mdev->dpmode = new_mode;
-	/* Only need to enable mclk on single display mode, but no need to
-	 * enable mclk it on dual display mode, since the dual mode always
-	 * switch from single display mode, the mclk already enabled, no need
+	/* Only need to enable aclk on single display mode, but no need to
+	 * enable aclk it on dual display mode, since the dual mode always
+	 * switch from single display mode, the aclk already enabled, no need
 	 * to enable it again.
 	 */
 	if (new_mode != KOMEDA_MODE_DUAL_DISP) {
-		err = clk_set_rate(mdev->mclk, komeda_calc_mclk(kcrtc_st));
+		err = clk_set_rate(mdev->aclk, komeda_calc_aclk(kcrtc_st));
 		if (err)
-			DRM_ERROR("failed to set mclk.\n");
-		err = clk_prepare_enable(mdev->mclk);
+			DRM_ERROR("failed to set aclk.\n");
+		err = clk_prepare_enable(mdev->aclk);
 		if (err)
-			DRM_ERROR("failed to enable mclk.\n");
+			DRM_ERROR("failed to enable aclk.\n");
 	}
 
-	err = clk_prepare_enable(master->aclk);
-	if (err)
-		DRM_ERROR("failed to enable axi clk for pipe%d.\n", master->id);
 	err = clk_set_rate(master->pxlclk, pxlclk_rate);
 	if (err)
 		DRM_ERROR("failed to set pxlclk for pipe%d\n", master->id);
@@ -146,9 +166,8 @@ komeda_crtc_unprepare(struct komeda_crtc *kcrtc)
 	mdev->dpmode = new_mode;
 
 	clk_disable_unprepare(master->pxlclk);
-	clk_disable_unprepare(master->aclk);
 	if (new_mode == KOMEDA_MODE_INACTIVE)
-		clk_disable_unprepare(mdev->mclk);
+		clk_disable_unprepare(mdev->aclk);
 
 unlock:
 	mutex_unlock(&mdev->lock);
@@ -165,6 +184,15 @@ void komeda_crtc_handle_event(struct komeda_crtc   *kcrtc,
 	if (events & KOMEDA_EVENT_VSYNC)
 		drm_crtc_handle_vblank(crtc);
 
+	if (events & KOMEDA_EVENT_EOW) {
+		struct komeda_wb_connector *wb_conn = kcrtc->wb_conn;
+
+		if (wb_conn)
+			drm_writeback_signal_completion(&wb_conn->base, 0);
+		else
+			DRM_WARN("CRTC[%d]: EOW happen but no wb_connector.\n",
+				 drm_crtc_index(&kcrtc->base));
+	}
 	/* will handle it together with the write back support */
 	if (events & KOMEDA_EVENT_EOW)
 		DRM_DEBUG("EOW.\n");
@@ -201,6 +229,9 @@ komeda_crtc_do_flush(struct drm_crtc *crtc,
 	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(crtc->state);
 	struct komeda_dev *mdev = kcrtc->base.dev->dev_private;
 	struct komeda_pipeline *master = kcrtc->master;
+	struct komeda_pipeline *slave = kcrtc->slave;
+	struct komeda_wb_connector *wb_conn = kcrtc->wb_conn;
+	struct drm_connector_state *conn_st;
 
 	DRM_DEBUG_ATOMIC("CRTC%d_FLUSH: active_pipes: 0x%x, affected: 0x%x.\n",
 			 drm_crtc_index(crtc),
@@ -210,6 +241,13 @@ komeda_crtc_do_flush(struct drm_crtc *crtc,
 	if (has_bit(master->id, kcrtc_st->affected_pipes))
 		komeda_pipeline_update(master, old->state);
 
+	if (slave && has_bit(slave->id, kcrtc_st->affected_pipes))
+		komeda_pipeline_update(slave, old->state);
+
+	conn_st = wb_conn ? wb_conn->base.base.state : NULL;
+	if (conn_st && conn_st->writeback_job)
+		drm_writeback_queue_job(&wb_conn->base, conn_st);
+
 	/* step 2: notify the HW to kickoff the update */
 	mdev->funcs->flush(mdev, master->id, kcrtc_st->active_pipes);
 }
@@ -231,6 +269,7 @@ komeda_crtc_atomic_disable(struct drm_crtc *crtc,
 	struct komeda_crtc_state *old_st = to_kcrtc_st(old);
 	struct komeda_dev *mdev = crtc->dev->dev_private;
 	struct komeda_pipeline *master = kcrtc->master;
+	struct komeda_pipeline *slave  = kcrtc->slave;
 	struct completion *disable_done = &crtc->state->commit->flip_done;
 	struct completion temp;
 	int timeout;
@@ -239,6 +278,9 @@ komeda_crtc_atomic_disable(struct drm_crtc *crtc,
 			 drm_crtc_index(crtc),
 			 old_st->active_pipes, old_st->affected_pipes);
 
+	if (slave && has_bit(slave->id, old_st->active_pipes))
+		komeda_pipeline_disable(slave, old->state);
+
 	if (has_bit(master->id, old_st->active_pipes))
 		komeda_pipeline_disable(master, old->state);
 
@@ -311,7 +353,6 @@ komeda_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *m)
 	if (m->flags & DRM_MODE_FLAG_INTERLACE)
 		return MODE_NO_INTERLACE;
 
-	/* main clock/AXI clk must be faster than pxlclk*/
 	mode_clk = m->clock * 1000;
 	pxlclk = clk_round_rate(master->pxlclk, mode_clk);
 	if (pxlclk != mode_clk) {
@@ -320,15 +361,9 @@ komeda_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *m)
 		return MODE_NOCLOCK;
 	}
 
-	if (clk_round_rate(mdev->mclk, mode_clk) < pxlclk) {
-		DRM_DEBUG_ATOMIC("mclk can't satisfy the requirement of %s-clk: %ld.\n",
-				 m->name, pxlclk);
-
-		return MODE_CLOCK_HIGH;
-	}
-
-	if (clk_round_rate(master->aclk, mode_clk) < pxlclk) {
-		DRM_DEBUG_ATOMIC("aclk can't satisfy the requirement of %s-clk: %ld.\n",
+	/* main engine clock must be faster than pxlclk*/
+	if (clk_round_rate(mdev->aclk, mode_clk) < pxlclk) {
+		DRM_DEBUG_ATOMIC("engine clk can't satisfy the requirement of %s-clk: %ld.\n",
 				 m->name, pxlclk);
 
 		return MODE_CLOCK_HIGH;
@@ -389,6 +424,8 @@ komeda_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
 	__drm_atomic_helper_crtc_duplicate_state(crtc, &new->base);
 
 	new->affected_pipes = old->active_pipes;
+	new->clock_ratio = old->clock_ratio;
+	new->max_slave_zorder = old->max_slave_zorder;
 
 	return &new->base;
 }
@@ -417,6 +454,24 @@ static void komeda_crtc_vblank_disable(struct drm_crtc *crtc)
 	mdev->funcs->on_off_vblank(mdev, kcrtc->master->id, false);
 }
 
+static int
+komeda_crtc_atomic_get_property(struct drm_crtc *crtc,
+				const struct drm_crtc_state *state,
+				struct drm_property *property, uint64_t *val)
+{
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(state);
+
+	if (property == kcrtc->clock_ratio_property) {
+		*val = kcrtc_st->clock_ratio;
+	} else {
+		DRM_DEBUG_DRIVER("Unknown property %s\n", property->name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct drm_crtc_funcs komeda_crtc_funcs = {
 	.gamma_set		= drm_atomic_helper_legacy_gamma_set,
 	.destroy		= drm_crtc_cleanup,
@@ -427,6 +482,7 @@ static const struct drm_crtc_funcs komeda_crtc_funcs = {
 	.atomic_destroy_state	= komeda_crtc_atomic_destroy_state,
 	.enable_vblank		= komeda_crtc_vblank_enable,
 	.disable_vblank		= komeda_crtc_vblank_disable,
+	.atomic_get_property	= komeda_crtc_atomic_get_property,
 };
 
 int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms,
@@ -444,7 +500,7 @@ int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms,
 		master = mdev->pipelines[i];
 
 		crtc->master = master;
-		crtc->slave  = NULL;
+		crtc->slave  = komeda_pipeline_get_slave(master);
 
 		if (crtc->slave)
 			sprintf(str, "pipe-%d", crtc->slave->id);
@@ -462,6 +518,42 @@ int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms,
 	return 0;
 }
 
+static int komeda_crtc_create_clock_ratio_property(struct komeda_crtc *kcrtc)
+{
+	struct drm_crtc *crtc = &kcrtc->base;
+	struct drm_property *prop;
+
+	prop = drm_property_create_range(crtc->dev, DRM_MODE_PROP_ATOMIC,
+					 "CLOCK_RATIO", 0, U64_MAX);
+	if (!prop)
+		return -ENOMEM;
+
+	drm_object_attach_property(&crtc->base, prop, 0);
+	kcrtc->clock_ratio_property = prop;
+
+	return 0;
+}
+
+static int komeda_crtc_create_slave_planes_property(struct komeda_crtc *kcrtc)
+{
+	struct drm_crtc *crtc = &kcrtc->base;
+	struct drm_property *prop;
+
+	if (kcrtc->slave_planes == 0)
+		return 0;
+
+	prop = drm_property_create_range(crtc->dev, DRM_MODE_PROP_IMMUTABLE,
+					 "slave_planes", 0, U32_MAX);
+	if (!prop)
+		return -ENOMEM;
+
+	drm_object_attach_property(&crtc->base, prop, kcrtc->slave_planes);
+
+	kcrtc->slave_planes_property = prop;
+
+	return 0;
+}
+
 static struct drm_plane *
 get_crtc_primary(struct komeda_kms_dev *kms, struct komeda_crtc *crtc)
 {
@@ -498,7 +590,15 @@ static int komeda_crtc_add(struct komeda_kms_dev *kms,
 
 	crtc->port = kcrtc->master->of_output_port;
 
-	return 0;
+	err = komeda_crtc_create_clock_ratio_property(kcrtc);
+	if (err)
+		return err;
+
+	err = komeda_crtc_create_slave_planes_property(kcrtc);
+	if (err)
+		return err;
+
+	return err;
 }
 
 int komeda_kms_add_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev)
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
index b67030a9f056..5a118984de33 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c
@@ -5,6 +5,7 @@
  *
  */
 #include <linux/io.h>
+#include <linux/iommu.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/platform_device.h>
@@ -52,9 +53,6 @@ static void komeda_debugfs_init(struct komeda_dev *mdev)
 		return;
 
 	mdev->debugfs_root = debugfs_create_dir("komeda", NULL);
-	if (IS_ERR_OR_NULL(mdev->debugfs_root))
-		return;
-
 	debugfs_create_file("register", 0444, mdev->debugfs_root,
 			    mdev, &komeda_register_fops);
 }
@@ -115,13 +113,6 @@ static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np)
 
 	pipe = mdev->pipelines[pipe_id];
 
-	clk = of_clk_get_by_name(np, "aclk");
-	if (IS_ERR(clk)) {
-		DRM_ERROR("get aclk for pipeline %d failed!\n", pipe_id);
-		return PTR_ERR(clk);
-	}
-	pipe->aclk = clk;
-
 	clk = of_clk_get_by_name(np, "pxclk");
 	if (IS_ERR(clk)) {
 		DRM_ERROR("get pxclk for pipeline %d failed!\n", pipe_id);
@@ -144,14 +135,8 @@ static int komeda_parse_dt(struct device *dev, struct komeda_dev *mdev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct device_node *child, *np = dev->of_node;
-	struct clk *clk;
 	int ret;
 
-	clk = devm_clk_get(dev, "mclk");
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
-
-	mdev->mclk = clk;
 	mdev->irq  = platform_get_irq(pdev, 0);
 	if (mdev->irq < 0) {
 		DRM_ERROR("could not get IRQ number.\n");
@@ -205,16 +190,15 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 		goto err_cleanup;
 	}
 
-	mdev->pclk = devm_clk_get(dev, "pclk");
-	if (IS_ERR(mdev->pclk)) {
-		DRM_ERROR("Get APB clk failed.\n");
-		err = PTR_ERR(mdev->pclk);
-		mdev->pclk = NULL;
+	mdev->aclk = devm_clk_get(dev, "aclk");
+	if (IS_ERR(mdev->aclk)) {
+		DRM_ERROR("Get engine clk failed.\n");
+		err = PTR_ERR(mdev->aclk);
+		mdev->aclk = NULL;
 		goto err_cleanup;
 	}
 
-	/* Enable APB clock to access the registers */
-	clk_prepare_enable(mdev->pclk);
+	clk_prepare_enable(mdev->aclk);
 
 	mdev->funcs = product->identify(mdev->reg_base, &mdev->chip);
 	if (!komeda_product_match(mdev, product->product_id)) {
@@ -253,6 +237,18 @@ struct komeda_dev *komeda_dev_create(struct device *dev)
 	dev->dma_parms = &mdev->dma_parms;
 	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
 
+	mdev->iommu = iommu_get_domain_for_dev(mdev->dev);
+	if (!mdev->iommu)
+		DRM_INFO("continue without IOMMU support!\n");
+
+	if (mdev->iommu && mdev->funcs->connect_iommu) {
+		err = mdev->funcs->connect_iommu(mdev);
+		if (err) {
+			mdev->iommu = NULL;
+			goto err_cleanup;
+		}
+	}
+
 	err = sysfs_create_group(&dev->kobj, &komeda_sysfs_attr_group);
 	if (err) {
 		DRM_ERROR("create sysfs group failed.\n");
@@ -282,6 +278,10 @@ void komeda_dev_destroy(struct komeda_dev *mdev)
 	debugfs_remove_recursive(mdev->debugfs_root);
 #endif
 
+	if (mdev->iommu && mdev->funcs->disconnect_iommu)
+		mdev->funcs->disconnect_iommu(mdev);
+	mdev->iommu = NULL;
+
 	for (i = 0; i < mdev->n_pipelines; i++) {
 		komeda_pipeline_destroy(mdev, mdev->pipelines[i]);
 		mdev->pipelines[i] = NULL;
@@ -297,15 +297,10 @@ void komeda_dev_destroy(struct komeda_dev *mdev)
 		mdev->reg_base = NULL;
 	}
 
-	if (mdev->mclk) {
-		devm_clk_put(dev, mdev->mclk);
-		mdev->mclk = NULL;
-	}
-
-	if (mdev->pclk) {
-		clk_disable_unprepare(mdev->pclk);
-		devm_clk_put(dev, mdev->pclk);
-		mdev->pclk = NULL;
+	if (mdev->aclk) {
+		clk_disable_unprepare(mdev->aclk);
+		devm_clk_put(dev, mdev->aclk);
+		mdev->aclk = NULL;
 	}
 
 	devm_kfree(dev, mdev);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
index 973fd5e0eb98..d1c86b6174c8 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.h
@@ -92,6 +92,10 @@ struct komeda_dev_funcs {
 	int (*enum_resources)(struct komeda_dev *mdev);
 	/** @cleanup: call to chip to cleanup komeda_dev->chip data */
 	void (*cleanup)(struct komeda_dev *mdev);
+	/** @connect_iommu: Optional, connect to external iommu */
+	int (*connect_iommu)(struct komeda_dev *mdev);
+	/** @disconnect_iommu: Optional, disconnect to external iommu */
+	int (*disconnect_iommu)(struct komeda_dev *mdev);
 	/**
 	 * @irq_handler:
 	 *
@@ -156,10 +160,8 @@ struct komeda_dev {
 	struct komeda_chip_info chip;
 	/** @fmt_tbl: initialized by &komeda_dev_funcs->init_format_table */
 	struct komeda_format_caps_table fmt_tbl;
-	/** @pclk: APB clock for register access */
-	struct clk *pclk;
-	/** @mclk: HW main engine clk */
-	struct clk *mclk;
+	/** @aclk: HW main engine clk */
+	struct clk *aclk;
 
 	/** @irq: irq number */
 	int irq;
@@ -184,6 +186,9 @@ struct komeda_dev {
 	 */
 	void *chip_data;
 
+	/** @iommu: iommu domain */
+	struct iommu_domain *iommu;
+
 	/** @debugfs_root: root directory of komeda debugfs */
 	struct dentry *debugfs_root;
 };
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c
index 1e17bd6107a4..cd4d9f53ddef 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.c
@@ -35,6 +35,64 @@ komeda_get_format_caps(struct komeda_format_caps_table *table,
 	return NULL;
 }
 
+/* Two assumptions
+ * 1. RGB always has YTR
+ * 2. Tiled RGB always has SC
+ */
+u64 komeda_supported_modifiers[] = {
+	/* AFBC_16x16 + features: YUV+RGB both */
+	AFBC_16x16(0),
+	/* SPARSE */
+	AFBC_16x16(_SPARSE),
+	/* YTR + (SPARSE) */
+	AFBC_16x16(_YTR | _SPARSE),
+	AFBC_16x16(_YTR),
+	/* SPLIT + SPARSE + YTR RGB only */
+	/* split mode is only allowed for sparse mode */
+	AFBC_16x16(_SPLIT | _SPARSE | _YTR),
+	/* TILED + (SPARSE) */
+	/* TILED YUV format only */
+	AFBC_16x16(_TILED | _SPARSE),
+	AFBC_16x16(_TILED),
+	/* TILED + SC + (SPLIT+SPARSE | SPARSE) + (YTR) */
+	AFBC_16x16(_TILED | _SC | _SPLIT | _SPARSE | _YTR),
+	AFBC_16x16(_TILED | _SC | _SPARSE | _YTR),
+	AFBC_16x16(_TILED | _SC | _YTR),
+	/* AFBC_32x8 + features: which are RGB formats only */
+	/* YTR + (SPARSE) */
+	AFBC_32x8(_YTR | _SPARSE),
+	AFBC_32x8(_YTR),
+	/* SPLIT + SPARSE + (YTR) */
+	/* split mode is only allowed for sparse mode */
+	AFBC_32x8(_SPLIT | _SPARSE | _YTR),
+	/* TILED + SC + (SPLIT+SPARSE | SPARSE) + YTR */
+	AFBC_32x8(_TILED | _SC | _SPLIT | _SPARSE | _YTR),
+	AFBC_32x8(_TILED | _SC | _SPARSE | _YTR),
+	AFBC_32x8(_TILED | _SC | _YTR),
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
+bool komeda_format_mod_supported(struct komeda_format_caps_table *table,
+				 u32 layer_type, u32 fourcc, u64 modifier,
+				 u32 rot)
+{
+	const struct komeda_format_caps *caps;
+
+	caps = komeda_get_format_caps(table, fourcc, modifier);
+	if (!caps)
+		return false;
+
+	if (!(caps->supported_layer_types & layer_type))
+		return false;
+
+	if (table->format_mod_supported)
+		return table->format_mod_supported(caps, layer_type, modifier,
+						   rot);
+
+	return true;
+}
+
 u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table,
 				  u32 layer_type, u32 *n_fmts)
 {
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h
index 60f39e77b098..3631910d33b5 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h
@@ -50,7 +50,6 @@
  *
  * @hw_id: hw format id, hw specific value.
  * @fourcc: drm fourcc format.
- * @tile_size: format tiled size, used by ARM format X0L0/X0L2
  * @supported_layer_types: indicate which layer supports this format
  * @supported_rots: allowed rotations for this format
  * @supported_afbc_layouts: supported afbc layerout
@@ -59,7 +58,6 @@
 struct komeda_format_caps {
 	u32 hw_id;
 	u32 fourcc;
-	u32 tile_size;
 	u32 supported_layer_types;
 	u32 supported_rots;
 	u32 supported_afbc_layouts;
@@ -71,12 +69,30 @@ struct komeda_format_caps {
  *
  * @n_formats: the size of format_caps list.
  * @format_caps: format_caps list.
+ * @format_mod_supported: Optional. Some HW may have special requirements or
+ * limitations which can not be described by format_caps, this func supply HW
+ * the ability to do the further HW specific check.
  */
 struct komeda_format_caps_table {
 	u32 n_formats;
 	const struct komeda_format_caps *format_caps;
+	bool (*format_mod_supported)(const struct komeda_format_caps *caps,
+				     u32 layer_type, u64 modifier, u32 rot);
 };
 
+extern u64 komeda_supported_modifiers[];
+
+static inline const char *komeda_get_format_name(u32 fourcc, u64 modifier)
+{
+	struct drm_format_name_buf buf;
+	static char name[64];
+
+	snprintf(name, sizeof(name), "%s with modifier: 0x%llx.",
+		 drm_get_format_name(fourcc, &buf), modifier);
+
+	return name;
+}
+
 const struct komeda_format_caps *
 komeda_get_format_caps(struct komeda_format_caps_table *table,
 		       u32 fourcc, u64 modifier);
@@ -86,4 +102,8 @@ u32 *komeda_get_layer_fourcc_list(struct komeda_format_caps_table *table,
 
 void komeda_put_fourcc_list(u32 *fourcc_list);
 
+bool komeda_format_mod_supported(struct komeda_format_caps_table *table,
+				 u32 layer_type, u32 fourcc, u64 modifier,
+				 u32 rot);
+
 #endif
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c
index 9cc9935024f7..3b0a70ed6aa0 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c
@@ -37,51 +37,111 @@ static const struct drm_framebuffer_funcs komeda_fb_funcs = {
 };
 
 static int
+komeda_fb_afbc_size_check(struct komeda_fb *kfb, struct drm_file *file,
+			  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_framebuffer *fb = &kfb->base;
+	const struct drm_format_info *info = fb->format;
+	struct drm_gem_object *obj;
+	u32 alignment_w = 0, alignment_h = 0, alignment_header, n_blocks;
+	u64 min_size;
+
+	obj = drm_gem_object_lookup(file, mode_cmd->handles[0]);
+	if (!obj) {
+		DRM_DEBUG_KMS("Failed to lookup GEM object\n");
+		return -ENOENT;
+	}
+
+	switch (fb->modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK) {
+	case AFBC_FORMAT_MOD_BLOCK_SIZE_32x8:
+		alignment_w = 32;
+		alignment_h = 8;
+		break;
+	case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16:
+		alignment_w = 16;
+		alignment_h = 16;
+		break;
+	default:
+		WARN(1, "Invalid AFBC_FORMAT_MOD_BLOCK_SIZE: %lld.\n",
+		     fb->modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK);
+		break;
+	}
+
+	/* tiled header afbc */
+	if (fb->modifier & AFBC_FORMAT_MOD_TILED) {
+		alignment_w *= AFBC_TH_LAYOUT_ALIGNMENT;
+		alignment_h *= AFBC_TH_LAYOUT_ALIGNMENT;
+		alignment_header = AFBC_TH_BODY_START_ALIGNMENT;
+	} else {
+		alignment_header = AFBC_BODY_START_ALIGNMENT;
+	}
+
+	kfb->aligned_w = ALIGN(fb->width, alignment_w);
+	kfb->aligned_h = ALIGN(fb->height, alignment_h);
+
+	if (fb->offsets[0] % alignment_header) {
+		DRM_DEBUG_KMS("afbc offset alignment check failed.\n");
+		goto check_failed;
+	}
+
+	n_blocks = (kfb->aligned_w * kfb->aligned_h) / AFBC_SUPERBLK_PIXELS;
+	kfb->offset_payload = ALIGN(n_blocks * AFBC_HEADER_SIZE,
+				    alignment_header);
+
+	kfb->afbc_size = kfb->offset_payload + n_blocks *
+			 ALIGN(info->cpp[0] * AFBC_SUPERBLK_PIXELS,
+			       AFBC_SUPERBLK_ALIGNMENT);
+	min_size = kfb->afbc_size + fb->offsets[0];
+	if (min_size > obj->size) {
+		DRM_DEBUG_KMS("afbc size check failed, obj_size: 0x%zx. min_size 0x%llx.\n",
+			      obj->size, min_size);
+		goto check_failed;
+	}
+
+	fb->obj[0] = obj;
+	return 0;
+
+check_failed:
+	drm_gem_object_put_unlocked(obj);
+	return -EINVAL;
+}
+
+static int
 komeda_fb_none_afbc_size_check(struct komeda_dev *mdev, struct komeda_fb *kfb,
 			       struct drm_file *file,
 			       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_framebuffer *fb = &kfb->base;
+	const struct drm_format_info *info = fb->format;
 	struct drm_gem_object *obj;
-	u32 min_size = 0;
-	u32 i;
+	u32 i, block_h;
+	u64 min_size;
+
+	if (komeda_fb_check_src_coords(kfb, 0, 0, fb->width, fb->height))
+		return -EINVAL;
 
-	for (i = 0; i < fb->format->num_planes; i++) {
+	for (i = 0; i < info->num_planes; i++) {
 		obj = drm_gem_object_lookup(file, mode_cmd->handles[i]);
 		if (!obj) {
 			DRM_DEBUG_KMS("Failed to lookup GEM object\n");
-			fb->obj[i] = NULL;
-
 			return -ENOENT;
 		}
+		fb->obj[i] = obj;
 
-		kfb->aligned_w = fb->width / (i ? fb->format->hsub : 1);
-		kfb->aligned_h = fb->height / (i ? fb->format->vsub : 1);
-
-		if (fb->pitches[i] % mdev->chip.bus_width) {
+		block_h = drm_format_info_block_height(info, i);
+		if ((fb->pitches[i] * block_h) % mdev->chip.bus_width) {
 			DRM_DEBUG_KMS("Pitch[%d]: 0x%x doesn't align to 0x%x\n",
 				      i, fb->pitches[i], mdev->chip.bus_width);
-			drm_gem_object_put_unlocked(obj);
-			fb->obj[i] = NULL;
-
 			return -EINVAL;
 		}
 
-		min_size = ((kfb->aligned_h / kfb->format_caps->tile_size - 1)
-			    * fb->pitches[i])
-			    + (kfb->aligned_w * fb->format->cpp[i]
-			       * kfb->format_caps->tile_size)
-			    + fb->offsets[i];
-
+		min_size = komeda_fb_get_pixel_addr(kfb, 0, fb->height, i)
+			 - to_drm_gem_cma_obj(obj)->paddr;
 		if (obj->size < min_size) {
-			DRM_DEBUG_KMS("Fail to check none afbc fb size.\n");
-			drm_gem_object_put_unlocked(obj);
-			fb->obj[i] = NULL;
-
+			DRM_DEBUG_KMS("The fb->obj[%d] size: 0x%zx lower than the minimum requirement: 0x%llx.\n",
+				      i, obj->size, min_size);
 			return -EINVAL;
 		}
-
-		fb->obj[i] = obj;
 	}
 
 	if (fb->format->num_planes == 3) {
@@ -118,7 +178,10 @@ komeda_fb_create(struct drm_device *dev, struct drm_file *file,
 
 	drm_helper_mode_fill_fb_struct(dev, &kfb->base, mode_cmd);
 
-	ret = komeda_fb_none_afbc_size_check(mdev, kfb, file, mode_cmd);
+	if (kfb->base.modifier)
+		ret = komeda_fb_afbc_size_check(kfb, file, mode_cmd);
+	else
+		ret = komeda_fb_none_afbc_size_check(mdev, kfb, file, mode_cmd);
 	if (ret < 0)
 		goto err_cleanup;
 
@@ -129,6 +192,8 @@ komeda_fb_create(struct drm_device *dev, struct drm_file *file,
 		goto err_cleanup;
 	}
 
+	kfb->is_va = mdev->iommu ? true : false;
+
 	return &kfb->base;
 
 err_cleanup:
@@ -139,12 +204,42 @@ err_cleanup:
 	return ERR_PTR(ret);
 }
 
+int komeda_fb_check_src_coords(const struct komeda_fb *kfb,
+			       u32 src_x, u32 src_y, u32 src_w, u32 src_h)
+{
+	const struct drm_framebuffer *fb = &kfb->base;
+	const struct drm_format_info *info = fb->format;
+	u32 block_w = drm_format_info_block_width(fb->format, 0);
+	u32 block_h = drm_format_info_block_height(fb->format, 0);
+
+	if ((src_x + src_w > fb->width) || (src_y + src_h > fb->height)) {
+		DRM_DEBUG_ATOMIC("Invalid source coordinate.\n");
+		return -EINVAL;
+	}
+
+	if ((src_x % info->hsub) || (src_w % info->hsub) ||
+	    (src_y % info->vsub) || (src_h % info->vsub)) {
+		DRM_DEBUG_ATOMIC("Wrong subsampling dimension x:%d, y:%d, w:%d, h:%d for format: %x.\n",
+				 src_x, src_y, src_w, src_h, info->format);
+		return -EINVAL;
+	}
+
+	if ((src_x % block_w) || (src_w % block_w) ||
+	    (src_y % block_h) || (src_h % block_h)) {
+		DRM_DEBUG_ATOMIC("x:%d, y:%d, w:%d, h:%d should be multiple of block_w/h for format: %x.\n",
+				 src_x, src_y, src_w, src_h, info->format);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 dma_addr_t
 komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane)
 {
 	struct drm_framebuffer *fb = &kfb->base;
 	const struct drm_gem_cma_object *obj;
-	u32 plane_x, plane_y, cpp, pitch, offset;
+	u32 offset, plane_x, plane_y, block_w, block_sz;
 
 	if (plane >= fb->format->num_planes) {
 		DRM_DEBUG_KMS("Out of max plane num.\n");
@@ -155,13 +250,33 @@ komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane)
 
 	offset = fb->offsets[plane];
 	if (!fb->modifier) {
+		block_w = drm_format_info_block_width(fb->format, plane);
+		block_sz = fb->format->char_per_block[plane];
 		plane_x = x / (plane ? fb->format->hsub : 1);
 		plane_y = y / (plane ? fb->format->vsub : 1);
-		cpp = fb->format->cpp[plane];
-		pitch = fb->pitches[plane];
-		offset += plane_x * cpp *  kfb->format_caps->tile_size +
-				(plane_y * pitch) / kfb->format_caps->tile_size;
+
+		offset += (plane_x / block_w) * block_sz
+			+ plane_y * fb->pitches[plane];
 	}
 
 	return obj->paddr + offset;
 }
+
+/* if the fb can be supported by a specific layer */
+bool komeda_fb_is_layer_supported(struct komeda_fb *kfb, u32 layer_type,
+				  u32 rot)
+{
+	struct drm_framebuffer *fb = &kfb->base;
+	struct komeda_dev *mdev = fb->dev->dev_private;
+	u32 fourcc = fb->format->format;
+	u64 modifier = fb->modifier;
+	bool supported;
+
+	supported = komeda_format_mod_supported(&mdev->fmt_tbl, layer_type,
+						fourcc, modifier, rot);
+	if (!supported)
+		DRM_DEBUG_ATOMIC("Layer TYPE: %d doesn't support fb FMT: %s.\n",
+			layer_type, komeda_get_format_name(fourcc, modifier));
+
+	return supported;
+}
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
index ea2fe190c1e3..c61ca98a3a63 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
@@ -21,19 +21,28 @@ struct komeda_fb {
 	 * extends drm_format_info for komeda specific information
 	 */
 	const struct komeda_format_caps *format_caps;
+	/** @is_va: if smmu is enabled, it will be true */
+	bool is_va;
 	/** @aligned_w: aligned frame buffer width */
 	u32 aligned_w;
 	/** @aligned_h: aligned frame buffer height */
 	u32 aligned_h;
+	/** @afbc_size: minimum size of afbc */
+	u32 afbc_size;
+	/** @offset_payload: start of afbc body buffer */
+	u32 offset_payload;
 };
 
 #define to_kfb(dfb)	container_of(dfb, struct komeda_fb, base)
 
 struct drm_framebuffer *
 komeda_fb_create(struct drm_device *dev, struct drm_file *file,
-		 const struct drm_mode_fb_cmd2 *mode_cmd);
+		const struct drm_mode_fb_cmd2 *mode_cmd);
+int komeda_fb_check_src_coords(const struct komeda_fb *kfb,
+			       u32 src_x, u32 src_y, u32 src_w, u32 src_h);
 dma_addr_t
 komeda_fb_get_pixel_addr(struct komeda_fb *kfb, int x, int y, int plane);
-bool komeda_fb_is_layer_supported(struct komeda_fb *kfb, u32 layer_type);
+bool komeda_fb_is_layer_supported(struct komeda_fb *kfb, u32 layer_type,
+		u32 rot);
 
 #endif
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index 86f6542afb40..419a8b0e5de8 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -58,7 +58,6 @@ static struct drm_driver komeda_kms_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC |
 			   DRIVER_PRIME | DRIVER_HAVE_IRQ,
 	.lastclose			= drm_fb_helper_lastclose,
-	.irq_handler			= komeda_kms_irq_handler,
 	.gem_free_object_unlocked	= drm_gem_cma_free_object,
 	.gem_vm_ops			= &drm_gem_cma_vm_ops,
 	.dumb_create			= komeda_gem_cma_dumb_create,
@@ -100,6 +99,108 @@ static const struct drm_mode_config_helper_funcs komeda_mode_config_helpers = {
 	.atomic_commit_tail = komeda_kms_commit_tail,
 };
 
+static int komeda_plane_state_list_add(struct drm_plane_state *plane_st,
+				       struct list_head *zorder_list)
+{
+	struct komeda_plane_state *new = to_kplane_st(plane_st);
+	struct komeda_plane_state *node, *last;
+
+	last = list_empty(zorder_list) ?
+	       NULL : list_last_entry(zorder_list, typeof(*last), zlist_node);
+
+	/* Considering the list sequence is zpos increasing, so if list is empty
+	 * or the zpos of new node bigger than the last node in list, no need
+	 * loop and just insert the new one to the tail of the list.
+	 */
+	if (!last || (new->base.zpos > last->base.zpos)) {
+		list_add_tail(&new->zlist_node, zorder_list);
+		return 0;
+	}
+
+	/* Build the list by zpos increasing */
+	list_for_each_entry(node, zorder_list, zlist_node) {
+		if (new->base.zpos < node->base.zpos) {
+			list_add_tail(&new->zlist_node, &node->zlist_node);
+			break;
+		} else if (node->base.zpos == new->base.zpos) {
+			struct drm_plane *a = node->base.plane;
+			struct drm_plane *b = new->base.plane;
+
+			/* Komeda doesn't support setting a same zpos for
+			 * different planes.
+			 */
+			DRM_DEBUG_ATOMIC("PLANE: %s and PLANE: %s are configured same zpos: %d.\n",
+					 a->name, b->name, node->base.zpos);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int komeda_crtc_normalize_zpos(struct drm_crtc *crtc,
+				      struct drm_crtc_state *crtc_st)
+{
+	struct drm_atomic_state *state = crtc_st->state;
+	struct komeda_crtc *kcrtc = to_kcrtc(crtc);
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(crtc_st);
+	struct komeda_plane_state *kplane_st;
+	struct drm_plane_state *plane_st;
+	struct drm_framebuffer *fb;
+	struct drm_plane *plane;
+	struct list_head zorder_list;
+	int order = 0, err;
+
+	DRM_DEBUG_ATOMIC("[CRTC:%d:%s] calculating normalized zpos values\n",
+			 crtc->base.id, crtc->name);
+
+	INIT_LIST_HEAD(&zorder_list);
+
+	/* This loop also added all effected planes into the new state */
+	drm_for_each_plane_mask(plane, crtc->dev, crtc_st->plane_mask) {
+		plane_st = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_st))
+			return PTR_ERR(plane_st);
+
+		/* Build a list by zpos increasing */
+		err = komeda_plane_state_list_add(plane_st, &zorder_list);
+		if (err)
+			return err;
+	}
+
+	kcrtc_st->max_slave_zorder = 0;
+
+	list_for_each_entry(kplane_st, &zorder_list, zlist_node) {
+		plane_st = &kplane_st->base;
+		fb = plane_st->fb;
+		plane = plane_st->plane;
+
+		plane_st->normalized_zpos = order++;
+		/* When layer_split has been enabled, one plane will be handled
+		 * by two separated komeda layers (left/right), which may needs
+		 * two zorders.
+		 * - zorder: for left_layer for left display part.
+		 * - zorder + 1: will be reserved for right layer.
+		 */
+		if (to_kplane_st(plane_st)->layer_split)
+			order++;
+
+		DRM_DEBUG_ATOMIC("[PLANE:%d:%s] zpos:%d, normalized zpos: %d\n",
+				 plane->base.id, plane->name,
+				 plane_st->zpos, plane_st->normalized_zpos);
+
+		/* calculate max slave zorder */
+		if (has_bit(drm_plane_index(plane), kcrtc->slave_planes))
+			kcrtc_st->max_slave_zorder =
+				max(plane_st->normalized_zpos,
+				    kcrtc_st->max_slave_zorder);
+	}
+
+	crtc_st->zpos_changed = true;
+
+	return 0;
+}
+
 static int komeda_kms_check(struct drm_device *dev,
 			    struct drm_atomic_state *state)
 {
@@ -111,7 +212,7 @@ static int komeda_kms_check(struct drm_device *dev,
 	if (err)
 		return err;
 
-	/* komeda need to re-calculate resource assumption in every commit
+	/* Komeda need to re-calculate resource assumption in every commit
 	 * so need to add all affected_planes (even unchanged) to
 	 * drm_atomic_state.
 	 */
@@ -119,6 +220,10 @@ static int komeda_kms_check(struct drm_device *dev,
 		err = drm_atomic_add_affected_planes(state, crtc);
 		if (err)
 			return err;
+
+		err = komeda_crtc_normalize_zpos(crtc, new_crtc_st);
+		if (err)
+			return err;
 	}
 
 	err = drm_atomic_helper_check_planes(dev, state);
@@ -148,7 +253,7 @@ static void komeda_kms_mode_config_init(struct komeda_kms_dev *kms,
 	config->min_height	= 0;
 	config->max_width	= 4096;
 	config->max_height	= 4096;
-	config->allow_fb_modifiers = false;
+	config->allow_fb_modifiers = true;
 
 	config->funcs = &komeda_mode_config_funcs;
 	config->helper_private = &komeda_mode_config_helpers;
@@ -188,29 +293,36 @@ struct komeda_kms_dev *komeda_kms_attach(struct komeda_dev *mdev)
 	if (err)
 		goto cleanup_mode_config;
 
+	err = komeda_kms_add_wb_connectors(kms, mdev);
+	if (err)
+		goto cleanup_mode_config;
+
 	err = component_bind_all(mdev->dev, kms);
 	if (err)
 		goto cleanup_mode_config;
 
 	drm_mode_config_reset(drm);
 
-	err = drm_irq_install(drm, mdev->irq);
+	err = devm_request_irq(drm->dev, mdev->irq,
+			       komeda_kms_irq_handler, IRQF_SHARED,
+			       drm->driver->name, drm);
 	if (err)
 		goto cleanup_mode_config;
 
 	err = mdev->funcs->enable_irq(mdev);
 	if (err)
-		goto uninstall_irq;
+		goto cleanup_mode_config;
+
+	drm->irq_enabled = true;
 
 	err = drm_dev_register(drm, 0);
 	if (err)
-		goto uninstall_irq;
+		goto cleanup_mode_config;
 
 	return kms;
 
-uninstall_irq:
-	drm_irq_uninstall(drm);
 cleanup_mode_config:
+	drm->irq_enabled = false;
 	drm_mode_config_cleanup(drm);
 	komeda_kms_cleanup_private_objs(kms);
 free_kms:
@@ -223,9 +335,9 @@ void komeda_kms_detach(struct komeda_kms_dev *kms)
 	struct drm_device *drm = &kms->base;
 	struct komeda_dev *mdev = drm->dev_private;
 
+	drm->irq_enabled = false;
 	mdev->funcs->disable_irq(mdev);
 	drm_dev_unregister(drm);
-	drm_irq_uninstall(drm);
 	component_unbind_all(mdev->dev, drm);
 	komeda_kms_cleanup_private_objs(kms);
 	drm_mode_config_cleanup(drm);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
index ac3d9209b4d9..219fa3f0c336 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h
@@ -7,11 +7,13 @@
 #ifndef _KOMEDA_KMS_H_
 #define _KOMEDA_KMS_H_
 
+#include <linux/list.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_device.h>
 #include <drm/drm_writeback.h>
+#include <drm/drm_print.h>
 #include <video/videomode.h>
 #include <video/display_timing.h>
 
@@ -31,6 +33,11 @@ struct komeda_plane {
 	 * Layers with same capabilities.
 	 */
 	struct komeda_layer *layer;
+
+	/** @prop_img_enhancement: for on/off image enhancement */
+	struct drm_property *prop_img_enhancement;
+	/** @prop_layer_split: for on/off layer_split */
+	struct drm_property *prop_layer_split;
 };
 
 /**
@@ -42,8 +49,14 @@ struct komeda_plane {
 struct komeda_plane_state {
 	/** @base: &drm_plane_state */
 	struct drm_plane_state base;
+	/** @zlist_node: zorder list node */
+	struct list_head zlist_node;
 
-	/* private properties */
+	/* @img_enhancement: on/off image enhancement
+	 * @layer_split: on/off layer_split
+	 */
+	u8 img_enhancement : 1,
+	   layer_split : 1;
 };
 
 /**
@@ -73,8 +86,20 @@ struct komeda_crtc {
 	 */
 	struct komeda_pipeline *slave;
 
+	/** @slave_planes: komeda slave planes mask */
+	u32 slave_planes;
+
+	/** @wb_conn: komeda write back connector */
+	struct komeda_wb_connector *wb_conn;
+
 	/** @disable_done: this flip_done is for tracing the disable */
 	struct completion *disable_done;
+
+	/** @clock_ratio_property: property for ratio of (aclk << 32)/pxlclk */
+	struct drm_property *clock_ratio_property;
+
+	/** @slave_planes_property: property for slaves of the planes */
+	struct drm_property *slave_planes_property;
 };
 
 /**
@@ -97,6 +122,12 @@ struct komeda_crtc_state {
 	 * the active pipelines in once display instance
 	 */
 	u32 active_pipes;
+
+	/** @clock_ratio: ratio of (aclk << 32)/pxlclk */
+	u64 clock_ratio;
+
+	/** @max_slave_zorder: the maximum of slave zorder */
+	u32 max_slave_zorder;
 };
 
 /** struct komeda_kms_dev - for gather KMS related things */
@@ -116,6 +147,42 @@ struct komeda_kms_dev {
 #define to_kcrtc(p)	container_of(p, struct komeda_crtc, base)
 #define to_kcrtc_st(p)	container_of(p, struct komeda_crtc_state, base)
 #define to_kdev(p)	container_of(p, struct komeda_kms_dev, base)
+#define to_wb_conn(x)	container_of(x, struct drm_writeback_connector, base)
+
+static inline bool is_writeback_only(struct drm_crtc_state *st)
+{
+	struct komeda_wb_connector *wb_conn = to_kcrtc(st->crtc)->wb_conn;
+	struct drm_connector *conn = wb_conn ? &wb_conn->base.base : NULL;
+
+	return conn && (st->connector_mask == BIT(drm_connector_index(conn)));
+}
+
+static inline bool
+is_only_changed_connector(struct drm_crtc_state *st, struct drm_connector *conn)
+{
+	struct drm_crtc_state *old_st;
+	u32 changed_connectors;
+
+	old_st = drm_atomic_get_old_crtc_state(st->state, st->crtc);
+	changed_connectors = st->connector_mask ^ old_st->connector_mask;
+
+	return BIT(drm_connector_index(conn)) == changed_connectors;
+}
+
+static inline bool has_flip_h(u32 rot)
+{
+	u32 rotation = drm_rotation_simplify(rot,
+					     DRM_MODE_ROTATE_0 |
+					     DRM_MODE_ROTATE_90 |
+					     DRM_MODE_REFLECT_MASK);
+
+	if (rotation & DRM_MODE_ROTATE_90)
+		return !!(rotation & DRM_MODE_REFLECT_Y);
+	else
+		return !!(rotation & DRM_MODE_REFLECT_X);
+}
+
+unsigned long komeda_calc_aclk(struct komeda_crtc_state *kcrtc_st);
 
 int komeda_kms_setup_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
 
@@ -123,6 +190,8 @@ int komeda_kms_add_crtcs(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
 int komeda_kms_add_planes(struct komeda_kms_dev *kms, struct komeda_dev *mdev);
 int komeda_kms_add_private_objs(struct komeda_kms_dev *kms,
 				struct komeda_dev *mdev);
+int komeda_kms_add_wb_connectors(struct komeda_kms_dev *kms,
+				 struct komeda_dev *mdev);
 void komeda_kms_cleanup_private_objs(struct komeda_kms_dev *kms);
 
 void komeda_crtc_handle_event(struct komeda_crtc   *kcrtc,
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
index a130b62fa6d1..78e44d9e1520 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c
@@ -53,7 +53,6 @@ void komeda_pipeline_destroy(struct komeda_dev *mdev,
 	}
 
 	clk_put(pipe->pxlclk);
-	clk_put(pipe->aclk);
 
 	of_node_put(pipe->of_output_dev);
 	of_node_put(pipe->of_output_port);
@@ -92,6 +91,12 @@ komeda_pipeline_get_component_pos(struct komeda_pipeline *pipe, int id)
 	case KOMEDA_COMPONENT_SCALER1:
 		pos = to_cpos(pipe->scalers[id - KOMEDA_COMPONENT_SCALER0]);
 		break;
+	case KOMEDA_COMPONENT_SPLITTER:
+		pos = to_cpos(pipe->splitter);
+		break;
+	case KOMEDA_COMPONENT_MERGER:
+		pos = to_cpos(pipe->merger);
+		break;
 	case KOMEDA_COMPONENT_IPS0:
 	case KOMEDA_COMPONENT_IPS1:
 		temp = mdev->pipelines[id - KOMEDA_COMPONENT_IPS0];
@@ -126,6 +131,28 @@ komeda_pipeline_get_component(struct komeda_pipeline *pipe, int id)
 	return c;
 }
 
+struct komeda_component *
+komeda_pipeline_get_first_component(struct komeda_pipeline *pipe,
+				    u32 comp_mask)
+{
+	struct komeda_component *c = NULL;
+	int id;
+
+	id = find_first_bit((unsigned long *)&comp_mask, 32);
+	if (id < 32)
+		c = komeda_pipeline_get_component(pipe, id);
+
+	return c;
+}
+
+static struct komeda_component *
+komeda_component_pickup_input(struct komeda_component *c, u32 avail_comps)
+{
+	u32 avail_inputs = c->supported_inputs & (avail_comps);
+
+	return komeda_pipeline_get_first_component(c->pipeline, avail_inputs);
+}
+
 /** komeda_component_add - Add a component to &komeda_pipeline */
 struct komeda_component *
 komeda_component_add(struct komeda_pipeline *pipe,
@@ -249,16 +276,49 @@ static void komeda_component_verify_inputs(struct komeda_component *c)
 	}
 }
 
+static struct komeda_layer *
+komeda_get_layer_split_right_layer(struct komeda_pipeline *pipe,
+				   struct komeda_layer *left)
+{
+	int index = left->base.id - KOMEDA_COMPONENT_LAYER0;
+	int i;
+
+	for (i = index + 1; i < pipe->n_layers; i++)
+		if (left->layer_type == pipe->layers[i]->layer_type)
+			return pipe->layers[i];
+	return NULL;
+}
+
 static void komeda_pipeline_assemble(struct komeda_pipeline *pipe)
 {
 	struct komeda_component *c;
-	int id;
+	struct komeda_layer *layer;
+	int i, id;
 
 	dp_for_each_set_bit(id, pipe->avail_comps) {
 		c = komeda_pipeline_get_component(pipe, id);
-
 		komeda_component_verify_inputs(c);
 	}
+	/* calculate right layer for the layer split */
+	for (i = 0; i < pipe->n_layers; i++) {
+		layer = pipe->layers[i];
+
+		layer->right = komeda_get_layer_split_right_layer(pipe, layer);
+	}
+}
+
+/* if pipeline_A accept another pipeline_B's component as input, treat
+ * pipeline_B as slave of pipeline_A.
+ */
+struct komeda_pipeline *
+komeda_pipeline_get_slave(struct komeda_pipeline *master)
+{
+	struct komeda_component *slave;
+
+	slave = komeda_component_pickup_input(&master->compiz->base,
+					      KOMEDA_PIPELINE_COMPIZS);
+
+	return slave ? slave->pipeline : NULL;
 }
 
 int komeda_assemble_pipelines(struct komeda_dev *mdev)
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
index bae8a32b81a6..fc1b8613385e 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
@@ -228,6 +228,12 @@ struct komeda_layer {
 	struct malidp_range hsize_in, vsize_in;
 	u32 layer_type; /* RICH, SIMPLE or WB */
 	u32 supported_rots;
+	/* komeda supports layer split which splits a whole image to two parts
+	 * left and right and handle them by two individual layer processors
+	 * Note: left/right are always according to the final display rect,
+	 * not the source buffer.
+	 */
+	struct komeda_layer *right;
 };
 
 struct komeda_layer_state {
@@ -235,16 +241,34 @@ struct komeda_layer_state {
 	/* layer specific configuration state */
 	u16 hsize, vsize;
 	u32 rot;
+	u16 afbc_crop_l;
+	u16 afbc_crop_r;
+	u16 afbc_crop_t;
+	u16 afbc_crop_b;
 	dma_addr_t addr[3];
 };
 
 struct komeda_scaler {
 	struct komeda_component base;
-	/* scaler features and caps */
+	struct malidp_range hsize, vsize;
+	u32 max_upscaling;
+	u32 max_downscaling;
+	u8 scaling_split_overlap; /* split overlap for scaling */
+	u8 enh_split_overlap; /* split overlap for image enhancement */
 };
 
 struct komeda_scaler_state {
 	struct komeda_component_state base;
+	u16 hsize_in, vsize_in;
+	u16 hsize_out, vsize_out;
+	u16 total_hsize_in, total_vsize_in;
+	u16 total_hsize_out; /* total_xxxx are size before split */
+	u16 left_crop, right_crop;
+	u8 en_scaling : 1,
+	   en_alpha : 1, /* enable alpha processing */
+	   en_img_enhancement : 1,
+	   en_split : 1,
+	   right_part : 1; /* right part of split image */
 };
 
 struct komeda_compiz {
@@ -265,6 +289,29 @@ struct komeda_compiz_state {
 	struct komeda_compiz_input_cfg cins[KOMEDA_COMPONENT_N_INPUTS];
 };
 
+struct komeda_merger {
+	struct komeda_component base;
+	struct malidp_range hsize_merged;
+	struct malidp_range vsize_merged;
+};
+
+struct komeda_merger_state {
+	struct komeda_component_state base;
+	u16 hsize_merged;
+	u16 vsize_merged;
+};
+
+struct komeda_splitter {
+	struct komeda_component base;
+	struct malidp_range hsize, vsize;
+};
+
+struct komeda_splitter_state {
+	struct komeda_component_state base;
+	u16 hsize, vsize;
+	u16 overlap;
+};
+
 struct komeda_improc {
 	struct komeda_component base;
 	u32 supported_color_formats;  /* DRM_RGB/YUV444/YUV420*/
@@ -300,13 +347,27 @@ struct komeda_data_flow_cfg {
 	struct komeda_component_output input;
 	u16 in_x, in_y, in_w, in_h;
 	u32 out_x, out_y, out_w, out_h;
+	u16 total_in_h, total_in_w;
+	u16 total_out_w;
+	u16 left_crop, right_crop, overlap;
 	u32 rot;
 	int blending_zorder;
 	u8 pixel_blend_mode, layer_alpha;
+	u8 en_scaling : 1,
+	   en_img_enhancement : 1,
+	   en_split : 1,
+	   is_yuv : 1,
+	   right_part : 1; /* right part of display image if split enabled */
 };
 
-/** struct komeda_pipeline_funcs */
 struct komeda_pipeline_funcs {
+	/* check if the aclk (main engine clock) can satisfy the clock
+	 * requirements of the downscaling that specified by dflow
+	 */
+	int (*downscaling_clk_check)(struct komeda_pipeline *pipe,
+				     struct drm_display_mode *mode,
+				     unsigned long aclk_rate,
+				     struct komeda_data_flow_cfg *dflow);
 	/* dump_register: Optional, dump registers to seq_file */
 	void (*dump_register)(struct komeda_pipeline *pipe,
 			      struct seq_file *sf);
@@ -324,8 +385,6 @@ struct komeda_pipeline {
 	struct komeda_dev *mdev;
 	/** @pxlclk: pixel clock */
 	struct clk *pxlclk;
-	/** @aclk: AXI clock */
-	struct clk *aclk;
 	/** @id: pipeline id */
 	int id;
 	/** @avail_comps: available components mask of pipeline */
@@ -340,6 +399,10 @@ struct komeda_pipeline {
 	struct komeda_scaler *scalers[KOMEDA_PIPELINE_MAX_SCALERS];
 	/** @compiz: compositor */
 	struct komeda_compiz *compiz;
+	/** @splitter: for split the compiz output to two half data flows */
+	struct komeda_splitter *splitter;
+	/** @merger: merger */
+	struct komeda_merger *merger;
 	/** @wb_layer: writeback layer */
 	struct komeda_layer  *wb_layer;
 	/** @improc: post image processor */
@@ -382,17 +445,21 @@ struct komeda_pipeline_state {
 #define to_layer(c)	container_of(c, struct komeda_layer, base)
 #define to_compiz(c)	container_of(c, struct komeda_compiz, base)
 #define to_scaler(c)	container_of(c, struct komeda_scaler, base)
+#define to_splitter(c)	container_of(c, struct komeda_splitter, base)
+#define to_merger(c)	container_of(c, struct komeda_merger, base)
 #define to_improc(c)	container_of(c, struct komeda_improc, base)
 #define to_ctrlr(c)	container_of(c, struct komeda_timing_ctrlr, base)
 
 #define to_layer_st(c)	container_of(c, struct komeda_layer_state, base)
 #define to_compiz_st(c)	container_of(c, struct komeda_compiz_state, base)
-#define to_scaler_st(c) container_of(c, struct komeda_scaler_state, base)
+#define to_scaler_st(c)	container_of(c, struct komeda_scaler_state, base)
+#define to_splitter_st(c) container_of(c, struct komeda_splitter_state, base)
+#define to_merger_st(c)	container_of(c, struct komeda_merger_state, base)
 #define to_improc_st(c)	container_of(c, struct komeda_improc_state, base)
 #define to_ctrlr_st(c)	container_of(c, struct komeda_timing_ctrlr_state, base)
 
 #define priv_to_comp_st(o) container_of(o, struct komeda_component_state, obj)
-#define priv_to_pipe_st(o)  container_of(o, struct komeda_pipeline_state, obj)
+#define priv_to_pipe_st(o) container_of(o, struct komeda_pipeline_state, obj)
 
 /* pipeline APIs */
 struct komeda_pipeline *
@@ -400,9 +467,14 @@ komeda_pipeline_add(struct komeda_dev *mdev, size_t size,
 		    const struct komeda_pipeline_funcs *funcs);
 void komeda_pipeline_destroy(struct komeda_dev *mdev,
 			     struct komeda_pipeline *pipe);
+struct komeda_pipeline *
+komeda_pipeline_get_slave(struct komeda_pipeline *master);
 int komeda_assemble_pipelines(struct komeda_dev *mdev);
 struct komeda_component *
 komeda_pipeline_get_component(struct komeda_pipeline *pipe, int id);
+struct komeda_component *
+komeda_pipeline_get_first_component(struct komeda_pipeline *pipe,
+				    u32 comp_mask);
 
 void komeda_pipeline_dump_register(struct komeda_pipeline *pipe,
 				   struct seq_file *sf);
@@ -419,17 +491,41 @@ komeda_component_add(struct komeda_pipeline *pipe,
 void komeda_component_destroy(struct komeda_dev *mdev,
 			      struct komeda_component *c);
 
+static inline struct komeda_component *
+komeda_component_pickup_output(struct komeda_component *c, u32 avail_comps)
+{
+	u32 avail_inputs = c->supported_outputs & (avail_comps);
+
+	return komeda_pipeline_get_first_component(c->pipeline, avail_inputs);
+}
+
 struct komeda_plane_state;
 struct komeda_crtc_state;
 struct komeda_crtc;
 
+void pipeline_composition_size(struct komeda_crtc_state *kcrtc_st,
+			       u16 *hsize, u16 *vsize);
+
 int komeda_build_layer_data_flow(struct komeda_layer *layer,
 				 struct komeda_plane_state *kplane_st,
 				 struct komeda_crtc_state *kcrtc_st,
 				 struct komeda_data_flow_cfg *dflow);
+int komeda_build_wb_data_flow(struct komeda_layer *wb_layer,
+			      struct drm_connector_state *conn_st,
+			      struct komeda_crtc_state *kcrtc_st,
+			      struct komeda_data_flow_cfg *dflow);
 int komeda_build_display_data_flow(struct komeda_crtc *kcrtc,
 				   struct komeda_crtc_state *kcrtc_st);
 
+int komeda_build_layer_split_data_flow(struct komeda_layer *left,
+				       struct komeda_plane_state *kplane_st,
+				       struct komeda_crtc_state *kcrtc_st,
+				       struct komeda_data_flow_cfg *dflow);
+int komeda_build_wb_split_data_flow(struct komeda_layer *wb_layer,
+				    struct drm_connector_state *conn_st,
+				    struct komeda_crtc_state *kcrtc_st,
+				    struct komeda_data_flow_cfg *dflow);
+
 int komeda_release_unclaimed_resources(struct komeda_pipeline *pipe,
 				       struct komeda_crtc_state *kcrtc_st);
 
@@ -441,4 +537,7 @@ void komeda_pipeline_disable(struct komeda_pipeline *pipe,
 void komeda_pipeline_update(struct komeda_pipeline *pipe,
 			    struct drm_atomic_state *old_state);
 
+void komeda_complete_data_flow_cfg(struct komeda_data_flow_cfg *dflow,
+				   struct drm_framebuffer *fb);
+
 #endif /* _KOMEDA_PIPELINE_H_*/
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
index 36570d7dad61..2b415ef2b7d3 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -211,13 +211,14 @@ komeda_component_check_input(struct komeda_component_state *state,
 	struct komeda_component *c = state->component;
 
 	if ((idx < 0) || (idx >= c->max_active_inputs)) {
-		DRM_DEBUG_ATOMIC("%s invalid input id: %d.\n", c->name, idx);
+		DRM_DEBUG_ATOMIC("%s required an invalid %s-input[%d].\n",
+				 input->component->name, c->name, idx);
 		return -EINVAL;
 	}
 
 	if (has_bit(idx, state->active_inputs)) {
-		DRM_DEBUG_ATOMIC("%s required input_id: %d has been occupied already.\n",
-				 c->name, idx);
+		DRM_DEBUG_ATOMIC("%s required %s-input[%d] has been occupied already.\n",
+				 input->component->name, c->name, idx);
 		return -EINVAL;
 	}
 
@@ -249,18 +250,67 @@ komeda_component_validate_private(struct komeda_component *c,
 	return err;
 }
 
+/* Get current available scaler from the component->supported_outputs */
+static struct komeda_scaler *
+komeda_component_get_avail_scaler(struct komeda_component *c,
+				  struct drm_atomic_state *state)
+{
+	struct komeda_pipeline_state *pipe_st;
+	u32 avail_scalers;
+
+	pipe_st = komeda_pipeline_get_state(c->pipeline, state);
+	if (!pipe_st)
+		return NULL;
+
+	avail_scalers = (pipe_st->active_comps & KOMEDA_PIPELINE_SCALERS) ^
+			KOMEDA_PIPELINE_SCALERS;
+
+	c = komeda_component_pickup_output(c, avail_scalers);
+
+	return to_scaler(c);
+}
+
+static void
+komeda_rotate_data_flow(struct komeda_data_flow_cfg *dflow, u32 rot)
+{
+	if (drm_rotation_90_or_270(rot)) {
+		swap(dflow->in_h, dflow->in_w);
+		swap(dflow->total_in_h, dflow->total_in_w);
+	}
+}
+
 static int
 komeda_layer_check_cfg(struct komeda_layer *layer,
-		       struct komeda_plane_state *kplane_st,
+		       struct komeda_fb *kfb,
 		       struct komeda_data_flow_cfg *dflow)
 {
-	if (!in_range(&layer->hsize_in, dflow->in_w)) {
-		DRM_DEBUG_ATOMIC("src_w: %d is out of range.\n", dflow->in_w);
+	u32 src_x, src_y, src_w, src_h;
+
+	if (!komeda_fb_is_layer_supported(kfb, layer->layer_type, dflow->rot))
+		return -EINVAL;
+
+	if (layer->base.id == KOMEDA_COMPONENT_WB_LAYER) {
+		src_x = dflow->out_x;
+		src_y = dflow->out_y;
+		src_w = dflow->out_w;
+		src_h = dflow->out_h;
+	} else {
+		src_x = dflow->in_x;
+		src_y = dflow->in_y;
+		src_w = dflow->in_w;
+		src_h = dflow->in_h;
+	}
+
+	if (komeda_fb_check_src_coords(kfb, src_x, src_y, src_w, src_h))
+		return -EINVAL;
+
+	if (!in_range(&layer->hsize_in, src_w)) {
+		DRM_DEBUG_ATOMIC("invalidate src_w %d.\n", src_w);
 		return -EINVAL;
 	}
 
-	if (!in_range(&layer->vsize_in, dflow->in_h)) {
-		DRM_DEBUG_ATOMIC("src_h: %d is out of range.\n", dflow->in_h);
+	if (!in_range(&layer->vsize_in, src_h)) {
+		DRM_DEBUG_ATOMIC("invalidate src_h %d.\n", src_h);
 		return -EINVAL;
 	}
 
@@ -279,7 +329,7 @@ komeda_layer_validate(struct komeda_layer *layer,
 	struct komeda_layer_state *st;
 	int i, err;
 
-	err = komeda_layer_check_cfg(layer, kplane_st, dflow);
+	err = komeda_layer_check_cfg(layer, kfb, dflow);
 	if (err)
 		return err;
 
@@ -291,8 +341,22 @@ komeda_layer_validate(struct komeda_layer *layer,
 	st = to_layer_st(c_st);
 
 	st->rot = dflow->rot;
-	st->hsize = kfb->aligned_w;
-	st->vsize = kfb->aligned_h;
+
+	if (fb->modifier) {
+		st->hsize = kfb->aligned_w;
+		st->vsize = kfb->aligned_h;
+		st->afbc_crop_l = dflow->in_x;
+		st->afbc_crop_r = kfb->aligned_w - dflow->in_x - dflow->in_w;
+		st->afbc_crop_t = dflow->in_y;
+		st->afbc_crop_b = kfb->aligned_h - dflow->in_y - dflow->in_h;
+	} else {
+		st->hsize = dflow->in_w;
+		st->vsize = dflow->in_h;
+		st->afbc_crop_l = 0;
+		st->afbc_crop_r = 0;
+		st->afbc_crop_t = 0;
+		st->afbc_crop_b = 0;
+	}
 
 	for (i = 0; i < fb->format->num_planes; i++)
 		st->addr[i] = komeda_fb_get_pixel_addr(kfb, dflow->in_x,
@@ -305,11 +369,275 @@ komeda_layer_validate(struct komeda_layer *layer,
 	/* update the data flow for the next stage */
 	komeda_component_set_output(&dflow->input, &layer->base, 0);
 
+	/*
+	 * The rotation has been handled by layer, so adjusted the data flow for
+	 * the next stage.
+	 */
+	komeda_rotate_data_flow(dflow, st->rot);
+
+	return 0;
+}
+
+static int
+komeda_wb_layer_validate(struct komeda_layer *wb_layer,
+			 struct drm_connector_state *conn_st,
+			 struct komeda_data_flow_cfg *dflow)
+{
+	struct komeda_fb *kfb = to_kfb(conn_st->writeback_job->fb);
+	struct komeda_component_state *c_st;
+	struct komeda_layer_state *st;
+	int i, err;
+
+	err = komeda_layer_check_cfg(wb_layer, kfb, dflow);
+	if (err)
+		return err;
+
+	c_st = komeda_component_get_state_and_set_user(&wb_layer->base,
+			conn_st->state, conn_st->connector, conn_st->crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_layer_st(c_st);
+
+	st->hsize = dflow->out_w;
+	st->vsize = dflow->out_h;
+
+	for (i = 0; i < kfb->base.format->num_planes; i++)
+		st->addr[i] = komeda_fb_get_pixel_addr(kfb, dflow->out_x,
+						       dflow->out_y, i);
+
+	komeda_component_add_input(&st->base, &dflow->input, 0);
+	komeda_component_set_output(&dflow->input, &wb_layer->base, 0);
+
 	return 0;
 }
 
-static void pipeline_composition_size(struct komeda_crtc_state *kcrtc_st,
-				      u16 *hsize, u16 *vsize)
+static bool scaling_ratio_valid(u32 size_in, u32 size_out,
+				u32 max_upscaling, u32 max_downscaling)
+{
+	if (size_out > size_in * max_upscaling)
+		return false;
+	else if (size_in > size_out * max_downscaling)
+		return false;
+	return true;
+}
+
+static int
+komeda_scaler_check_cfg(struct komeda_scaler *scaler,
+			struct komeda_crtc_state *kcrtc_st,
+			struct komeda_data_flow_cfg *dflow)
+{
+	u32 hsize_in, vsize_in, hsize_out, vsize_out;
+	u32 max_upscaling;
+
+	hsize_in = dflow->in_w;
+	vsize_in = dflow->in_h;
+	hsize_out = dflow->out_w;
+	vsize_out = dflow->out_h;
+
+	if (!in_range(&scaler->hsize, hsize_in) ||
+	    !in_range(&scaler->hsize, hsize_out)) {
+		DRM_DEBUG_ATOMIC("Invalid horizontal sizes");
+		return -EINVAL;
+	}
+
+	if (!in_range(&scaler->vsize, vsize_in) ||
+	    !in_range(&scaler->vsize, vsize_out)) {
+		DRM_DEBUG_ATOMIC("Invalid vertical sizes");
+		return -EINVAL;
+	}
+
+	/* If input comes from compiz that means the scaling is for writeback
+	 * and scaler can not do upscaling for writeback
+	 */
+	if (has_bit(dflow->input.component->id, KOMEDA_PIPELINE_COMPIZS))
+		max_upscaling = 1;
+	else
+		max_upscaling = scaler->max_upscaling;
+
+	if (!scaling_ratio_valid(hsize_in, hsize_out, max_upscaling,
+				 scaler->max_downscaling)) {
+		DRM_DEBUG_ATOMIC("Invalid horizontal scaling ratio");
+		return -EINVAL;
+	}
+
+	if (!scaling_ratio_valid(vsize_in, vsize_out, max_upscaling,
+				 scaler->max_downscaling)) {
+		DRM_DEBUG_ATOMIC("Invalid vertical scaling ratio");
+		return -EINVAL;
+	}
+
+	if (hsize_in > hsize_out || vsize_in > vsize_out) {
+		struct komeda_pipeline *pipe = scaler->base.pipeline;
+		int err;
+
+		err = pipe->funcs->downscaling_clk_check(pipe,
+					&kcrtc_st->base.adjusted_mode,
+					komeda_calc_aclk(kcrtc_st), dflow);
+		if (err) {
+			DRM_DEBUG_ATOMIC("aclk can't satisfy the clock requirement of the downscaling\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int
+komeda_scaler_validate(void *user,
+		       struct komeda_crtc_state *kcrtc_st,
+		       struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_atomic_state *drm_st = kcrtc_st->base.state;
+	struct komeda_component_state *c_st;
+	struct komeda_scaler_state *st;
+	struct komeda_scaler *scaler;
+	int err = 0;
+
+	if (!(dflow->en_scaling || dflow->en_img_enhancement))
+		return 0;
+
+	scaler = komeda_component_get_avail_scaler(dflow->input.component,
+						   drm_st);
+	if (!scaler) {
+		DRM_DEBUG_ATOMIC("No scaler available");
+		return -EINVAL;
+	}
+
+	err = komeda_scaler_check_cfg(scaler, kcrtc_st, dflow);
+	if (err)
+		return err;
+
+	c_st = komeda_component_get_state_and_set_user(&scaler->base,
+			drm_st, user, kcrtc_st->base.crtc);
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_scaler_st(c_st);
+
+	st->hsize_in = dflow->in_w;
+	st->vsize_in = dflow->in_h;
+	st->hsize_out = dflow->out_w;
+	st->vsize_out = dflow->out_h;
+	st->right_crop = dflow->right_crop;
+	st->left_crop = dflow->left_crop;
+	st->total_vsize_in = dflow->total_in_h;
+	st->total_hsize_in = dflow->total_in_w;
+	st->total_hsize_out = dflow->total_out_w;
+
+	/* Enable alpha processing if the next stage needs the pixel alpha */
+	st->en_alpha = dflow->pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE;
+	st->en_scaling = dflow->en_scaling;
+	st->en_img_enhancement = dflow->en_img_enhancement;
+	st->en_split = dflow->en_split;
+	st->right_part = dflow->right_part;
+
+	komeda_component_add_input(&st->base, &dflow->input, 0);
+	komeda_component_set_output(&dflow->input, &scaler->base, 0);
+	return err;
+}
+
+static void komeda_split_data_flow(struct komeda_scaler *scaler,
+				   struct komeda_data_flow_cfg *dflow,
+				   struct komeda_data_flow_cfg *l_dflow,
+				   struct komeda_data_flow_cfg *r_dflow);
+
+static int
+komeda_splitter_validate(struct komeda_splitter *splitter,
+			 struct drm_connector_state *conn_st,
+			 struct komeda_data_flow_cfg *dflow,
+			 struct komeda_data_flow_cfg *l_output,
+			 struct komeda_data_flow_cfg *r_output)
+{
+	struct komeda_component_state *c_st;
+	struct komeda_splitter_state *st;
+
+	if (!splitter) {
+		DRM_DEBUG_ATOMIC("Current HW doesn't support splitter.\n");
+		return -EINVAL;
+	}
+
+	if (!in_range(&splitter->hsize, dflow->in_w)) {
+		DRM_DEBUG_ATOMIC("split in_w:%d is out of the acceptable range.\n",
+				 dflow->in_w);
+		return -EINVAL;
+	}
+
+	if (!in_range(&splitter->vsize, dflow->in_h)) {
+		DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n",
+				 dflow->in_w);
+		return -EINVAL;
+	}
+
+	c_st = komeda_component_get_state_and_set_user(&splitter->base,
+			conn_st->state, conn_st->connector, conn_st->crtc);
+
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	komeda_split_data_flow(splitter->base.pipeline->scalers[0],
+			       dflow, l_output, r_output);
+
+	st = to_splitter_st(c_st);
+	st->hsize = dflow->in_w;
+	st->vsize = dflow->in_h;
+	st->overlap = dflow->overlap;
+
+	komeda_component_add_input(&st->base, &dflow->input, 0);
+	komeda_component_set_output(&l_output->input, &splitter->base, 0);
+	komeda_component_set_output(&r_output->input, &splitter->base, 1);
+
+	return 0;
+}
+
+static int
+komeda_merger_validate(struct komeda_merger *merger,
+		       void *user,
+		       struct komeda_crtc_state *kcrtc_st,
+		       struct komeda_data_flow_cfg *left_input,
+		       struct komeda_data_flow_cfg *right_input,
+		       struct komeda_data_flow_cfg *output)
+{
+	struct komeda_component_state *c_st;
+	struct komeda_merger_state *st;
+	int err = 0;
+
+	if (!merger) {
+		DRM_DEBUG_ATOMIC("No merger is available");
+		return -EINVAL;
+	}
+
+	if (!in_range(&merger->hsize_merged, output->out_w)) {
+		DRM_DEBUG_ATOMIC("merged_w: %d is out of the accepted range.\n",
+				 output->out_w);
+		return -EINVAL;
+	}
+
+	if (!in_range(&merger->vsize_merged, output->out_h)) {
+		DRM_DEBUG_ATOMIC("merged_h: %d is out of the accepted range.\n",
+				 output->out_h);
+		return -EINVAL;
+	}
+
+	c_st = komeda_component_get_state_and_set_user(&merger->base,
+			kcrtc_st->base.state, kcrtc_st->base.crtc, kcrtc_st->base.crtc);
+
+	if (IS_ERR(c_st))
+		return PTR_ERR(c_st);
+
+	st = to_merger_st(c_st);
+	st->hsize_merged = output->out_w;
+	st->vsize_merged = output->out_h;
+
+	komeda_component_add_input(c_st, &left_input->input, 0);
+	komeda_component_add_input(c_st, &right_input->input, 1);
+	komeda_component_set_output(&output->input, &merger->base, 0);
+
+	return err;
+}
+
+void pipeline_composition_size(struct komeda_crtc_state *kcrtc_st,
+			       u16 *hsize, u16 *vsize)
 {
 	struct drm_display_mode *m = &kcrtc_st->base.adjusted_mode;
 
@@ -366,6 +694,7 @@ komeda_compiz_set_input(struct komeda_compiz *compiz,
 		c_st->changed_active_inputs |= BIT(idx);
 
 	komeda_component_add_input(c_st, &dflow->input, idx);
+	komeda_component_set_output(&dflow->input, &compiz->base, 0);
 
 	return 0;
 }
@@ -455,6 +784,36 @@ komeda_timing_ctrlr_validate(struct komeda_timing_ctrlr *ctrlr,
 	return 0;
 }
 
+void komeda_complete_data_flow_cfg(struct komeda_data_flow_cfg *dflow,
+				   struct drm_framebuffer *fb)
+{
+	u32 w = dflow->in_w;
+	u32 h = dflow->in_h;
+
+	dflow->total_in_w = dflow->in_w;
+	dflow->total_in_h = dflow->in_h;
+	dflow->total_out_w = dflow->out_w;
+
+	/* if format doesn't have alpha, fix blend mode to PIXEL_NONE */
+	if (!fb->format->has_alpha)
+		dflow->pixel_blend_mode = DRM_MODE_BLEND_PIXEL_NONE;
+
+	if (drm_rotation_90_or_270(dflow->rot))
+		swap(w, h);
+
+	dflow->en_scaling = (w != dflow->out_w) || (h != dflow->out_h);
+	dflow->is_yuv = fb->format->is_yuv;
+}
+
+static bool merger_is_available(struct komeda_pipeline *pipe,
+				struct komeda_data_flow_cfg *dflow)
+{
+	u32 avail_inputs = pipe->merger ?
+			   pipe->merger->base.supported_inputs : 0;
+
+	return has_bit(dflow->input.component->id, avail_inputs);
+}
+
 int komeda_build_layer_data_flow(struct komeda_layer *layer,
 				 struct komeda_plane_state *kplane_st,
 				 struct komeda_crtc_state *kcrtc_st,
@@ -473,11 +832,290 @@ int komeda_build_layer_data_flow(struct komeda_layer *layer,
 	if (err)
 		return err;
 
+	err = komeda_scaler_validate(plane, kcrtc_st, dflow);
+	if (err)
+		return err;
+
+	/* if split, check if can put the data flow into merger */
+	if (dflow->en_split && merger_is_available(pipe, dflow))
+		return 0;
+
+	err = komeda_compiz_set_input(pipe->compiz, kcrtc_st, dflow);
+
+	return err;
+}
+
+/*
+ * Split is introduced for workaround scaler's input/output size limitation.
+ * The idea is simple, if one scaler can not fit the requirement, use two.
+ * So split splits the big source image to two half parts (left/right) and do
+ * the scaling by two scaler separately and independently.
+ * But split also imports an edge problem in the middle of the image when
+ * scaling, to avoid it, split isn't a simple half-and-half, but add an extra
+ * pixels (overlap) to both side, after split the left/right will be:
+ * - left: [0, src_length/2 + overlap]
+ * - right: [src_length/2 - overlap, src_length]
+ * The extra overlap do eliminate the edge problem, but which may also generates
+ * unnecessary pixels when scaling, we need to crop them before scaler output
+ * the result to the next stage. and for the how to crop, it depends on the
+ * unneeded pixels, another words the position where overlay has been added.
+ * - left: crop the right
+ * - right: crop the left
+ *
+ * The diagram for how to do the split
+ *
+ *  <---------------------left->out_w ---------------->
+ * |--------------------------------|---right_crop-----| <- left after split
+ *  \                                \                /
+ *   \                                \<--overlap--->/
+ *   |-----------------|-------------|(Middle)------|-----------------| <- src
+ *                     /<---overlap--->\                               \
+ *                    /                 \                               \
+ * right after split->|-----left_crop---|--------------------------------|
+ *                    ^<------------------- right->out_w --------------->^
+ *
+ * NOTE: To consistent with HW the output_w always contains the crop size.
+ */
+
+static void komeda_split_data_flow(struct komeda_scaler *scaler,
+				   struct komeda_data_flow_cfg *dflow,
+				   struct komeda_data_flow_cfg *l_dflow,
+				   struct komeda_data_flow_cfg *r_dflow)
+{
+	bool r90 = drm_rotation_90_or_270(dflow->rot);
+	bool flip_h = has_flip_h(dflow->rot);
+	u32 l_out, r_out, overlap;
+
+	memcpy(l_dflow, dflow, sizeof(*dflow));
+	memcpy(r_dflow, dflow, sizeof(*dflow));
+
+	l_dflow->right_part = false;
+	r_dflow->right_part = true;
+	r_dflow->blending_zorder = dflow->blending_zorder + 1;
+
+	overlap = 0;
+	if (dflow->en_scaling && scaler)
+		overlap += scaler->scaling_split_overlap;
+
+	/* original dflow may fed into splitter, and which doesn't need
+	 * enhancement overlap
+	 */
+	dflow->overlap = overlap;
+
+	if (dflow->en_img_enhancement && scaler)
+		overlap += scaler->enh_split_overlap;
+
+	l_dflow->overlap = overlap;
+	r_dflow->overlap = overlap;
+
+	/* split the origin content */
+	/* left/right here always means the left/right part of display image,
+	 * not the source Image
+	 */
+	/* DRM rotation is anti-clockwise */
+	if (r90) {
+		if (dflow->en_scaling) {
+			l_dflow->in_h = ALIGN(dflow->in_h, 2) / 2 + l_dflow->overlap;
+			r_dflow->in_h = l_dflow->in_h;
+		} else if (dflow->en_img_enhancement) {
+			/* enhancer only */
+			l_dflow->in_h = ALIGN(dflow->in_h, 2) / 2 + l_dflow->overlap;
+			r_dflow->in_h = dflow->in_h / 2 + r_dflow->overlap;
+		} else {
+			/* split without scaler, no overlap */
+			l_dflow->in_h = ALIGN(((dflow->in_h + 1) >> 1), 2);
+			r_dflow->in_h = dflow->in_h - l_dflow->in_h;
+		}
+
+		/* Consider YUV format, after split, the split source w/h
+		 * may not aligned to 2. we have two choices for such case.
+		 * 1. scaler is enabled (overlap != 0), we can do a alignment
+		 *    both left/right and crop the extra data by scaler.
+		 * 2. scaler is not enabled, only align the split left
+		 *    src/disp, and the rest part assign to right
+		 */
+		if ((overlap != 0) && dflow->is_yuv) {
+			l_dflow->in_h = ALIGN(l_dflow->in_h, 2);
+			r_dflow->in_h = ALIGN(r_dflow->in_h, 2);
+		}
+
+		if (flip_h)
+			l_dflow->in_y = dflow->in_y + dflow->in_h - l_dflow->in_h;
+		else
+			r_dflow->in_y = dflow->in_y + dflow->in_h - r_dflow->in_h;
+	} else {
+		if (dflow->en_scaling) {
+			l_dflow->in_w = ALIGN(dflow->in_w, 2) / 2 + l_dflow->overlap;
+			r_dflow->in_w = l_dflow->in_w;
+		} else if (dflow->en_img_enhancement) {
+			l_dflow->in_w = ALIGN(dflow->in_w, 2) / 2 + l_dflow->overlap;
+			r_dflow->in_w = dflow->in_w / 2 + r_dflow->overlap;
+		} else {
+			l_dflow->in_w = ALIGN(((dflow->in_w + 1) >> 1), 2);
+			r_dflow->in_w = dflow->in_w - l_dflow->in_w;
+		}
+
+		/* do YUV alignment when scaler enabled */
+		if ((overlap != 0) && dflow->is_yuv) {
+			l_dflow->in_w = ALIGN(l_dflow->in_w, 2);
+			r_dflow->in_w = ALIGN(r_dflow->in_w, 2);
+		}
+
+		/* on flip_h, the left display content from the right-source */
+		if (flip_h)
+			l_dflow->in_x = dflow->in_w + dflow->in_x - l_dflow->in_w;
+		else
+			r_dflow->in_x = dflow->in_w + dflow->in_x - r_dflow->in_w;
+	}
+
+	/* split the disp_rect */
+	if (dflow->en_scaling || dflow->en_img_enhancement)
+		l_dflow->out_w = ((dflow->out_w + 1) >> 1);
+	else
+		l_dflow->out_w = ALIGN(((dflow->out_w + 1) >> 1), 2);
+
+	r_dflow->out_w = dflow->out_w - l_dflow->out_w;
+
+	l_dflow->out_x = dflow->out_x;
+	r_dflow->out_x = l_dflow->out_w + l_dflow->out_x;
+
+	/* calculate the scaling crop */
+	/* left scaler output more data and do crop */
+	if (r90) {
+		l_out = (dflow->out_w * l_dflow->in_h) / dflow->in_h;
+		r_out = (dflow->out_w * r_dflow->in_h) / dflow->in_h;
+	} else {
+		l_out = (dflow->out_w * l_dflow->in_w) / dflow->in_w;
+		r_out = (dflow->out_w * r_dflow->in_w) / dflow->in_w;
+	}
+
+	l_dflow->left_crop  = 0;
+	l_dflow->right_crop = l_out - l_dflow->out_w;
+	r_dflow->left_crop  = r_out - r_dflow->out_w;
+	r_dflow->right_crop = 0;
+
+	/* out_w includes the crop length */
+	l_dflow->out_w += l_dflow->right_crop + l_dflow->left_crop;
+	r_dflow->out_w += r_dflow->right_crop + r_dflow->left_crop;
+}
+
+/* For layer split, a plane state will be split to two data flows and handled
+ * by two separated komeda layer input pipelines. komeda supports two types of
+ * layer split:
+ * - none-scaling split:
+ *             / layer-left -> \
+ * plane_state                  compiz-> ...
+ *             \ layer-right-> /
+ *
+ * - scaling split:
+ *             / layer-left -> scaler->\
+ * plane_state                          merger -> compiz-> ...
+ *             \ layer-right-> scaler->/
+ *
+ * Since merger only supports scaler as input, so for none-scaling split, two
+ * layer data flows will be output to compiz directly. for scaling_split, two
+ * data flow will be merged by merger firstly, then merger outputs one merged
+ * data flow to compiz.
+ */
+int komeda_build_layer_split_data_flow(struct komeda_layer *left,
+				       struct komeda_plane_state *kplane_st,
+				       struct komeda_crtc_state *kcrtc_st,
+				       struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_plane *plane = kplane_st->base.plane;
+	struct komeda_pipeline *pipe = left->base.pipeline;
+	struct komeda_layer *right = left->right;
+	struct komeda_data_flow_cfg l_dflow, r_dflow;
+	int err;
+
+	komeda_split_data_flow(pipe->scalers[0], dflow, &l_dflow, &r_dflow);
+
+	DRM_DEBUG_ATOMIC("Assign %s + %s to [PLANE:%d:%s]: "
+			 "src[x/y:%d/%d, w/h:%d/%d] disp[x/y:%d/%d, w/h:%d/%d]",
+			 left->base.name, right->base.name,
+			 plane->base.id, plane->name,
+			 dflow->in_x, dflow->in_y, dflow->in_w, dflow->in_h,
+			 dflow->out_x, dflow->out_y, dflow->out_w, dflow->out_h);
+
+	err = komeda_build_layer_data_flow(left, kplane_st, kcrtc_st, &l_dflow);
+	if (err)
+		return err;
+
+	err = komeda_build_layer_data_flow(right, kplane_st, kcrtc_st, &r_dflow);
+	if (err)
+		return err;
+
+	/* The rotation has been handled by layer, so adjusted the data flow */
+	komeda_rotate_data_flow(dflow, dflow->rot);
+
+	/* left and right dflow has been merged to compiz already,
+	 * no need merger to merge them anymore.
+	 */
+	if (r_dflow.input.component == l_dflow.input.component)
+		return 0;
+
+	/* line merger path */
+	err = komeda_merger_validate(pipe->merger, plane, kcrtc_st,
+				     &l_dflow, &r_dflow, dflow);
+	if (err)
+		return err;
+
 	err = komeda_compiz_set_input(pipe->compiz, kcrtc_st, dflow);
 
 	return err;
 }
 
+/* writeback data path: compiz -> scaler -> wb_layer -> memory */
+int komeda_build_wb_data_flow(struct komeda_layer *wb_layer,
+			      struct drm_connector_state *conn_st,
+			      struct komeda_crtc_state *kcrtc_st,
+			      struct komeda_data_flow_cfg *dflow)
+{
+	struct drm_connector *conn = conn_st->connector;
+	int err;
+
+	err = komeda_scaler_validate(conn, kcrtc_st, dflow);
+	if (err)
+		return err;
+
+	return komeda_wb_layer_validate(wb_layer, conn_st, dflow);
+}
+
+/* writeback scaling split data path:
+ *                   /-> scaler ->\
+ * compiz -> splitter              merger -> wb_layer -> memory
+ *                   \-> scaler ->/
+ */
+int komeda_build_wb_split_data_flow(struct komeda_layer *wb_layer,
+				    struct drm_connector_state *conn_st,
+				    struct komeda_crtc_state *kcrtc_st,
+				    struct komeda_data_flow_cfg *dflow)
+{
+	struct komeda_pipeline *pipe = wb_layer->base.pipeline;
+	struct drm_connector *conn = conn_st->connector;
+	struct komeda_data_flow_cfg l_dflow, r_dflow;
+	int err;
+
+	err = komeda_splitter_validate(pipe->splitter, conn_st,
+				       dflow, &l_dflow, &r_dflow);
+	if (err)
+		return err;
+	err = komeda_scaler_validate(conn, kcrtc_st, &l_dflow);
+	if (err)
+		return err;
+
+	err = komeda_scaler_validate(conn, kcrtc_st, &r_dflow);
+	if (err)
+		return err;
+
+	err = komeda_merger_validate(pipe->merger, conn_st, kcrtc_st,
+				     &l_dflow, &r_dflow, dflow);
+	if (err)
+		return err;
+
+	return komeda_wb_layer_validate(wb_layer, conn_st, dflow);
+}
+
 /* build display output data flow, the data path is:
  * compiz -> improc -> timing_ctrlr
  */
@@ -485,10 +1123,25 @@ int komeda_build_display_data_flow(struct komeda_crtc *kcrtc,
 				   struct komeda_crtc_state *kcrtc_st)
 {
 	struct komeda_pipeline *master = kcrtc->master;
+	struct komeda_pipeline *slave  = kcrtc->slave;
 	struct komeda_data_flow_cfg m_dflow; /* master data flow */
+	struct komeda_data_flow_cfg s_dflow; /* slave data flow */
 	int err;
 
 	memset(&m_dflow, 0, sizeof(m_dflow));
+	memset(&s_dflow, 0, sizeof(s_dflow));
+
+	if (slave && has_bit(slave->id, kcrtc_st->active_pipes)) {
+		err = komeda_compiz_validate(slave->compiz, kcrtc_st, &s_dflow);
+		if (err)
+			return err;
+
+		/* merge the slave dflow into master pipeline */
+		err = komeda_compiz_set_input(master->compiz, kcrtc_st,
+					      &s_dflow);
+		if (err)
+			return err;
+	}
 
 	err = komeda_compiz_validate(master->compiz, kcrtc_st, &m_dflow);
 	if (err)
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
index c97062bdd69b..04b122f28fb6 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c
@@ -10,20 +10,32 @@
 #include <drm/drm_print.h>
 #include "komeda_dev.h"
 #include "komeda_kms.h"
+#include "komeda_framebuffer.h"
 
 static int
 komeda_plane_init_data_flow(struct drm_plane_state *st,
+			    struct komeda_crtc_state *kcrtc_st,
 			    struct komeda_data_flow_cfg *dflow)
 {
+	struct komeda_plane *kplane = to_kplane(st->plane);
+	struct komeda_plane_state *kplane_st = to_kplane_st(st);
 	struct drm_framebuffer *fb = st->fb;
+	const struct komeda_format_caps *caps = to_kfb(fb)->format_caps;
+	struct komeda_pipeline *pipe = kplane->layer->base.pipeline;
 
 	memset(dflow, 0, sizeof(*dflow));
 
-	dflow->blending_zorder = st->zpos;
+	dflow->blending_zorder = st->normalized_zpos;
+	if (pipe == to_kcrtc(st->crtc)->master)
+		dflow->blending_zorder -= kcrtc_st->max_slave_zorder;
+	if (dflow->blending_zorder < 0) {
+		DRM_DEBUG_ATOMIC("%s zorder:%d < max_slave_zorder: %d.\n",
+				 st->plane->name, st->normalized_zpos,
+				 kcrtc_st->max_slave_zorder);
+		return -EINVAL;
+	}
 
-	/* if format doesn't have alpha, fix blend mode to PIXEL_NONE */
-	dflow->pixel_blend_mode = fb->format->has_alpha ?
-		st->pixel_blend_mode : DRM_MODE_BLEND_PIXEL_NONE;
+	dflow->pixel_blend_mode = st->pixel_blend_mode;
 	dflow->layer_alpha = st->alpha >> 8;
 
 	dflow->out_x = st->crtc_x;
@@ -36,6 +48,20 @@ komeda_plane_init_data_flow(struct drm_plane_state *st,
 	dflow->in_w = st->src_w >> 16;
 	dflow->in_h = st->src_h >> 16;
 
+	dflow->rot = drm_rotation_simplify(st->rotation, caps->supported_rots);
+	if (!has_bits(dflow->rot, caps->supported_rots)) {
+		DRM_DEBUG_ATOMIC("rotation(0x%x) isn't supported by %s.\n",
+				 dflow->rot,
+				 komeda_get_format_name(caps->fourcc,
+							fb->modifier));
+		return -EINVAL;
+	}
+
+	dflow->en_img_enhancement = !!kplane_st->img_enhancement;
+	dflow->en_split = !!kplane_st->layer_split;
+
+	komeda_complete_data_flow_cfg(dflow, fb);
+
 	return 0;
 }
 
@@ -74,11 +100,16 @@ komeda_plane_atomic_check(struct drm_plane *plane,
 
 	kcrtc_st = to_kcrtc_st(crtc_st);
 
-	err = komeda_plane_init_data_flow(state, &dflow);
+	err = komeda_plane_init_data_flow(state, kcrtc_st, &dflow);
 	if (err)
 		return err;
 
-	err = komeda_build_layer_data_flow(layer, kplane_st, kcrtc_st, &dflow);
+	if (dflow.en_split)
+		err = komeda_build_layer_split_data_flow(layer,
+				kplane_st, kcrtc_st, &dflow);
+	else
+		err = komeda_build_layer_data_flow(layer,
+				kplane_st, kcrtc_st, &dflow);
 
 	return err;
 }
@@ -121,6 +152,8 @@ static void komeda_plane_reset(struct drm_plane *plane)
 		state->base.pixel_blend_mode = DRM_MODE_BLEND_PREMULTI;
 		state->base.alpha = DRM_BLEND_ALPHA_OPAQUE;
 		state->base.zpos = kplane->layer->base.id;
+		state->base.color_encoding = DRM_COLOR_YCBCR_BT601;
+		state->base.color_range = DRM_COLOR_YCBCR_LIMITED_RANGE;
 		plane->state = &state->base;
 		plane->state->plane = plane;
 	}
@@ -129,7 +162,7 @@ static void komeda_plane_reset(struct drm_plane *plane)
 static struct drm_plane_state *
 komeda_plane_atomic_duplicate_state(struct drm_plane *plane)
 {
-	struct komeda_plane_state *new;
+	struct komeda_plane_state *new, *old;
 
 	if (WARN_ON(!plane->state))
 		return NULL;
@@ -140,6 +173,10 @@ komeda_plane_atomic_duplicate_state(struct drm_plane *plane)
 
 	__drm_atomic_helper_plane_duplicate_state(plane, &new->base);
 
+	old = to_kplane_st(plane->state);
+
+	new->img_enhancement = old->img_enhancement;
+
 	return &new->base;
 }
 
@@ -151,6 +188,56 @@ komeda_plane_atomic_destroy_state(struct drm_plane *plane,
 	kfree(to_kplane_st(state));
 }
 
+static int
+komeda_plane_atomic_get_property(struct drm_plane *plane,
+				 const struct drm_plane_state *state,
+				 struct drm_property *property,
+				 uint64_t *val)
+{
+	struct komeda_plane *kplane = to_kplane(plane);
+	struct komeda_plane_state *st = to_kplane_st(state);
+
+	if (property == kplane->prop_img_enhancement)
+		*val = st->img_enhancement;
+	else if (property == kplane->prop_layer_split)
+		*val = st->layer_split;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+komeda_plane_atomic_set_property(struct drm_plane *plane,
+				 struct drm_plane_state *state,
+				 struct drm_property *property,
+				 uint64_t val)
+{
+	struct komeda_plane *kplane = to_kplane(plane);
+	struct komeda_plane_state *st = to_kplane_st(state);
+
+	if (property == kplane->prop_img_enhancement)
+		st->img_enhancement = !!val;
+	else if (property == kplane->prop_layer_split)
+		st->layer_split = !!val;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool
+komeda_plane_format_mod_supported(struct drm_plane *plane,
+				  u32 format, u64 modifier)
+{
+	struct komeda_dev *mdev = plane->dev->dev_private;
+	struct komeda_plane *kplane = to_kplane(plane);
+	u32 layer_type = kplane->layer->layer_type;
+
+	return komeda_format_mod_supported(&mdev->fmt_tbl, layer_type,
+					   format, modifier, 0);
+}
+
 static const struct drm_plane_funcs komeda_plane_funcs = {
 	.update_plane		= drm_atomic_helper_update_plane,
 	.disable_plane		= drm_atomic_helper_disable_plane,
@@ -158,8 +245,43 @@ static const struct drm_plane_funcs komeda_plane_funcs = {
 	.reset			= komeda_plane_reset,
 	.atomic_duplicate_state	= komeda_plane_atomic_duplicate_state,
 	.atomic_destroy_state	= komeda_plane_atomic_destroy_state,
+	.atomic_get_property	= komeda_plane_atomic_get_property,
+	.atomic_set_property	= komeda_plane_atomic_set_property,
+	.format_mod_supported	= komeda_plane_format_mod_supported,
 };
 
+static int
+komeda_plane_create_layer_properties(struct komeda_plane *kplane,
+				     struct komeda_layer *layer)
+{
+	struct drm_device *drm = kplane->base.dev;
+	struct drm_plane *plane = &kplane->base;
+	struct drm_property *prop = NULL;
+
+	/* property: layer image_enhancement */
+	if (layer->base.supported_outputs & KOMEDA_PIPELINE_SCALERS) {
+		prop = drm_property_create_bool(drm, DRM_MODE_PROP_ATOMIC,
+						"img_enhancement");
+		if (!prop)
+			return -ENOMEM;
+
+		drm_object_attach_property(&plane->base, prop, 0);
+		kplane->prop_img_enhancement = prop;
+	}
+
+	/* property: layer split */
+	if (layer->right) {
+		prop = drm_property_create_bool(drm, DRM_MODE_PROP_ATOMIC,
+						"layer_split");
+		if (!prop)
+			return -ENOMEM;
+		kplane->prop_layer_split = prop;
+		drm_object_attach_property(&plane->base, prop, 0);
+	}
+
+	return 0;
+}
+
 /* for komeda, which is pipeline can be share between crtcs */
 static u32 get_possible_crtcs(struct komeda_kms_dev *kms,
 			      struct komeda_pipeline *pipe)
@@ -178,6 +300,22 @@ static u32 get_possible_crtcs(struct komeda_kms_dev *kms,
 	return possible_crtcs;
 }
 
+static void
+komeda_set_crtc_plane_mask(struct komeda_kms_dev *kms,
+			   struct komeda_pipeline *pipe,
+			   struct drm_plane *plane)
+{
+	struct komeda_crtc *kcrtc;
+	int i;
+
+	for (i = 0; i < kms->n_crtcs; i++) {
+		kcrtc = &kms->crtcs[i];
+
+		if (pipe == kcrtc->slave)
+			kcrtc->slave_planes |= BIT(drm_plane_index(plane));
+	}
+}
+
 /* use Layer0 as primary */
 static u32 get_plane_type(struct komeda_kms_dev *kms,
 			  struct komeda_component *c)
@@ -210,7 +348,7 @@ static int komeda_plane_add(struct komeda_kms_dev *kms,
 	err = drm_universal_plane_init(&kms->base, plane,
 			get_possible_crtcs(kms, c->pipeline),
 			&komeda_plane_funcs,
-			formats, n_formats, NULL,
+			formats, n_formats, komeda_supported_modifiers,
 			get_plane_type(kms, c),
 			"%s", c->name);
 
@@ -221,6 +359,43 @@ static int komeda_plane_add(struct komeda_kms_dev *kms,
 
 	drm_plane_helper_add(plane, &komeda_plane_helper_funcs);
 
+	err = drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
+						 layer->supported_rots);
+	if (err)
+		goto cleanup;
+
+	err = drm_plane_create_alpha_property(plane);
+	if (err)
+		goto cleanup;
+
+	err = drm_plane_create_blend_mode_property(plane,
+			BIT(DRM_MODE_BLEND_PIXEL_NONE) |
+			BIT(DRM_MODE_BLEND_PREMULTI)   |
+			BIT(DRM_MODE_BLEND_COVERAGE));
+	if (err)
+		goto cleanup;
+
+	err = komeda_plane_create_layer_properties(kplane, layer);
+	if (err)
+		goto cleanup;
+
+	err = drm_plane_create_color_properties(plane,
+			BIT(DRM_COLOR_YCBCR_BT601) |
+			BIT(DRM_COLOR_YCBCR_BT709) |
+			BIT(DRM_COLOR_YCBCR_BT2020),
+			BIT(DRM_COLOR_YCBCR_LIMITED_RANGE) |
+			BIT(DRM_COLOR_YCBCR_FULL_RANGE),
+			DRM_COLOR_YCBCR_BT601,
+			DRM_COLOR_YCBCR_LIMITED_RANGE);
+	if (err)
+		goto cleanup;
+
+	err = drm_plane_create_zpos_property(plane, layer->base.id, 0, 8);
+	if (err)
+		goto cleanup;
+
+	komeda_set_crtc_plane_mask(kms, c->pipeline, plane);
+
 	return 0;
 cleanup:
 	komeda_plane_destroy(plane);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
index a54878cbd6e4..914400c4af73 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_private_obj.c
@@ -61,6 +61,49 @@ static int komeda_layer_obj_add(struct komeda_kms_dev *kms,
 }
 
 static struct drm_private_state *
+komeda_scaler_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_scaler_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_scaler_atomic_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(to_scaler_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_scaler_obj_funcs = {
+	.atomic_duplicate_state	= komeda_scaler_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_scaler_atomic_destroy_state,
+};
+
+static int komeda_scaler_obj_add(struct komeda_kms_dev *kms,
+				 struct komeda_scaler *scaler)
+{
+	struct komeda_scaler_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &scaler->base;
+	drm_atomic_private_obj_init(&kms->base,
+				    &scaler->base.obj, &st->base.obj,
+				    &komeda_scaler_obj_funcs);
+	return 0;
+}
+
+static struct drm_private_state *
 komeda_compiz_atomic_duplicate_state(struct drm_private_obj *obj)
 {
 	struct komeda_compiz_state *st;
@@ -104,6 +147,93 @@ static int komeda_compiz_obj_add(struct komeda_kms_dev *kms,
 }
 
 static struct drm_private_state *
+komeda_splitter_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_splitter_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void
+komeda_splitter_atomic_destroy_state(struct drm_private_obj *obj,
+				     struct drm_private_state *state)
+{
+	kfree(to_splitter_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_splitter_obj_funcs = {
+	.atomic_duplicate_state	= komeda_splitter_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_splitter_atomic_destroy_state,
+};
+
+static int komeda_splitter_obj_add(struct komeda_kms_dev *kms,
+				   struct komeda_splitter *splitter)
+{
+	struct komeda_splitter_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &splitter->base;
+	drm_atomic_private_obj_init(&kms->base,
+				    &splitter->base.obj, &st->base.obj,
+				    &komeda_splitter_obj_funcs);
+
+	return 0;
+}
+
+static struct drm_private_state *
+komeda_merger_atomic_duplicate_state(struct drm_private_obj *obj)
+{
+	struct komeda_merger_state *st;
+
+	st = kmemdup(obj->state, sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return NULL;
+
+	komeda_component_state_reset(&st->base);
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &st->base.obj);
+
+	return &st->base.obj;
+}
+
+static void komeda_merger_atomic_destroy_state(struct drm_private_obj *obj,
+					       struct drm_private_state *state)
+{
+	kfree(to_merger_st(priv_to_comp_st(state)));
+}
+
+static const struct drm_private_state_funcs komeda_merger_obj_funcs = {
+	.atomic_duplicate_state	= komeda_merger_atomic_duplicate_state,
+	.atomic_destroy_state	= komeda_merger_atomic_destroy_state,
+};
+
+static int komeda_merger_obj_add(struct komeda_kms_dev *kms,
+				 struct komeda_merger *merger)
+{
+	struct komeda_merger_state *st;
+
+	st = kzalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+	st->base.component = &merger->base;
+	drm_atomic_private_obj_init(&kms->base,
+				    &merger->base.obj, &st->base.obj,
+				    &komeda_merger_obj_funcs);
+
+	return 0;
+}
+
+static struct drm_private_state *
 komeda_improc_atomic_duplicate_state(struct drm_private_obj *obj)
 {
 	struct komeda_improc_state *st;
@@ -252,10 +382,34 @@ int komeda_kms_add_private_objs(struct komeda_kms_dev *kms,
 				return err;
 		}
 
+		if (pipe->wb_layer) {
+			err = komeda_layer_obj_add(kms, pipe->wb_layer);
+			if (err)
+				return err;
+		}
+
+		for (j = 0; j < pipe->n_scalers; j++) {
+			err = komeda_scaler_obj_add(kms, pipe->scalers[j]);
+			if (err)
+				return err;
+		}
+
 		err = komeda_compiz_obj_add(kms, pipe->compiz);
 		if (err)
 			return err;
 
+		if (pipe->splitter) {
+			err = komeda_splitter_obj_add(kms, pipe->splitter);
+			if (err)
+				return err;
+		}
+
+		if (pipe->merger) {
+			err = komeda_merger_obj_add(kms, pipe->merger);
+			if (err)
+				return err;
+		}
+
 		err = komeda_improc_obj_add(kms, pipe->improc);
 		if (err)
 			return err;
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
new file mode 100644
index 000000000000..bb8a61f6e9a4
--- /dev/null
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * Author: James.Qian.Wang <james.qian.wang@arm.com>
+ *
+ */
+#include "komeda_dev.h"
+#include "komeda_kms.h"
+
+static int
+komeda_wb_init_data_flow(struct komeda_layer *wb_layer,
+			 struct drm_connector_state *conn_st,
+			 struct komeda_crtc_state *kcrtc_st,
+			 struct komeda_data_flow_cfg *dflow)
+{
+	struct komeda_scaler *scaler = wb_layer->base.pipeline->scalers[0];
+	struct drm_framebuffer *fb = conn_st->writeback_job->fb;
+
+	memset(dflow, 0, sizeof(*dflow));
+
+	dflow->out_w = fb->width;
+	dflow->out_h = fb->height;
+
+	/* the write back data comes from the compiz */
+	pipeline_composition_size(kcrtc_st, &dflow->in_w, &dflow->in_h);
+	dflow->input.component = &wb_layer->base.pipeline->compiz->base;
+	/* compiz doesn't output alpha */
+	dflow->pixel_blend_mode = DRM_MODE_BLEND_PIXEL_NONE;
+	dflow->rot = DRM_MODE_ROTATE_0;
+
+	komeda_complete_data_flow_cfg(dflow, fb);
+
+	/* if scaling exceed the acceptable scaler input/output range, try to
+	 * enable split.
+	 */
+	if (dflow->en_scaling && scaler)
+		dflow->en_split = !in_range(&scaler->hsize, dflow->in_w) ||
+				  !in_range(&scaler->hsize, dflow->out_w);
+
+	return 0;
+}
+
+static int
+komeda_wb_encoder_atomic_check(struct drm_encoder *encoder,
+			       struct drm_crtc_state *crtc_st,
+			       struct drm_connector_state *conn_st)
+{
+	struct komeda_crtc_state *kcrtc_st = to_kcrtc_st(crtc_st);
+	struct drm_writeback_job *writeback_job = conn_st->writeback_job;
+	struct komeda_layer *wb_layer;
+	struct komeda_data_flow_cfg dflow;
+	int err;
+
+	if (!writeback_job || !writeback_job->fb) {
+		return 0;
+	}
+
+	if (!crtc_st->active) {
+		DRM_DEBUG_ATOMIC("Cannot write the composition result out on a inactive CRTC.\n");
+		return -EINVAL;
+	}
+
+	wb_layer = to_kconn(to_wb_conn(conn_st->connector))->wb_layer;
+
+	/*
+	 * No need for a full modested when the only connector changed is the
+	 * writeback connector.
+	 */
+	if (crtc_st->connectors_changed &&
+	    is_only_changed_connector(crtc_st, conn_st->connector))
+		crtc_st->connectors_changed = false;
+
+	err = komeda_wb_init_data_flow(wb_layer, conn_st, kcrtc_st, &dflow);
+	if (err)
+		return err;
+
+	if (dflow.en_split)
+		err = komeda_build_wb_split_data_flow(wb_layer,
+				conn_st, kcrtc_st, &dflow);
+	else
+		err = komeda_build_wb_data_flow(wb_layer,
+				conn_st, kcrtc_st, &dflow);
+
+	return err;
+}
+
+static const struct drm_encoder_helper_funcs komeda_wb_encoder_helper_funcs = {
+	.atomic_check = komeda_wb_encoder_atomic_check,
+};
+
+static int
+komeda_wb_connector_get_modes(struct drm_connector *connector)
+{
+	return 0;
+}
+
+static enum drm_mode_status
+komeda_wb_connector_mode_valid(struct drm_connector *connector,
+			       struct drm_display_mode *mode)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	int w = mode->hdisplay, h = mode->vdisplay;
+
+	if ((w < mode_config->min_width) || (w > mode_config->max_width))
+		return MODE_BAD_HVALUE;
+
+	if ((h < mode_config->min_height) || (h > mode_config->max_height))
+		return MODE_BAD_VVALUE;
+
+	return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs komeda_wb_conn_helper_funcs = {
+	.get_modes	= komeda_wb_connector_get_modes,
+	.mode_valid	= komeda_wb_connector_mode_valid,
+};
+
+static enum drm_connector_status
+komeda_wb_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static int
+komeda_wb_connector_fill_modes(struct drm_connector *connector,
+			       uint32_t maxX, uint32_t maxY)
+{
+	return 0;
+}
+
+static void komeda_wb_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_cleanup(connector);
+	kfree(to_kconn(to_wb_conn(connector)));
+}
+
+static const struct drm_connector_funcs komeda_wb_connector_funcs = {
+	.reset			= drm_atomic_helper_connector_reset,
+	.detect			= komeda_wb_connector_detect,
+	.fill_modes		= komeda_wb_connector_fill_modes,
+	.destroy		= komeda_wb_connector_destroy,
+	.atomic_duplicate_state	= drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_connector_destroy_state,
+};
+
+static int komeda_wb_connector_add(struct komeda_kms_dev *kms,
+				   struct komeda_crtc *kcrtc)
+{
+	struct komeda_dev *mdev = kms->base.dev_private;
+	struct komeda_wb_connector *kwb_conn;
+	struct drm_writeback_connector *wb_conn;
+	u32 *formats, n_formats = 0;
+	int err;
+
+	if (!kcrtc->master->wb_layer)
+		return 0;
+
+	kwb_conn = kzalloc(sizeof(*wb_conn), GFP_KERNEL);
+	if (!kwb_conn)
+		return -ENOMEM;
+
+	kwb_conn->wb_layer = kcrtc->master->wb_layer;
+
+	wb_conn = &kwb_conn->base;
+	wb_conn->encoder.possible_crtcs = BIT(drm_crtc_index(&kcrtc->base));
+
+	formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl,
+					       kwb_conn->wb_layer->layer_type,
+					       &n_formats);
+
+	err = drm_writeback_connector_init(&kms->base, wb_conn,
+					   &komeda_wb_connector_funcs,
+					   &komeda_wb_encoder_helper_funcs,
+					   formats, n_formats);
+	komeda_put_fourcc_list(formats);
+	if (err)
+		return err;
+
+	drm_connector_helper_add(&wb_conn->base, &komeda_wb_conn_helper_funcs);
+
+	kcrtc->wb_conn = kwb_conn;
+
+	return 0;
+}
+
+int komeda_kms_add_wb_connectors(struct komeda_kms_dev *kms,
+				 struct komeda_dev *mdev)
+{
+	int i, err;
+
+	for (i = 0; i < kms->n_crtcs; i++) {
+		err = komeda_wb_connector_add(kms, &kms->crtcs[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index eb33a811fd4a..db4451260fff 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -459,23 +459,6 @@ static struct drm_crtc_state *malidp_crtc_duplicate_state(struct drm_crtc *crtc)
 	return &state->base;
 }
 
-static void malidp_crtc_reset(struct drm_crtc *crtc)
-{
-	struct malidp_crtc_state *state = NULL;
-
-	if (crtc->state) {
-		state = to_malidp_crtc_state(crtc->state);
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-	}
-
-	kfree(state);
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
-}
-
 static void malidp_crtc_destroy_state(struct drm_crtc *crtc,
 				      struct drm_crtc_state *state)
 {
@@ -489,6 +472,17 @@ static void malidp_crtc_destroy_state(struct drm_crtc *crtc,
 	kfree(mali_state);
 }
 
+static void malidp_crtc_reset(struct drm_crtc *crtc)
+{
+	struct malidp_crtc_state *state =
+		kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (crtc->state)
+		malidp_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
+}
+
 static int malidp_crtc_enable_vblank(struct drm_crtc *crtc)
 {
 	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index af1992f06a1d..f25ec4382277 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -549,19 +549,12 @@ static const struct file_operations malidp_debugfs_fops = {
 static int malidp_debugfs_init(struct drm_minor *minor)
 {
 	struct malidp_drm *malidp = minor->dev->dev_private;
-	struct dentry *dentry = NULL;
 
 	malidp_error_stats_init(&malidp->de_errors);
 	malidp_error_stats_init(&malidp->se_errors);
 	spin_lock_init(&malidp->errors_lock);
-	dentry = debugfs_create_file("debug",
-				     S_IRUGO | S_IWUSR,
-				     minor->debugfs_root, minor->dev,
-				     &malidp_debugfs_fops);
-	if (!dentry) {
-		DRM_ERROR("Cannot create debug file\n");
-		return -ENOMEM;
-	}
+	debugfs_create_file("debug", S_IRUGO | S_IWUSR, minor->debugfs_root,
+			    minor->dev, &malidp_debugfs_fops);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index 5aa7b81ba4c9..50af399d7f6f 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -378,7 +378,8 @@ static void malidp500_modeset(struct malidp_hw_device *hwdev, struct videomode *
 
 int malidp_format_get_bpp(u32 fmt)
 {
-	int bpp = drm_format_plane_cpp(fmt, 0) * 8;
+	const struct drm_format_info *info = drm_format_info(fmt);
+	int bpp = info->cpp[0] * 8;
 
 	if (bpp == 0) {
 		switch (fmt) {
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 5f102bdaf841..2e812525025d 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -158,7 +158,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder,
 		return -EINVAL;
 	}
 
-	n_planes = drm_format_num_planes(fb->format->format);
+	n_planes = fb->format->num_planes;
 	for (i = 0; i < n_planes; i++) {
 		struct drm_gem_cma_object *obj = drm_fb_cma_get_gem_obj(fb, i);
 		/* memory write buffers are never rotated */
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 062e88e238dd..488375bd133d 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -223,14 +223,13 @@ bool malidp_format_mod_supported(struct drm_device *drm,
 
 	if (modifier & AFBC_SPLIT) {
 		if (!info->is_yuv) {
-			if (drm_format_plane_cpp(format, 0) <= 2) {
+			if (info->cpp[0] <= 2) {
 				DRM_DEBUG_KMS("RGB formats <= 16bpp are not supported with SPLIT\n");
 				return false;
 			}
 		}
 
-		if ((drm_format_horz_chroma_subsampling(format) != 1) ||
-		    (drm_format_vert_chroma_subsampling(format) != 1)) {
+		if ((info->hsub != 1) || (info->vsub != 1)) {
 			if (!(format == DRM_FORMAT_YUV420_10BIT &&
 			      (map->features & MALIDP_DEVICE_AFBC_YUV_420_10_SUPPORT_SPLIT))) {
 				DRM_DEBUG_KMS("Formats which are sub-sampled should never be split\n");
@@ -240,8 +239,7 @@ bool malidp_format_mod_supported(struct drm_device *drm,
 	}
 
 	if (modifier & AFBC_CBR) {
-		if ((drm_format_horz_chroma_subsampling(format) == 1) ||
-		    (drm_format_vert_chroma_subsampling(format) == 1)) {
+		if ((info->hsub == 1) || (info->vsub == 1)) {
 			DRM_DEBUG_KMS("Formats which are not sub-sampled should not have CBR set\n");
 			return false;
 		}
diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c
index 058ac7d9920f..a2f6472eb482 100644
--- a/drivers/gpu/drm/armada/armada_fb.c
+++ b/drivers/gpu/drm/armada/armada_fb.c
@@ -87,6 +87,7 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev,
 struct drm_framebuffer *armada_fb_create(struct drm_device *dev,
 	struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev, mode);
 	struct armada_gem_object *obj;
 	struct armada_framebuffer *dfb;
 	int ret;
@@ -97,7 +98,7 @@ struct drm_framebuffer *armada_fb_create(struct drm_device *dev,
 		mode->pitches[2]);
 
 	/* We can only handle a single plane at the moment */
-	if (drm_format_num_planes(mode->pixel_format) > 1 &&
+	if (info->num_planes > 1 &&
 	    (mode->handles[0] != mode->handles[1] ||
 	     mode->handles[0] != mode->handles[2])) {
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/ast/Kconfig b/drivers/gpu/drm/ast/Kconfig
index ac47ecfe7801..829620d5326c 100644
--- a/drivers/gpu/drm/ast/Kconfig
+++ b/drivers/gpu/drm/ast/Kconfig
@@ -2,9 +2,8 @@
 config DRM_AST
 	tristate "AST server chips"
 	depends on DRM && PCI && MMU
-	select DRM_TTM
 	select DRM_KMS_HELPER
-	select DRM_TTM
+	select DRM_VRAM_HELPER
 	help
 	 Say yes for experimental AST GPU driver. Do not enable
 	 this driver without having a working -modesetting,
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 3871b39d4dea..3811997e78c4 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -205,13 +205,7 @@ static struct pci_driver ast_pci_driver = {
 
 static const struct file_operations ast_fops = {
 	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.mmap = ast_mmap,
-	.poll = drm_poll,
-	.compat_ioctl = drm_compat_ioctl,
-	.read = drm_read,
+	DRM_VRAM_MM_FILE_OPERATIONS
 };
 
 static struct drm_driver driver = {
@@ -228,10 +222,7 @@ static struct drm_driver driver = {
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
 
-	.gem_free_object_unlocked = ast_gem_free_object,
-	.dumb_create = ast_dumb_create,
-	.dumb_map_offset = ast_dumb_mmap_offset,
-
+	DRM_GEM_VRAM_DRIVER
 };
 
 static int __init ast_init(void)
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 1cf0c75e411d..684e15e64a62 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -31,13 +31,10 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_fb_helper.h>
 
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_memory.h>
-#include <drm/ttm/ttm_module.h>
-
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
+
+#include <drm/drm_vram_mm_helper.h>
 
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -103,15 +100,7 @@ struct ast_private {
 
 	int fb_mtrr;
 
-	struct {
-		struct ttm_bo_device bdev;
-	} ttm;
-
 	struct drm_gem_object *cursor_cache;
-	uint64_t cursor_cache_gpu_addr;
-	/* Acces to this cache is protected by the crtc->mutex of the only crtc
-	 * we have. */
-	struct ttm_bo_kmap_obj cache_kmap;
 	int next_cursor;
 	bool support_wide_screen;
 	enum {
@@ -243,9 +232,6 @@ struct ast_connector {
 
 struct ast_crtc {
 	struct drm_crtc base;
-	struct drm_gem_object *cursor_bo;
-	uint64_t cursor_addr;
-	int cursor_width, cursor_height;
 	u8 offset_x, offset_y;
 };
 
@@ -263,7 +249,6 @@ struct ast_fbdev {
 	struct ast_framebuffer afb;
 	void *sysram;
 	int size;
-	struct ttm_bo_kmap_obj mapping;
 	int x1, y1, x2, y2; /* dirty rect */
 	spinlock_t dirty_lock;
 };
@@ -321,73 +306,16 @@ void ast_fbdev_fini(struct drm_device *dev);
 void ast_fbdev_set_suspend(struct drm_device *dev, int state);
 void ast_fbdev_set_base(struct ast_private *ast, unsigned long gpu_addr);
 
-struct ast_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
-};
-#define gem_to_ast_bo(gobj) container_of((gobj), struct ast_bo, gem)
-
-static inline struct ast_bo *
-ast_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct ast_bo, bo);
-}
-
-
-#define to_ast_obj(x) container_of(x, struct ast_gem_object, base)
-
 #define AST_MM_ALIGN_SHIFT 4
 #define AST_MM_ALIGN_MASK ((1 << AST_MM_ALIGN_SHIFT) - 1)
 
-extern int ast_dumb_create(struct drm_file *file,
-			   struct drm_device *dev,
-			   struct drm_mode_create_dumb *args);
-
-extern void ast_gem_free_object(struct drm_gem_object *obj);
-extern int ast_dumb_mmap_offset(struct drm_file *file,
-				struct drm_device *dev,
-				uint32_t handle,
-				uint64_t *offset);
-
 int ast_mm_init(struct ast_private *ast);
 void ast_mm_fini(struct ast_private *ast);
 
-int ast_bo_create(struct drm_device *dev, int size, int align,
-		  uint32_t flags, struct ast_bo **pastbo);
-
 int ast_gem_create(struct drm_device *dev,
 		   u32 size, bool iskernel,
 		   struct drm_gem_object **obj);
 
-int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr);
-int ast_bo_unpin(struct ast_bo *bo);
-
-static inline int ast_bo_reserve(struct ast_bo *bo, bool no_wait)
-{
-	int ret;
-
-	ret = ttm_bo_reserve(&bo->bo, true, no_wait, NULL);
-	if (ret) {
-		if (ret != -ERESTARTSYS && ret != -EBUSY)
-			DRM_ERROR("reserve failed %p\n", bo);
-		return ret;
-	}
-	return 0;
-}
-
-static inline void ast_bo_unreserve(struct ast_bo *bo)
-{
-	ttm_bo_unreserve(&bo->bo);
-}
-
-void ast_ttm_placement(struct ast_bo *bo, int domain);
-int ast_bo_push_sysram(struct ast_bo *bo);
-int ast_mmap(struct file *filp, struct vm_area_struct *vma);
-
 /* ast post */
 void ast_enable_vga(struct drm_device *dev);
 void ast_enable_mmio(struct drm_device *dev);
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index e718d0f60d6b..8200b25dad16 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -48,30 +48,30 @@ static void ast_dirty_update(struct ast_fbdev *afbdev,
 			     int x, int y, int width, int height)
 {
 	int i;
-	struct drm_gem_object *obj;
-	struct ast_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int src_offset, dst_offset;
 	int bpp = afbdev->afb.base.format->cpp[0];
-	int ret = -EBUSY;
+	int ret;
+	u8 *dst;
 	bool unmap = false;
 	bool store_for_later = false;
 	int x2, y2;
 	unsigned long flags;
 
-	obj = afbdev->afb.obj;
-	bo = gem_to_ast_bo(obj);
-
-	/*
-	 * try and reserve the BO, if we fail with busy
-	 * then the BO is being moved and we should
-	 * store up the damage until later.
-	 */
-	if (drm_can_sleep())
-		ret = ast_bo_reserve(bo, true);
-	if (ret) {
-		if (ret != -EBUSY)
-			return;
+	gbo = drm_gem_vram_of_gem(afbdev->afb.obj);
 
+	if (drm_can_sleep()) {
+		/* We pin the BO so it won't be moved during the
+		 * update. The actual location, video RAM or system
+		 * memory, is not important.
+		 */
+		ret = drm_gem_vram_pin(gbo, 0);
+		if (ret) {
+			if (ret != -EBUSY)
+				return;
+			store_for_later = true;
+		}
+	} else {
 		store_for_later = true;
 	}
 
@@ -101,25 +101,32 @@ static void ast_dirty_update(struct ast_fbdev *afbdev,
 	afbdev->x2 = afbdev->y2 = 0;
 	spin_unlock_irqrestore(&afbdev->dirty_lock, flags);
 
-	if (!bo->kmap.virtual) {
-		ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-		if (ret) {
+	dst = drm_gem_vram_kmap(gbo, false, NULL);
+	if (IS_ERR(dst)) {
+		DRM_ERROR("failed to kmap fb updates\n");
+		goto out;
+	} else if (!dst) {
+		dst = drm_gem_vram_kmap(gbo, true, NULL);
+		if (IS_ERR(dst)) {
 			DRM_ERROR("failed to kmap fb updates\n");
-			ast_bo_unreserve(bo);
-			return;
+			goto out;
 		}
 		unmap = true;
 	}
+
 	for (i = y; i <= y2; i++) {
 		/* assume equal stride for now */
-		src_offset = dst_offset = i * afbdev->afb.base.pitches[0] + (x * bpp);
-		memcpy_toio(bo->kmap.virtual + src_offset, afbdev->sysram + src_offset, (x2 - x + 1) * bpp);
-
+		src_offset = dst_offset =
+			i * afbdev->afb.base.pitches[0] + (x * bpp);
+		memcpy_toio(dst + dst_offset, afbdev->sysram + src_offset,
+			    (x2 - x + 1) * bpp);
 	}
+
 	if (unmap)
-		ttm_bo_kunmap(&bo->kmap);
+		drm_gem_vram_kunmap(gbo);
 
-	ast_bo_unreserve(bo);
+out:
+	drm_gem_vram_unpin(gbo);
 }
 
 static void ast_fillrect(struct fb_info *info,
@@ -159,8 +166,6 @@ static struct fb_ops astfb_ops = {
 	.fb_pan_display = drm_fb_helper_pan_display,
 	.fb_blank = drm_fb_helper_blank,
 	.fb_setcmap = drm_fb_helper_setcmap,
-	.fb_debug_enter = drm_fb_helper_debug_enter,
-	.fb_debug_leave = drm_fb_helper_debug_leave,
 };
 
 static int astfb_create_object(struct ast_fbdev *afbdev,
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 2854399856ba..4c7e31cb45ff 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -593,7 +593,7 @@ int ast_gem_create(struct drm_device *dev,
 		   u32 size, bool iskernel,
 		   struct drm_gem_object **obj)
 {
-	struct ast_bo *astbo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
 
 	*obj = NULL;
@@ -602,80 +602,13 @@ int ast_gem_create(struct drm_device *dev,
 	if (size == 0)
 		return -EINVAL;
 
-	ret = ast_bo_create(dev, size, 0, 0, &astbo);
-	if (ret) {
+	gbo = drm_gem_vram_create(dev, &dev->vram_mm->bdev, size, 0, false);
+	if (IS_ERR(gbo)) {
+		ret = PTR_ERR(gbo);
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("failed to allocate GEM object\n");
 		return ret;
 	}
-	*obj = &astbo->gem;
+	*obj = &gbo->gem;
 	return 0;
 }
-
-int ast_dumb_create(struct drm_file *file,
-		    struct drm_device *dev,
-		    struct drm_mode_create_dumb *args)
-{
-	int ret;
-	struct drm_gem_object *gobj;
-	u32 handle;
-
-	args->pitch = args->width * ((args->bpp + 7) / 8);
-	args->size = args->pitch * args->height;
-
-	ret = ast_gem_create(dev, args->size, false,
-			     &gobj);
-	if (ret)
-		return ret;
-
-	ret = drm_gem_handle_create(file, gobj, &handle);
-	drm_gem_object_put_unlocked(gobj);
-	if (ret)
-		return ret;
-
-	args->handle = handle;
-	return 0;
-}
-
-static void ast_bo_unref(struct ast_bo **bo)
-{
-	if ((*bo) == NULL)
-		return;
-	ttm_bo_put(&((*bo)->bo));
-	*bo = NULL;
-}
-
-void ast_gem_free_object(struct drm_gem_object *obj)
-{
-	struct ast_bo *ast_bo = gem_to_ast_bo(obj);
-
-	ast_bo_unref(&ast_bo);
-}
-
-
-static inline u64 ast_bo_mmap_offset(struct ast_bo *bo)
-{
-	return drm_vma_node_offset_addr(&bo->bo.vma_node);
-}
-int
-ast_dumb_mmap_offset(struct drm_file *file,
-		     struct drm_device *dev,
-		     uint32_t handle,
-		     uint64_t *offset)
-{
-	struct drm_gem_object *obj;
-	struct ast_bo *bo;
-
-	obj = drm_gem_object_lookup(file, handle);
-	if (obj == NULL)
-		return -ENOENT;
-
-	bo = gem_to_ast_bo(obj);
-	*offset = ast_bo_mmap_offset(bo);
-
-	drm_gem_object_put_unlocked(obj);
-
-	return 0;
-
-}
-
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 97fed0627d1c..ffccbef962a4 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -521,7 +521,6 @@ static void ast_crtc_dpms(struct drm_crtc *crtc, int mode)
 	}
 }
 
-/* ast is different - we will force move buffers out of VRAM */
 static int ast_crtc_do_set_base(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
 				int x, int y, int atomic)
@@ -529,50 +528,54 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc,
 	struct ast_private *ast = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
 	struct ast_framebuffer *ast_fb;
-	struct ast_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
-	u64 gpu_addr;
+	s64 gpu_addr;
+	void *base;
 
-	/* push the previous fb to system ram */
 	if (!atomic && fb) {
 		ast_fb = to_ast_framebuffer(fb);
 		obj = ast_fb->obj;
-		bo = gem_to_ast_bo(obj);
-		ret = ast_bo_reserve(bo, false);
-		if (ret)
-			return ret;
-		ast_bo_push_sysram(bo);
-		ast_bo_unreserve(bo);
+		gbo = drm_gem_vram_of_gem(obj);
+
+		/* unmap if console */
+		if (&ast->fbdev->afb == ast_fb)
+			drm_gem_vram_kunmap(gbo);
+		drm_gem_vram_unpin(gbo);
 	}
 
 	ast_fb = to_ast_framebuffer(crtc->primary->fb);
 	obj = ast_fb->obj;
-	bo = gem_to_ast_bo(obj);
+	gbo = drm_gem_vram_of_gem(obj);
 
-	ret = ast_bo_reserve(bo, false);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret)
 		return ret;
-
-	ret = ast_bo_pin(bo, TTM_PL_FLAG_VRAM, &gpu_addr);
-	if (ret) {
-		ast_bo_unreserve(bo);
-		return ret;
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (gpu_addr < 0) {
+		ret = (int)gpu_addr;
+		goto err_drm_gem_vram_unpin;
 	}
 
 	if (&ast->fbdev->afb == ast_fb) {
 		/* if pushing console in kmap it */
-		ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-		if (ret)
+		base = drm_gem_vram_kmap(gbo, true, NULL);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
 			DRM_ERROR("failed to kmap fbcon\n");
-		else
+		} else {
 			ast_fbdev_set_base(ast, gpu_addr);
+		}
 	}
-	ast_bo_unreserve(bo);
 
 	ast_set_offset_reg(crtc);
 	ast_set_start_address_crt1(crtc, (u32)gpu_addr);
 
 	return 0;
+
+err_drm_gem_vram_unpin:
+	drm_gem_vram_unpin(gbo);
+	return ret;
 }
 
 static int ast_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
@@ -618,21 +621,18 @@ static int ast_crtc_mode_set(struct drm_crtc *crtc,
 
 static void ast_crtc_disable(struct drm_crtc *crtc)
 {
-	int ret;
-
 	DRM_DEBUG_KMS("\n");
 	ast_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
+		struct ast_private *ast = crtc->dev->dev_private;
 		struct ast_framebuffer *ast_fb = to_ast_framebuffer(crtc->primary->fb);
 		struct drm_gem_object *obj = ast_fb->obj;
-		struct ast_bo *bo = gem_to_ast_bo(obj);
+		struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(obj);
 
-		ret = ast_bo_reserve(bo, false);
-		if (ret)
-			return;
-
-		ast_bo_push_sysram(bo);
-		ast_bo_unreserve(bo);
+		/* unmap if console */
+		if (&ast->fbdev->afb == ast_fb)
+			drm_gem_vram_kunmap(gbo);
+		drm_gem_vram_unpin(gbo);
 	}
 	crtc->primary->fb = NULL;
 }
@@ -918,32 +918,34 @@ static int ast_cursor_init(struct drm_device *dev)
 	int size;
 	int ret;
 	struct drm_gem_object *obj;
-	struct ast_bo *bo;
-	uint64_t gpu_addr;
+	struct drm_gem_vram_object *gbo;
+	s64 gpu_addr;
+	void *base;
 
 	size = (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE) * AST_DEFAULT_HWC_NUM;
 
 	ret = ast_gem_create(dev, size, true, &obj);
 	if (ret)
 		return ret;
-	bo = gem_to_ast_bo(obj);
-	ret = ast_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
-		goto fail;
-
-	ret = ast_bo_pin(bo, TTM_PL_FLAG_VRAM, &gpu_addr);
-	ast_bo_unreserve(bo);
+	gbo = drm_gem_vram_of_gem(obj);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret)
 		goto fail;
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (gpu_addr < 0) {
+		drm_gem_vram_unpin(gbo);
+		ret = (int)gpu_addr;
+		goto fail;
+	}
 
 	/* kmap the object */
-	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &ast->cache_kmap);
-	if (ret)
+	base = drm_gem_vram_kmap(gbo, true, NULL);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
 		goto fail;
+	}
 
 	ast->cursor_cache = obj;
-	ast->cursor_cache_gpu_addr = gpu_addr;
-	DRM_DEBUG_KMS("pinned cursor cache at %llx\n", ast->cursor_cache_gpu_addr);
 	return 0;
 fail:
 	return ret;
@@ -952,7 +954,10 @@ fail:
 static void ast_cursor_fini(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
-	ttm_bo_kunmap(&ast->cache_kmap);
+	struct drm_gem_vram_object *gbo =
+		drm_gem_vram_of_gem(ast->cursor_cache);
+	drm_gem_vram_kunmap(gbo);
+	drm_gem_vram_unpin(gbo);
 	drm_gem_object_put_unlocked(ast->cursor_cache);
 }
 
@@ -1173,13 +1178,13 @@ static int ast_cursor_set(struct drm_crtc *crtc,
 	struct ast_private *ast = crtc->dev->dev_private;
 	struct ast_crtc *ast_crtc = to_ast_crtc(crtc);
 	struct drm_gem_object *obj;
-	struct ast_bo *bo;
-	uint64_t gpu_addr;
+	struct drm_gem_vram_object *gbo;
+	s64 dst_gpu;
+	u64 gpu_addr;
 	u32 csum;
 	int ret;
-	struct ttm_bo_kmap_obj uobj_map;
 	u8 *src, *dst;
-	bool src_isiomem, dst_isiomem;
+
 	if (!handle) {
 		ast_hide_cursor(crtc);
 		return 0;
@@ -1193,21 +1198,28 @@ static int ast_cursor_set(struct drm_crtc *crtc,
 		DRM_ERROR("Cannot find cursor object %x for crtc\n", handle);
 		return -ENOENT;
 	}
-	bo = gem_to_ast_bo(obj);
+	gbo = drm_gem_vram_of_gem(obj);
 
-	ret = ast_bo_reserve(bo, false);
+	ret = drm_gem_vram_pin(gbo, 0);
 	if (ret)
-		goto fail;
-
-	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &uobj_map);
-
-	src = ttm_kmap_obj_virtual(&uobj_map, &src_isiomem);
-	dst = ttm_kmap_obj_virtual(&ast->cache_kmap, &dst_isiomem);
+		goto err_drm_gem_object_put_unlocked;
+	src = drm_gem_vram_kmap(gbo, true, NULL);
+	if (IS_ERR(src)) {
+		ret = PTR_ERR(src);
+		goto err_drm_gem_vram_unpin;
+	}
 
-	if (src_isiomem == true)
-		DRM_ERROR("src cursor bo should be in main memory\n");
-	if (dst_isiomem == false)
-		DRM_ERROR("dst bo should be in VRAM\n");
+	dst = drm_gem_vram_kmap(drm_gem_vram_of_gem(ast->cursor_cache),
+				false, NULL);
+	if (IS_ERR(dst)) {
+		ret = PTR_ERR(dst);
+		goto err_drm_gem_vram_kunmap;
+	}
+	dst_gpu = drm_gem_vram_offset(drm_gem_vram_of_gem(ast->cursor_cache));
+	if (dst_gpu < 0) {
+		ret = (int)dst_gpu;
+		goto err_drm_gem_vram_kunmap;
+	}
 
 	dst += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor;
 
@@ -1215,10 +1227,11 @@ static int ast_cursor_set(struct drm_crtc *crtc,
 	csum = copy_cursor_image(src, dst, width, height);
 
 	/* write checksum + signature */
-	ttm_bo_kunmap(&uobj_map);
-	ast_bo_unreserve(bo);
 	{
-		u8 *dst = (u8 *)ast->cache_kmap.virtual + (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
+		struct drm_gem_vram_object *dst_gbo =
+			drm_gem_vram_of_gem(ast->cursor_cache);
+		u8 *dst = drm_gem_vram_kmap(dst_gbo, false, NULL);
+		dst += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
 		writel(csum, dst);
 		writel(width, dst + AST_HWC_SIGNATURE_SizeX);
 		writel(height, dst + AST_HWC_SIGNATURE_SizeY);
@@ -1226,15 +1239,13 @@ static int ast_cursor_set(struct drm_crtc *crtc,
 		writel(0, dst + AST_HWC_SIGNATURE_HOTSPOTY);
 
 		/* set pattern offset */
-		gpu_addr = ast->cursor_cache_gpu_addr;
+		gpu_addr = (u64)dst_gpu;
 		gpu_addr += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor;
 		gpu_addr >>= 3;
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc8, gpu_addr & 0xff);
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc9, (gpu_addr >> 8) & 0xff);
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xca, (gpu_addr >> 16) & 0xff);
 	}
-	ast_crtc->cursor_width = width;
-	ast_crtc->cursor_height = height;
 	ast_crtc->offset_x = AST_MAX_HWC_WIDTH - width;
 	ast_crtc->offset_y = AST_MAX_HWC_WIDTH - height;
 
@@ -1242,9 +1253,17 @@ static int ast_cursor_set(struct drm_crtc *crtc,
 
 	ast_show_cursor(crtc);
 
+	drm_gem_vram_kunmap(gbo);
+	drm_gem_vram_unpin(gbo);
 	drm_gem_object_put_unlocked(obj);
+
 	return 0;
-fail:
+
+err_drm_gem_vram_kunmap:
+	drm_gem_vram_kunmap(gbo);
+err_drm_gem_vram_unpin:
+	drm_gem_vram_unpin(gbo);
+err_drm_gem_object_put_unlocked:
 	drm_gem_object_put_unlocked(obj);
 	return ret;
 }
@@ -1257,7 +1276,9 @@ static int ast_cursor_move(struct drm_crtc *crtc,
 	int x_offset, y_offset;
 	u8 *sig;
 
-	sig = (u8 *)ast->cache_kmap.virtual + (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
+	sig = drm_gem_vram_kmap(drm_gem_vram_of_gem(ast->cursor_cache),
+				false, NULL);
+	sig += (AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE)*ast->next_cursor + AST_HWC_SIZE;
 	writel(x, sig + AST_HWC_SIGNATURE_X);
 	writel(y, sig + AST_HWC_SIGNATURE_Y);
 
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 75d477b37854..779c53efee8e 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -26,168 +26,21 @@
  * Authors: Dave Airlie <airlied@redhat.com>
  */
 #include <drm/drmP.h>
-#include <drm/ttm/ttm_page_alloc.h>
 
 #include "ast_drv.h"
 
-static inline struct ast_private *
-ast_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct ast_private, ttm.bdev);
-}
-
-static void ast_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct ast_bo *bo;
-
-	bo = container_of(tbo, struct ast_bo, bo);
-
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool ast_ttm_bo_is_ast_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &ast_bo_ttm_destroy)
-		return true;
-	return false;
-}
-
-static int
-ast_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-		     struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-ast_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct ast_bo *astbo = ast_bo(bo);
-
-	if (!ast_ttm_bo_is_ast_bo(bo))
-		return;
-
-	ast_ttm_placement(astbo, TTM_PL_FLAG_SYSTEM);
-	*pl = astbo->placement;
-}
-
-static int ast_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
-{
-	struct ast_bo *astbo = ast_bo(bo);
-
-	return drm_vma_node_verify_access(&astbo->gem.vma_node,
-					  filp->private_data);
-}
-
-static int ast_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				  struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct ast_private *ast = ast_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = pci_resource_start(ast->dev->pdev, 0);
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-	return 0;
-}
-
-static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
-static void ast_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func ast_tt_backend_func = {
-	.destroy = &ast_ttm_backend_destroy,
-};
-
-
-static struct ttm_tt *ast_ttm_tt_create(struct ttm_buffer_object *bo,
-					uint32_t page_flags)
-{
-	struct ttm_tt *tt;
-
-	tt = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
-	if (tt == NULL)
-		return NULL;
-	tt->func = &ast_tt_backend_func;
-	if (ttm_tt_init(tt, bo, page_flags)) {
-		kfree(tt);
-		return NULL;
-	}
-	return tt;
-}
-
-struct ttm_bo_driver ast_bo_driver = {
-	.ttm_tt_create = ast_ttm_tt_create,
-	.init_mem_type = ast_bo_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = ast_bo_evict_flags,
-	.move = NULL,
-	.verify_access = ast_bo_verify_access,
-	.io_mem_reserve = &ast_ttm_io_mem_reserve,
-	.io_mem_free = &ast_ttm_io_mem_free,
-};
-
 int ast_mm_init(struct ast_private *ast)
 {
+	struct drm_vram_mm *vmm;
 	int ret;
 	struct drm_device *dev = ast->dev;
-	struct ttm_bo_device *bdev = &ast->ttm.bdev;
-
-	ret = ttm_bo_device_init(&ast->ttm.bdev,
-				 &ast_bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("Error initialising bo driver; %d\n", ret);
-		return ret;
-	}
 
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
-			     ast->vram_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
+	vmm = drm_vram_helper_alloc_mm(
+		dev, pci_resource_start(dev->pdev, 0),
+		ast->vram_size, &drm_gem_vram_mm_funcs);
+	if (IS_ERR(vmm)) {
+		ret = PTR_ERR(vmm);
+		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
 		return ret;
 	}
 
@@ -203,148 +56,9 @@ void ast_mm_fini(struct ast_private *ast)
 {
 	struct drm_device *dev = ast->dev;
 
-	ttm_bo_device_release(&ast->ttm.bdev);
+	drm_vram_helper_release_mm(dev);
 
 	arch_phys_wc_del(ast->fb_mtrr);
 	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
 				pci_resource_len(dev->pdev, 0));
 }
-
-void ast_ttm_placement(struct ast_bo *bo, int domain)
-{
-	u32 c = 0;
-	unsigned i;
-
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
-	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
-	if (!c)
-		bo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
-	bo->placement.num_placement = c;
-	bo->placement.num_busy_placement = c;
-	for (i = 0; i < c; ++i) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-}
-
-int ast_bo_create(struct drm_device *dev, int size, int align,
-		  uint32_t flags, struct ast_bo **pastbo)
-{
-	struct ast_private *ast = dev->dev_private;
-	struct ast_bo *astbo;
-	size_t acc_size;
-	int ret;
-
-	astbo = kzalloc(sizeof(struct ast_bo), GFP_KERNEL);
-	if (!astbo)
-		return -ENOMEM;
-
-	ret = drm_gem_object_init(dev, &astbo->gem, size);
-	if (ret)
-		goto error;
-
-	astbo->bo.bdev = &ast->ttm.bdev;
-
-	ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&ast->ttm.bdev, size,
-				       sizeof(struct ast_bo));
-
-	ret = ttm_bo_init(&ast->ttm.bdev, &astbo->bo, size,
-			  ttm_bo_type_device, &astbo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, ast_bo_ttm_destroy);
-	if (ret)
-		goto error;
-
-	*pastbo = astbo;
-	return 0;
-error:
-	kfree(astbo);
-	return ret;
-}
-
-static inline u64 ast_bo_gpu_offset(struct ast_bo *bo)
-{
-	return bo->bo.offset;
-}
-
-int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		if (gpu_addr)
-			*gpu_addr = ast_bo_gpu_offset(bo);
-	}
-
-	ast_ttm_placement(bo, pl_flag);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret)
-		return ret;
-
-	bo->pin_count = 1;
-	if (gpu_addr)
-		*gpu_addr = ast_bo_gpu_offset(bo);
-	return 0;
-}
-
-int ast_bo_unpin(struct ast_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i;
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-	return ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-}
-
-int ast_bo_push_sysram(struct ast_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	if (bo->kmap.virtual)
-		ttm_bo_kunmap(&bo->kmap);
-
-	ast_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret) {
-		DRM_ERROR("pushing to VRAM failed\n");
-		return ret;
-	}
-	return 0;
-}
-
-int ast_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct ast_private *ast = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &ast->ttm.bdev);
-}
diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c
index 2362f07fe1fc..2a413e291a60 100644
--- a/drivers/gpu/drm/ati_pcigart.c
+++ b/drivers/gpu/drm/ati_pcigart.c
@@ -32,9 +32,12 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
 
 #include <drm/ati_pcigart.h>
+#include <drm/drm_device.h>
+#include <drm/drm_os_linux.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_print.h>
 
 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 8070a558d7b1..81c50772df05 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -78,7 +78,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 	unsigned long mode_rate;
 	struct videomode vm;
 	unsigned long prate;
-	unsigned int cfg;
+	unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL;
+	unsigned int cfg = 0;
 	int div;
 
 	vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay;
@@ -101,7 +102,10 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 		     (adj->crtc_hdisplay - 1) |
 		     ((adj->crtc_vdisplay - 1) << 16));
 
-	cfg = ATMEL_HLCDC_CLKSEL;
+	if (!crtc->dc->desc->fixed_clksrc) {
+		cfg |= ATMEL_HLCDC_CLKSEL;
+		mask |= ATMEL_HLCDC_CLKSEL;
+	}
 
 	prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk);
 	mode_rate = adj->crtc_clock * 1000;
@@ -132,11 +136,10 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 
 	cfg |= ATMEL_HLCDC_CLKDIV(div);
 
-	regmap_update_bits(regmap, ATMEL_HLCDC_CFG(0),
-			   ATMEL_HLCDC_CLKSEL | ATMEL_HLCDC_CLKDIV_MASK |
-			   ATMEL_HLCDC_CLKPOL, cfg);
+	regmap_update_bits(regmap, ATMEL_HLCDC_CFG(0), mask, cfg);
 
-	cfg = 0;
+	state = drm_crtc_state_to_atmel_hlcdc_crtc_state(c->state);
+	cfg = state->output_mode << 8;
 
 	if (adj->flags & DRM_MODE_FLAG_NVSYNC)
 		cfg |= ATMEL_HLCDC_VSPOL;
@@ -144,9 +147,6 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c)
 	if (adj->flags & DRM_MODE_FLAG_NHSYNC)
 		cfg |= ATMEL_HLCDC_HSPOL;
 
-	state = drm_crtc_state_to_atmel_hlcdc_crtc_state(c->state);
-	cfg |= state->output_mode << 8;
-
 	regmap_update_bits(regmap, ATMEL_HLCDC_CFG(5),
 			   ATMEL_HLCDC_HSPOL | ATMEL_HLCDC_VSPOL |
 			   ATMEL_HLCDC_VSPDLYS | ATMEL_HLCDC_VSPDLYE |
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 0be13eceedba..fb2e7646daeb 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -364,6 +364,103 @@ static const struct atmel_hlcdc_dc_desc atmel_hlcdc_dc_sama5d4 = {
 	.nlayers = ARRAY_SIZE(atmel_hlcdc_sama5d4_layers),
 	.layers = atmel_hlcdc_sama5d4_layers,
 };
+
+static const struct atmel_hlcdc_layer_desc atmel_hlcdc_sam9x60_layers[] = {
+	{
+		.name = "base",
+		.formats = &atmel_hlcdc_plane_rgb_formats,
+		.regs_offset = 0x60,
+		.id = 0,
+		.type = ATMEL_HLCDC_BASE_LAYER,
+		.cfgs_offset = 0x2c,
+		.layout = {
+			.xstride = { 2 },
+			.default_color = 3,
+			.general_config = 4,
+			.disc_pos = 5,
+			.disc_size = 6,
+		},
+		.clut_offset = 0x600,
+	},
+	{
+		.name = "overlay1",
+		.formats = &atmel_hlcdc_plane_rgb_formats,
+		.regs_offset = 0x160,
+		.id = 1,
+		.type = ATMEL_HLCDC_OVERLAY_LAYER,
+		.cfgs_offset = 0x2c,
+		.layout = {
+			.pos = 2,
+			.size = 3,
+			.xstride = { 4 },
+			.pstride = { 5 },
+			.default_color = 6,
+			.chroma_key = 7,
+			.chroma_key_mask = 8,
+			.general_config = 9,
+		},
+		.clut_offset = 0xa00,
+	},
+	{
+		.name = "overlay2",
+		.formats = &atmel_hlcdc_plane_rgb_formats,
+		.regs_offset = 0x260,
+		.id = 2,
+		.type = ATMEL_HLCDC_OVERLAY_LAYER,
+		.cfgs_offset = 0x2c,
+		.layout = {
+			.pos = 2,
+			.size = 3,
+			.xstride = { 4 },
+			.pstride = { 5 },
+			.default_color = 6,
+			.chroma_key = 7,
+			.chroma_key_mask = 8,
+			.general_config = 9,
+		},
+		.clut_offset = 0xe00,
+	},
+	{
+		.name = "high-end-overlay",
+		.formats = &atmel_hlcdc_plane_rgb_and_yuv_formats,
+		.regs_offset = 0x360,
+		.id = 3,
+		.type = ATMEL_HLCDC_OVERLAY_LAYER,
+		.cfgs_offset = 0x4c,
+		.layout = {
+			.pos = 2,
+			.size = 3,
+			.memsize = 4,
+			.xstride = { 5, 7 },
+			.pstride = { 6, 8 },
+			.default_color = 9,
+			.chroma_key = 10,
+			.chroma_key_mask = 11,
+			.general_config = 12,
+			.scaler_config = 13,
+			.phicoeffs = {
+				.x = 17,
+				.y = 33,
+			},
+			.csc = 14,
+		},
+		.clut_offset = 0x1200,
+	},
+};
+
+static const struct atmel_hlcdc_dc_desc atmel_hlcdc_dc_sam9x60 = {
+	.min_width = 0,
+	.min_height = 0,
+	.max_width = 2048,
+	.max_height = 2048,
+	.max_spw = 0xff,
+	.max_vpw = 0xff,
+	.max_hpw = 0x3ff,
+	.fixed_clksrc = true,
+	.nlayers = ARRAY_SIZE(atmel_hlcdc_sam9x60_layers),
+	.layers = atmel_hlcdc_sam9x60_layers,
+};
+
 static const struct of_device_id atmel_hlcdc_of_match[] = {
 	{
 		.compatible = "atmel,at91sam9n12-hlcdc",
@@ -385,6 +482,10 @@ static const struct of_device_id atmel_hlcdc_of_match[] = {
 		.compatible = "atmel,sama5d4-hlcdc",
 		.data = &atmel_hlcdc_dc_sama5d4,
 	},
+	{
+		.compatible = "microchip,sam9x60-hlcdc",
+		.data = &atmel_hlcdc_dc_sam9x60,
+	},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, atmel_hlcdc_of_match);
@@ -625,10 +726,18 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 	dc->hlcdc = dev_get_drvdata(dev->dev->parent);
 	dev->dev_private = dc;
 
+	if (dc->desc->fixed_clksrc) {
+		ret = clk_prepare_enable(dc->hlcdc->sys_clk);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable sys_clk\n");
+			goto err_destroy_wq;
+		}
+	}
+
 	ret = clk_prepare_enable(dc->hlcdc->periph_clk);
 	if (ret) {
 		dev_err(dev->dev, "failed to enable periph_clk\n");
-		goto err_destroy_wq;
+		goto err_sys_clk_disable;
 	}
 
 	pm_runtime_enable(dev->dev);
@@ -664,6 +773,9 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 err_periph_clk_disable:
 	pm_runtime_disable(dev->dev);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
+err_sys_clk_disable:
+	if (dc->desc->fixed_clksrc)
+		clk_disable_unprepare(dc->hlcdc->sys_clk);
 
 err_destroy_wq:
 	destroy_workqueue(dc->wq);
@@ -688,6 +800,8 @@ static void atmel_hlcdc_dc_unload(struct drm_device *dev)
 
 	pm_runtime_disable(dev->dev);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
+	if (dc->desc->fixed_clksrc)
+		clk_disable_unprepare(dc->hlcdc->sys_clk);
 	destroy_workqueue(dc->wq);
 }
 
@@ -805,6 +919,8 @@ static int atmel_hlcdc_dc_drm_suspend(struct device *dev)
 	regmap_read(regmap, ATMEL_HLCDC_IMR, &dc->suspend.imr);
 	regmap_write(regmap, ATMEL_HLCDC_IDR, dc->suspend.imr);
 	clk_disable_unprepare(dc->hlcdc->periph_clk);
+	if (dc->desc->fixed_clksrc)
+		clk_disable_unprepare(dc->hlcdc->sys_clk);
 
 	return 0;
 }
@@ -814,6 +930,8 @@ static int atmel_hlcdc_dc_drm_resume(struct device *dev)
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 	struct atmel_hlcdc_dc *dc = drm_dev->dev_private;
 
+	if (dc->desc->fixed_clksrc)
+		clk_prepare_enable(dc->hlcdc->sys_clk);
 	clk_prepare_enable(dc->hlcdc->periph_clk);
 	regmap_write(dc->hlcdc->regmap, ATMEL_HLCDC_IER, dc->suspend.imr);
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
index 70bd540d644e..0155efb9c443 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
@@ -328,6 +328,7 @@ atmel_hlcdc_layer_to_plane(struct atmel_hlcdc_layer *layer)
  * @max_hpw: maximum horizontal back/front porch width
  * @conflicting_output_formats: true if RGBXXX output formats conflict with
  *				each other.
+ * @fixed_clksrc: true if clock source is fixed
  * @layers: a layer description table describing available layers
  * @nlayers: layer description table size
  */
@@ -340,6 +341,7 @@ struct atmel_hlcdc_dc_desc {
 	int max_vpw;
 	int max_hpw;
 	bool conflicting_output_formats;
+	bool fixed_clksrc;
 	const struct atmel_hlcdc_layer_desc *layers;
 	int nlayers;
 };
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index e836e2de35ce..0ee5b7a3a4b0 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -382,7 +382,7 @@ atmel_hlcdc_plane_update_general_settings(struct atmel_hlcdc_plane *plane,
 			cfg |= ATMEL_HLCDC_LAYER_LAEN;
 		else
 			cfg |= ATMEL_HLCDC_LAYER_GAEN |
-			       ATMEL_HLCDC_LAYER_GA(state->base.alpha >> 8);
+			       ATMEL_HLCDC_LAYER_GA(state->base.alpha);
 	}
 
 	if (state->disc_h && state->disc_w)
@@ -603,8 +603,6 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	const struct drm_display_mode *mode;
 	struct drm_crtc_state *crtc_state;
 	unsigned int tmp;
-	int hsub = 1;
-	int vsub = 1;
 	int ret;
 	int i;
 
@@ -642,13 +640,10 @@ static int atmel_hlcdc_plane_atomic_check(struct drm_plane *p,
 	if (state->nplanes > ATMEL_HLCDC_LAYER_MAX_PLANES)
 		return -EINVAL;
 
-	hsub = drm_format_horz_chroma_subsampling(fb->format->format);
-	vsub = drm_format_vert_chroma_subsampling(fb->format->format);
-
 	for (i = 0; i < state->nplanes; i++) {
 		unsigned int offset = 0;
-		int xdiv = i ? hsub : 1;
-		int ydiv = i ? vsub : 1;
+		int xdiv = i ? fb->format->hsub : 1;
+		int ydiv = i ? fb->format->vsub : 1;
 
 		state->bpp[i] = fb->format->cpp[i];
 		if (!state->bpp[i])
diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig
index 17885fab131d..32b043abb668 100644
--- a/drivers/gpu/drm/bochs/Kconfig
+++ b/drivers/gpu/drm/bochs/Kconfig
@@ -3,7 +3,7 @@ config DRM_BOCHS
 	tristate "DRM Support for bochs dispi vga interface (qemu stdvga)"
 	depends on DRM && PCI && MMU
 	select DRM_KMS_HELPER
-	select DRM_TTM
+	select DRM_VRAM_HELPER
 	help
 	  Choose this option for qemu.
 	  If M is selected the module will be called bochs-drm.
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 341cc9d1bab4..cc35d492142c 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -10,9 +10,9 @@
 #include <drm/drm_simple_kms_helper.h>
 
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
 
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_page_alloc.h>
+#include <drm/drm_vram_mm_helper.h>
 
 /* ---------------------------------------------------------------------- */
 
@@ -73,38 +73,8 @@ struct bochs_device {
 	struct drm_device *dev;
 	struct drm_simple_display_pipe pipe;
 	struct drm_connector connector;
-
-	/* ttm */
-	struct {
-		struct ttm_bo_device bdev;
-		bool initialized;
-	} ttm;
-};
-
-struct bochs_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
 };
 
-static inline struct bochs_bo *bochs_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct bochs_bo, bo);
-}
-
-static inline struct bochs_bo *gem_to_bochs_bo(struct drm_gem_object *gem)
-{
-	return container_of(gem, struct bochs_bo, gem);
-}
-
-static inline u64 bochs_bo_mmap_offset(struct bochs_bo *bo)
-{
-	return drm_vma_node_offset_addr(&bo->bo.vma_node);
-}
-
 /* ---------------------------------------------------------------------- */
 
 /* bochs_hw.c */
@@ -122,26 +92,6 @@ int bochs_hw_load_edid(struct bochs_device *bochs);
 /* bochs_mm.c */
 int bochs_mm_init(struct bochs_device *bochs);
 void bochs_mm_fini(struct bochs_device *bochs);
-int bochs_mmap(struct file *filp, struct vm_area_struct *vma);
-
-int bochs_gem_create(struct drm_device *dev, u32 size, bool iskernel,
-		     struct drm_gem_object **obj);
-int bochs_gem_init_object(struct drm_gem_object *obj);
-void bochs_gem_free_object(struct drm_gem_object *obj);
-int bochs_dumb_create(struct drm_file *file, struct drm_device *dev,
-		      struct drm_mode_create_dumb *args);
-int bochs_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
-			   uint32_t handle, uint64_t *offset);
-
-int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag);
-int bochs_bo_unpin(struct bochs_bo *bo);
-
-int bochs_gem_prime_pin(struct drm_gem_object *obj);
-void bochs_gem_prime_unpin(struct drm_gem_object *obj);
-void *bochs_gem_prime_vmap(struct drm_gem_object *obj);
-void bochs_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-int bochs_gem_prime_mmap(struct drm_gem_object *obj,
-			 struct vm_area_struct *vma);
 
 /* bochs_kms.c */
 int bochs_kms_init(struct bochs_device *bochs);
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index b86cc705138c..8f3a5bda9d03 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_atomic_helper.h>
 
 #include "bochs.h"
 
@@ -60,14 +61,7 @@ err:
 
 static const struct file_operations bochs_fops = {
 	.owner		= THIS_MODULE,
-	.open		= drm_open,
-	.release	= drm_release,
-	.unlocked_ioctl	= drm_ioctl,
-	.compat_ioctl	= drm_compat_ioctl,
-	.poll		= drm_poll,
-	.read		= drm_read,
-	.llseek		= no_llseek,
-	.mmap           = bochs_mmap,
+	DRM_VRAM_MM_FILE_OPERATIONS
 };
 
 static struct drm_driver bochs_driver = {
@@ -79,17 +73,8 @@ static struct drm_driver bochs_driver = {
 	.date			= "20130925",
 	.major			= 1,
 	.minor			= 0,
-	.gem_free_object_unlocked = bochs_gem_free_object,
-	.dumb_create            = bochs_dumb_create,
-	.dumb_map_offset        = bochs_dumb_mmap_offset,
-
-	.gem_prime_export = drm_gem_prime_export,
-	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_pin = bochs_gem_prime_pin,
-	.gem_prime_unpin = bochs_gem_prime_unpin,
-	.gem_prime_vmap = bochs_gem_prime_vmap,
-	.gem_prime_vunmap = bochs_gem_prime_vunmap,
-	.gem_prime_mmap = bochs_gem_prime_mmap,
+	DRM_GEM_VRAM_DRIVER,
+	DRM_GEM_VRAM_DRIVER_PRIME,
 };
 
 /* ---------------------------------------------------------------------- */
@@ -171,6 +156,7 @@ static void bochs_pci_remove(struct pci_dev *pdev)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
+	drm_atomic_helper_shutdown(dev);
 	drm_dev_unregister(dev);
 	bochs_unload(dev);
 	drm_dev_put(dev);
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index 37e515221ad8..5904eddc83a5 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -27,16 +27,16 @@ static const uint32_t bochs_formats[] = {
 static void bochs_plane_update(struct bochs_device *bochs,
 			       struct drm_plane_state *state)
 {
-	struct bochs_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!state->fb || !bochs->stride)
 		return;
 
-	bo = gem_to_bochs_bo(state->fb->obj[0]);
+	gbo = drm_gem_vram_of_gem(state->fb->obj[0]);
 	bochs_hw_setbase(bochs,
 			 state->crtc_x,
 			 state->crtc_y,
-			 bo->bo.offset);
+			 gbo->bo.offset);
 	bochs_hw_setformat(bochs, state->fb->format);
 }
 
@@ -69,23 +69,23 @@ static void bochs_pipe_update(struct drm_simple_display_pipe *pipe,
 static int bochs_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
 				 struct drm_plane_state *new_state)
 {
-	struct bochs_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!new_state->fb)
 		return 0;
-	bo = gem_to_bochs_bo(new_state->fb->obj[0]);
-	return bochs_bo_pin(bo, TTM_PL_FLAG_VRAM);
+	gbo = drm_gem_vram_of_gem(new_state->fb->obj[0]);
+	return drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 }
 
 static void bochs_pipe_cleanup_fb(struct drm_simple_display_pipe *pipe,
 				  struct drm_plane_state *old_state)
 {
-	struct bochs_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!old_state->fb)
 		return;
-	bo = gem_to_bochs_bo(old_state->fb->obj[0]);
-	bochs_bo_unpin(bo);
+	gbo = drm_gem_vram_of_gem(old_state->fb->obj[0]);
+	drm_gem_vram_unpin(gbo);
 }
 
 static const struct drm_simple_display_pipe_funcs bochs_pipe_funcs = {
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 807c80f1f024..8f9bb886f7ad 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -4,435 +4,22 @@
 
 #include "bochs.h"
 
-static void bochs_ttm_placement(struct bochs_bo *bo, int domain);
-
 /* ---------------------------------------------------------------------- */
 
-static inline struct bochs_device *bochs_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct bochs_device, ttm.bdev);
-}
-
-static void bochs_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct bochs_bo *bo;
-
-	bo = container_of(tbo, struct bochs_bo, bo);
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool bochs_ttm_bo_is_bochs_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &bochs_bo_ttm_destroy)
-		return true;
-	return false;
-}
-
-static int bochs_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-				  struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-bochs_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct bochs_bo *bochsbo = bochs_bo(bo);
-
-	if (!bochs_ttm_bo_is_bochs_bo(bo))
-		return;
-
-	bochs_ttm_placement(bochsbo, TTM_PL_FLAG_SYSTEM);
-	*pl = bochsbo->placement;
-}
-
-static int bochs_bo_verify_access(struct ttm_buffer_object *bo,
-				  struct file *filp)
-{
-	struct bochs_bo *bochsbo = bochs_bo(bo);
-
-	return drm_vma_node_verify_access(&bochsbo->gem.vma_node,
-					  filp->private_data);
-}
-
-static int bochs_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				    struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct bochs_device *bochs = bochs_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = bochs->fb_base;
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-	return 0;
-}
-
-static void bochs_ttm_io_mem_free(struct ttm_bo_device *bdev,
-				  struct ttm_mem_reg *mem)
-{
-}
-
-static void bochs_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func bochs_tt_backend_func = {
-	.destroy = &bochs_ttm_backend_destroy,
-};
-
-static struct ttm_tt *bochs_ttm_tt_create(struct ttm_buffer_object *bo,
-					  uint32_t page_flags)
-{
-	struct ttm_tt *tt;
-
-	tt = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
-	if (tt == NULL)
-		return NULL;
-	tt->func = &bochs_tt_backend_func;
-	if (ttm_tt_init(tt, bo, page_flags)) {
-		kfree(tt);
-		return NULL;
-	}
-	return tt;
-}
-
-static struct ttm_bo_driver bochs_bo_driver = {
-	.ttm_tt_create = bochs_ttm_tt_create,
-	.init_mem_type = bochs_bo_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = bochs_bo_evict_flags,
-	.move = NULL,
-	.verify_access = bochs_bo_verify_access,
-	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
-	.io_mem_free = &bochs_ttm_io_mem_free,
-};
-
 int bochs_mm_init(struct bochs_device *bochs)
 {
-	struct ttm_bo_device *bdev = &bochs->ttm.bdev;
-	int ret;
+	struct drm_vram_mm *vmm;
 
-	ret = ttm_bo_device_init(&bochs->ttm.bdev,
-				 &bochs_bo_driver,
-				 bochs->dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("Error initialising bo driver; %d\n", ret);
-		return ret;
-	}
-
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
-			     bochs->fb_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
-		return ret;
-	}
-
-	bochs->ttm.initialized = true;
-	return 0;
+	vmm = drm_vram_helper_alloc_mm(bochs->dev, bochs->fb_base,
+				       bochs->fb_size,
+				       &drm_gem_vram_mm_funcs);
+	return PTR_ERR_OR_ZERO(vmm);
 }
 
 void bochs_mm_fini(struct bochs_device *bochs)
 {
-	if (!bochs->ttm.initialized)
+	if (!bochs->dev->vram_mm)
 		return;
 
-	ttm_bo_device_release(&bochs->ttm.bdev);
-	bochs->ttm.initialized = false;
-}
-
-static void bochs_ttm_placement(struct bochs_bo *bo, int domain)
-{
-	unsigned i;
-	u32 c = 0;
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-	if (domain & TTM_PL_FLAG_VRAM) {
-		bo->placements[c++].flags = TTM_PL_FLAG_WC
-			| TTM_PL_FLAG_UNCACHED
-			| TTM_PL_FLAG_VRAM;
-	}
-	if (domain & TTM_PL_FLAG_SYSTEM) {
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING
-			| TTM_PL_FLAG_SYSTEM;
-	}
-	if (!c) {
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING
-			| TTM_PL_FLAG_SYSTEM;
-	}
-	for (i = 0; i < c; ++i) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-	bo->placement.num_placement = c;
-	bo->placement.num_busy_placement = c;
-}
-
-int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		return 0;
-	}
-
-	bochs_ttm_placement(bo, pl_flag);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_reserve(&bo->bo, true, false, NULL);
-	if (ret)
-		return ret;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	ttm_bo_unreserve(&bo->bo);
-	if (ret)
-		return ret;
-
-	bo->pin_count = 1;
-	return 0;
-}
-
-int bochs_bo_unpin(struct bochs_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-
-	if (bo->pin_count)
-		return 0;
-
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_reserve(&bo->bo, true, false, NULL);
-	if (ret)
-		return ret;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	ttm_bo_unreserve(&bo->bo);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-int bochs_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct bochs_device *bochs = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &bochs->ttm.bdev);
-}
-
-/* ---------------------------------------------------------------------- */
-
-static int bochs_bo_create(struct drm_device *dev, int size, int align,
-			   uint32_t flags, struct bochs_bo **pbochsbo)
-{
-	struct bochs_device *bochs = dev->dev_private;
-	struct bochs_bo *bochsbo;
-	size_t acc_size;
-	int ret;
-
-	bochsbo = kzalloc(sizeof(struct bochs_bo), GFP_KERNEL);
-	if (!bochsbo)
-		return -ENOMEM;
-
-	ret = drm_gem_object_init(dev, &bochsbo->gem, size);
-	if (ret) {
-		kfree(bochsbo);
-		return ret;
-	}
-
-	bochsbo->bo.bdev = &bochs->ttm.bdev;
-	bochsbo->bo.bdev->dev_mapping = dev->anon_inode->i_mapping;
-
-	bochs_ttm_placement(bochsbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&bochs->ttm.bdev, size,
-				       sizeof(struct bochs_bo));
-
-	ret = ttm_bo_init(&bochs->ttm.bdev, &bochsbo->bo, size,
-			  ttm_bo_type_device, &bochsbo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, bochs_bo_ttm_destroy);
-	if (ret)
-		return ret;
-
-	*pbochsbo = bochsbo;
-	return 0;
-}
-
-int bochs_gem_create(struct drm_device *dev, u32 size, bool iskernel,
-		     struct drm_gem_object **obj)
-{
-	struct bochs_bo *bochsbo;
-	int ret;
-
-	*obj = NULL;
-
-	size = PAGE_ALIGN(size);
-	if (size == 0)
-		return -EINVAL;
-
-	ret = bochs_bo_create(dev, size, 0, 0, &bochsbo);
-	if (ret) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("failed to allocate GEM object\n");
-		return ret;
-	}
-	*obj = &bochsbo->gem;
-	return 0;
-}
-
-int bochs_dumb_create(struct drm_file *file, struct drm_device *dev,
-		      struct drm_mode_create_dumb *args)
-{
-	struct drm_gem_object *gobj;
-	u32 handle;
-	int ret;
-
-	args->pitch = args->width * ((args->bpp + 7) / 8);
-	args->size = args->pitch * args->height;
-
-	ret = bochs_gem_create(dev, args->size, false,
-			       &gobj);
-	if (ret)
-		return ret;
-
-	ret = drm_gem_handle_create(file, gobj, &handle);
-	drm_gem_object_put_unlocked(gobj);
-	if (ret)
-		return ret;
-
-	args->handle = handle;
-	return 0;
-}
-
-static void bochs_bo_unref(struct bochs_bo **bo)
-{
-	struct ttm_buffer_object *tbo;
-
-	if ((*bo) == NULL)
-		return;
-
-	tbo = &((*bo)->bo);
-	ttm_bo_put(tbo);
-	*bo = NULL;
-}
-
-void bochs_gem_free_object(struct drm_gem_object *obj)
-{
-	struct bochs_bo *bochs_bo = gem_to_bochs_bo(obj);
-
-	bochs_bo_unref(&bochs_bo);
-}
-
-int bochs_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
-			   uint32_t handle, uint64_t *offset)
-{
-	struct drm_gem_object *obj;
-	struct bochs_bo *bo;
-
-	obj = drm_gem_object_lookup(file, handle);
-	if (obj == NULL)
-		return -ENOENT;
-
-	bo = gem_to_bochs_bo(obj);
-	*offset = bochs_bo_mmap_offset(bo);
-
-	drm_gem_object_put_unlocked(obj);
-	return 0;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int bochs_gem_prime_pin(struct drm_gem_object *obj)
-{
-	struct bochs_bo *bo = gem_to_bochs_bo(obj);
-
-	return bochs_bo_pin(bo, TTM_PL_FLAG_VRAM);
-}
-
-void bochs_gem_prime_unpin(struct drm_gem_object *obj)
-{
-	struct bochs_bo *bo = gem_to_bochs_bo(obj);
-
-	bochs_bo_unpin(bo);
-}
-
-void *bochs_gem_prime_vmap(struct drm_gem_object *obj)
-{
-	struct bochs_bo *bo = gem_to_bochs_bo(obj);
-	bool is_iomem;
-	int ret;
-
-	ret = bochs_bo_pin(bo, TTM_PL_FLAG_VRAM);
-	if (ret)
-		return NULL;
-	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-	if (ret) {
-		bochs_bo_unpin(bo);
-		return NULL;
-	}
-	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
-}
-
-void bochs_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-	struct bochs_bo *bo = gem_to_bochs_bo(obj);
-
-	ttm_bo_kunmap(&bo->kmap);
-	bochs_bo_unpin(bo);
-}
-
-int bochs_gem_prime_mmap(struct drm_gem_object *obj,
-			 struct vm_area_struct *vma)
-{
-	struct bochs_bo *bo = gem_to_bochs_bo(obj);
-
-	bo->gem.vma_node.vm_node.start = bo->bo.vma_node.vm_node.start;
-	return drm_gem_prime_mmap(obj, vma);
+	drm_vram_helper_release_mm(bochs->dev);
 }
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 0e3e868850d5..f6d2681f6927 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -5,21 +5,21 @@
  * Copyright 2012 Analog Devices Inc.
  */
 
+#include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/slab.h>
-#include <linux/clk.h>
 
-#include <drm/drmP.h>
+#include <media/cec.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
-#include <media/cec.h>
-
 #include "adv7511.h"
 
 /* ADI recommended values for proper operation. */
diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix-anx78xx.c
index 6bcc36e77c65..3c7cc5af735c 100644
--- a/drivers/gpu/drm/bridge/analogix-anx78xx.c
+++ b/drivers/gpu/drm/bridge/analogix-anx78xx.c
@@ -7,23 +7,22 @@
  */
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
+#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/regmap.h>
-#include <linux/types.h>
-#include <linux/gpio/consumer.h>
 #include <linux/regulator/consumer.h>
+#include <linux/types.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 #include "analogix-anx78xx.h"
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 3666c308c34a..3f7f4880be09 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -6,27 +6,26 @@
 * Author: Jingoo Han <jg1.han@samsung.com>
 */
 
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
-#include <linux/interrupt.h>
+#include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio.h>
-#include <linux/component.h>
 #include <linux/phy/phy.h>
+#include <linux/platform_device.h>
 
-#include <drm/drmP.h>
+#include <drm/bridge/analogix_dp.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
-#include <drm/bridge/analogix_dp.h>
-
 #include "analogix_dp_core.h"
 #include "analogix_dp_reg.h"
 
@@ -111,7 +110,7 @@ EXPORT_SYMBOL_GPL(analogix_dp_psr_enabled);
 
 int analogix_dp_enable_psr(struct analogix_dp_device *dp)
 {
-	struct edp_vsc_psr psr_vsc;
+	struct dp_sdp psr_vsc;
 
 	if (!dp->psr_enable)
 		return 0;
@@ -123,8 +122,8 @@ int analogix_dp_enable_psr(struct analogix_dp_device *dp)
 	psr_vsc.sdp_header.HB2 = 0x2;
 	psr_vsc.sdp_header.HB3 = 0x8;
 
-	psr_vsc.DB0 = 0;
-	psr_vsc.DB1 = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID;
+	psr_vsc.db[0] = 0;
+	psr_vsc.db[1] = EDP_VSC_PSR_STATE_ACTIVE | EDP_VSC_PSR_CRC_VALUES_VALID;
 
 	return analogix_dp_send_psr_spd(dp, &psr_vsc, true);
 }
@@ -132,7 +131,7 @@ EXPORT_SYMBOL_GPL(analogix_dp_enable_psr);
 
 int analogix_dp_disable_psr(struct analogix_dp_device *dp)
 {
-	struct edp_vsc_psr psr_vsc;
+	struct dp_sdp psr_vsc;
 	int ret;
 
 	if (!dp->psr_enable)
@@ -145,8 +144,8 @@ int analogix_dp_disable_psr(struct analogix_dp_device *dp)
 	psr_vsc.sdp_header.HB2 = 0x2;
 	psr_vsc.sdp_header.HB3 = 0x8;
 
-	psr_vsc.DB0 = 0;
-	psr_vsc.DB1 = 0;
+	psr_vsc.db[0] = 0;
+	psr_vsc.db[1] = 0;
 
 	ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
 	if (ret != 1) {
@@ -1407,8 +1406,6 @@ static void analogix_dp_bridge_mode_set(struct drm_bridge *bridge,
 		video->color_space = COLOR_YCBCR444;
 	else if (display_info->color_formats & DRM_COLOR_FORMAT_YCRCB422)
 		video->color_space = COLOR_YCBCR422;
-	else if (display_info->color_formats & DRM_COLOR_FORMAT_RGB444)
-		video->color_space = COLOR_RGB;
 	else
 		video->color_space = COLOR_RGB;
 
@@ -1581,12 +1578,18 @@ analogix_dp_bind(struct device *dev, struct drm_device *drm_dev,
 
 	dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd");
 
-	dp->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0);
-	if (!gpio_is_valid(dp->hpd_gpio))
-		dp->hpd_gpio = of_get_named_gpio(dev->of_node,
-						 "samsung,hpd-gpio", 0);
+	/* Try two different names */
+	dp->hpd_gpiod = devm_gpiod_get_optional(dev, "hpd", GPIOD_IN);
+	if (!dp->hpd_gpiod)
+		dp->hpd_gpiod = devm_gpiod_get_optional(dev, "samsung,hpd",
+							GPIOD_IN);
+	if (IS_ERR(dp->hpd_gpiod)) {
+		dev_err(dev, "error getting HDP GPIO: %ld\n",
+			PTR_ERR(dp->hpd_gpiod));
+		return ERR_CAST(dp->hpd_gpiod);
+	}
 
-	if (gpio_is_valid(dp->hpd_gpio)) {
+	if (dp->hpd_gpiod) {
 		/*
 		 * Set up the hotplug GPIO from the device tree as an interrupt.
 		 * Simply specifying a different interrupt in the device tree
@@ -1594,16 +1597,9 @@ analogix_dp_bind(struct device *dev, struct drm_device *drm_dev,
 		 * using a GPIO.  We also need the actual GPIO specifier so
 		 * that we can get the current state of the GPIO.
 		 */
-		ret = devm_gpio_request_one(&pdev->dev, dp->hpd_gpio, GPIOF_IN,
-					    "hpd_gpio");
-		if (ret) {
-			dev_err(&pdev->dev, "failed to get hpd gpio\n");
-			return ERR_PTR(ret);
-		}
-		dp->irq = gpio_to_irq(dp->hpd_gpio);
+		dp->irq = gpiod_to_irq(dp->hpd_gpiod);
 		irq_flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
 	} else {
-		dp->hpd_gpio = -ENODEV;
 		dp->irq = platform_get_irq(pdev, 0);
 		irq_flags = 0;
 	}
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
index 8d82f2555880..da058252dcaf 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
@@ -34,6 +34,8 @@
 #define DPCD_VOLTAGE_SWING_SET(x)		(((x) & 0x3) << 0)
 #define DPCD_VOLTAGE_SWING_GET(x)		(((x) >> 0) & 0x3)
 
+struct gpio_desc;
+
 enum link_lane_count_type {
 	LANE_COUNT1 = 1,
 	LANE_COUNT2 = 2,
@@ -167,7 +169,7 @@ struct analogix_dp_device {
 	struct link_train	link_train;
 	struct phy		*phy;
 	int			dpms_mode;
-	int			hpd_gpio;
+	struct gpio_desc	*hpd_gpiod;
 	bool                    force_hpd;
 	bool			psr_enable;
 	bool			fast_train_enable;
@@ -250,7 +252,7 @@ void analogix_dp_enable_scrambling(struct analogix_dp_device *dp);
 void analogix_dp_disable_scrambling(struct analogix_dp_device *dp);
 void analogix_dp_enable_psr_crc(struct analogix_dp_device *dp);
 int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
-			     struct edp_vsc_psr *vsc, bool blocking);
+			     struct dp_sdp *vsc, bool blocking);
 ssize_t analogix_dp_transfer(struct analogix_dp_device *dp,
 			     struct drm_dp_aux_msg *msg);
 
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
index 7ae311aa13a5..914c569ab8c1 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
@@ -8,7 +8,7 @@
 
 #include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
 
@@ -393,7 +393,7 @@ void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp)
 {
 	u32 reg;
 
-	if (gpio_is_valid(dp->hpd_gpio))
+	if (dp->hpd_gpiod)
 		return;
 
 	reg = HOTPLUG_CHG | HPD_LOST | PLUG;
@@ -407,7 +407,7 @@ void analogix_dp_init_hpd(struct analogix_dp_device *dp)
 {
 	u32 reg;
 
-	if (gpio_is_valid(dp->hpd_gpio))
+	if (dp->hpd_gpiod)
 		return;
 
 	analogix_dp_clear_hotplug_interrupts(dp);
@@ -430,8 +430,8 @@ enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp)
 {
 	u32 reg;
 
-	if (gpio_is_valid(dp->hpd_gpio)) {
-		reg = gpio_get_value(dp->hpd_gpio);
+	if (dp->hpd_gpiod) {
+		reg = gpiod_get_value(dp->hpd_gpiod);
 		if (reg)
 			return DP_IRQ_TYPE_HP_CABLE_IN;
 		else
@@ -503,8 +503,8 @@ int analogix_dp_get_plug_in_status(struct analogix_dp_device *dp)
 {
 	u32 reg;
 
-	if (gpio_is_valid(dp->hpd_gpio)) {
-		if (gpio_get_value(dp->hpd_gpio))
+	if (dp->hpd_gpiod) {
+		if (gpiod_get_value(dp->hpd_gpiod))
 			return 0;
 	} else {
 		reg = readl(dp->reg_base + ANALOGIX_DP_SYS_CTL_3);
@@ -1037,7 +1037,7 @@ static ssize_t analogix_dp_get_psr_status(struct analogix_dp_device *dp)
 }
 
 int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
-			     struct edp_vsc_psr *vsc, bool blocking)
+			     struct dp_sdp *vsc, bool blocking)
 {
 	unsigned int val;
 	int ret;
@@ -1065,8 +1065,8 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
 	writel(0x5D, dp->reg_base + ANALOGIX_DP_SPD_PB3);
 
 	/* configure DB0 / DB1 values */
-	writel(vsc->DB0, dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0);
-	writel(vsc->DB1, dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1);
+	writel(vsc->db[0], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB0);
+	writel(vsc->db[1], dp->reg_base + ANALOGIX_DP_VSC_SHADOW_DB1);
 
 	/* set reuse spd inforframe */
 	val = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_3);
@@ -1088,8 +1088,8 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
 
 	ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status,
 		psr_status >= 0 &&
-		((vsc->DB1 && psr_status == DP_PSR_SINK_ACTIVE_RFB) ||
-		(!vsc->DB1 && psr_status == DP_PSR_SINK_INACTIVE)), 1500,
+		((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) ||
+		(!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500,
 		DP_TIMEOUT_PSR_LOOP_MS * 1000);
 	if (ret) {
 		dev_warn(dp->dev, "Failed to apply PSR %d\n", ret);
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 2a4ff77c18de..d32885b906ae 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -11,9 +11,9 @@
 #include <linux/of_graph.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 struct dumb_vga {
diff --git a/drivers/gpu/drm/bridge/lvds-encoder.c b/drivers/gpu/drm/bridge/lvds-encoder.c
index 27e55c32f823..2ab2c234f26c 100644
--- a/drivers/gpu/drm/bridge/lvds-encoder.c
+++ b/drivers/gpu/drm/bridge/lvds-encoder.c
@@ -3,12 +3,14 @@
  * Copyright (C) 2016 Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_bridge.h>
-#include <drm/drm_panel.h>
-
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_graph.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_bridge.h>
+#include <drm/drm_panel.h>
 
 struct lvds_encoder {
 	struct drm_bridge bridge;
diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
index 823db80cbd19..79311f8354bd 100644
--- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
@@ -23,11 +23,12 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/of.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drmP.h>
 
 #define EDID_EXT_BLOCK_CNT 0x7E
 
diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c
index 9fd231c5887f..98bc650b8c95 100644
--- a/drivers/gpu/drm/bridge/nxp-ptn3460.c
+++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c
@@ -12,13 +12,14 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drmP.h>
 
 #define PTN3460_EDID_ADDR			0x0
 #define PTN3460_EDID_EMULATION_ADDR		0x84
diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c
index 04a513319c8f..b12ae3a4c5f1 100644
--- a/drivers/gpu/drm/bridge/panel.c
+++ b/drivers/gpu/drm/bridge/panel.c
@@ -4,14 +4,13 @@
  * Copyright (C) 2017 Broadcom
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_connector.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_modeset_helper_vtables.h>
-#include <drm/drm_probe_helper.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
 
 struct panel_bridge {
 	struct drm_bridge bridge;
diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c
index 699c8dfb0fcb..2d88146e4836 100644
--- a/drivers/gpu/drm/bridge/parade-ps8622.c
+++ b/drivers/gpu/drm/bridge/parade-ps8622.c
@@ -16,12 +16,13 @@
 #include <linux/of_device.h>
 #include <linux/pm.h>
 #include <linux/regulator/consumer.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drmP.h>
 
 /* Brightness scale on the Parade chip */
 #define PS8622_MAX_BRIGHTNESS 0xff
diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c
index 1211b5379df1..dd7aa466b280 100644
--- a/drivers/gpu/drm/bridge/sii902x.c
+++ b/drivers/gpu/drm/bridge/sii902x.c
@@ -17,12 +17,16 @@
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/regmap.h>
+#include <linux/clk.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
+#include <sound/hdmi-codec.h>
+
 #define SII902X_TPI_VIDEO_DATA			0x0
 
 #define SII902X_TPI_PIXEL_REPETITION		0x8
@@ -64,6 +68,77 @@
 #define SII902X_AVI_POWER_STATE_MSK		GENMASK(1, 0)
 #define SII902X_AVI_POWER_STATE_D(l)		((l) & SII902X_AVI_POWER_STATE_MSK)
 
+/* Audio  */
+#define SII902X_TPI_I2S_ENABLE_MAPPING_REG	0x1f
+#define SII902X_TPI_I2S_CONFIG_FIFO0			(0 << 0)
+#define SII902X_TPI_I2S_CONFIG_FIFO1			(1 << 0)
+#define SII902X_TPI_I2S_CONFIG_FIFO2			(2 << 0)
+#define SII902X_TPI_I2S_CONFIG_FIFO3			(3 << 0)
+#define SII902X_TPI_I2S_LEFT_RIGHT_SWAP			(1 << 2)
+#define SII902X_TPI_I2S_AUTO_DOWNSAMPLE			(1 << 3)
+#define SII902X_TPI_I2S_SELECT_SD0			(0 << 4)
+#define SII902X_TPI_I2S_SELECT_SD1			(1 << 4)
+#define SII902X_TPI_I2S_SELECT_SD2			(2 << 4)
+#define SII902X_TPI_I2S_SELECT_SD3			(3 << 4)
+#define SII902X_TPI_I2S_FIFO_ENABLE			(1 << 7)
+
+#define SII902X_TPI_I2S_INPUT_CONFIG_REG	0x20
+#define SII902X_TPI_I2S_FIRST_BIT_SHIFT_YES		(0 << 0)
+#define SII902X_TPI_I2S_FIRST_BIT_SHIFT_NO		(1 << 0)
+#define SII902X_TPI_I2S_SD_DIRECTION_MSB_FIRST		(0 << 1)
+#define SII902X_TPI_I2S_SD_DIRECTION_LSB_FIRST		(1 << 1)
+#define SII902X_TPI_I2S_SD_JUSTIFY_LEFT			(0 << 2)
+#define SII902X_TPI_I2S_SD_JUSTIFY_RIGHT		(1 << 2)
+#define SII902X_TPI_I2S_WS_POLARITY_LOW			(0 << 3)
+#define SII902X_TPI_I2S_WS_POLARITY_HIGH		(1 << 3)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_128		(0 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_256		(1 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_384		(2 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_512		(3 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_768		(4 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_1024		(5 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_1152		(6 << 4)
+#define SII902X_TPI_I2S_MCLK_MULTIPLIER_192		(7 << 4)
+#define SII902X_TPI_I2S_SCK_EDGE_FALLING		(0 << 7)
+#define SII902X_TPI_I2S_SCK_EDGE_RISING			(1 << 7)
+
+#define SII902X_TPI_I2S_STRM_HDR_BASE	0x21
+#define SII902X_TPI_I2S_STRM_HDR_SIZE	5
+
+#define SII902X_TPI_AUDIO_CONFIG_BYTE2_REG	0x26
+#define SII902X_TPI_AUDIO_CODING_STREAM_HEADER		(0 << 0)
+#define SII902X_TPI_AUDIO_CODING_PCM			(1 << 0)
+#define SII902X_TPI_AUDIO_CODING_AC3			(2 << 0)
+#define SII902X_TPI_AUDIO_CODING_MPEG1			(3 << 0)
+#define SII902X_TPI_AUDIO_CODING_MP3			(4 << 0)
+#define SII902X_TPI_AUDIO_CODING_MPEG2			(5 << 0)
+#define SII902X_TPI_AUDIO_CODING_AAC			(6 << 0)
+#define SII902X_TPI_AUDIO_CODING_DTS			(7 << 0)
+#define SII902X_TPI_AUDIO_CODING_ATRAC			(8 << 0)
+#define SII902X_TPI_AUDIO_MUTE_DISABLE			(0 << 4)
+#define SII902X_TPI_AUDIO_MUTE_ENABLE			(1 << 4)
+#define SII902X_TPI_AUDIO_LAYOUT_2_CHANNELS		(0 << 5)
+#define SII902X_TPI_AUDIO_LAYOUT_8_CHANNELS		(1 << 5)
+#define SII902X_TPI_AUDIO_INTERFACE_DISABLE		(0 << 6)
+#define SII902X_TPI_AUDIO_INTERFACE_SPDIF		(1 << 6)
+#define SII902X_TPI_AUDIO_INTERFACE_I2S			(2 << 6)
+
+#define SII902X_TPI_AUDIO_CONFIG_BYTE3_REG	0x27
+#define SII902X_TPI_AUDIO_FREQ_STREAM			(0 << 3)
+#define SII902X_TPI_AUDIO_FREQ_32KHZ			(1 << 3)
+#define SII902X_TPI_AUDIO_FREQ_44KHZ			(2 << 3)
+#define SII902X_TPI_AUDIO_FREQ_48KHZ			(3 << 3)
+#define SII902X_TPI_AUDIO_FREQ_88KHZ			(4 << 3)
+#define SII902X_TPI_AUDIO_FREQ_96KHZ			(5 << 3)
+#define SII902X_TPI_AUDIO_FREQ_176KHZ			(6 << 3)
+#define SII902X_TPI_AUDIO_FREQ_192KHZ			(7 << 3)
+#define SII902X_TPI_AUDIO_SAMPLE_SIZE_STREAM		(0 << 6)
+#define SII902X_TPI_AUDIO_SAMPLE_SIZE_16		(1 << 6)
+#define SII902X_TPI_AUDIO_SAMPLE_SIZE_20		(2 << 6)
+#define SII902X_TPI_AUDIO_SAMPLE_SIZE_24		(3 << 6)
+
+#define SII902X_TPI_AUDIO_CONFIG_BYTE4_REG	0x28
+
 #define SII902X_INT_ENABLE			0x3c
 #define SII902X_INT_STATUS			0x3d
 #define SII902X_HOTPLUG_EVENT			BIT(0)
@@ -71,6 +146,16 @@
 
 #define SII902X_REG_TPI_RQB			0xc7
 
+/* Indirect internal register access */
+#define SII902X_IND_SET_PAGE			0xbc
+#define SII902X_IND_OFFSET			0xbd
+#define SII902X_IND_VALUE			0xbe
+
+#define SII902X_TPI_MISC_INFOFRAME_BASE		0xbf
+#define SII902X_TPI_MISC_INFOFRAME_END		0xde
+#define SII902X_TPI_MISC_INFOFRAME_SIZE	\
+	(SII902X_TPI_MISC_INFOFRAME_END - SII902X_TPI_MISC_INFOFRAME_BASE)
+
 #define SII902X_I2C_BUS_ACQUISITION_TIMEOUT_MS	500
 
 struct sii902x {
@@ -80,6 +165,16 @@ struct sii902x {
 	struct drm_connector connector;
 	struct gpio_desc *reset_gpio;
 	struct i2c_mux_core *i2cmux;
+	/*
+	 * Mutex protects audio and video functions from interfering
+	 * each other, by keeping their i2c command sequences atomic.
+	 */
+	struct mutex mutex;
+	struct sii902x_audio {
+		struct platform_device *pdev;
+		struct clk *mclk;
+		u32 i2s_fifo_sequence[4];
+	} audio;
 };
 
 static int sii902x_read_unlocked(struct i2c_client *i2c, u8 reg, u8 *val)
@@ -151,8 +246,12 @@ sii902x_connector_detect(struct drm_connector *connector, bool force)
 	struct sii902x *sii902x = connector_to_sii902x(connector);
 	unsigned int status;
 
+	mutex_lock(&sii902x->mutex);
+
 	regmap_read(sii902x->regmap, SII902X_INT_STATUS, &status);
 
+	mutex_unlock(&sii902x->mutex);
+
 	return (status & SII902X_PLUGGED_STATUS) ?
 	       connector_status_connected : connector_status_disconnected;
 }
@@ -170,12 +269,18 @@ static int sii902x_get_modes(struct drm_connector *connector)
 {
 	struct sii902x *sii902x = connector_to_sii902x(connector);
 	u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	u8 output_mode = SII902X_SYS_CTRL_OUTPUT_DVI;
 	struct edid *edid;
 	int num = 0, ret;
 
+	mutex_lock(&sii902x->mutex);
+
 	edid = drm_get_edid(connector, sii902x->i2cmux->adapter[0]);
 	drm_connector_update_edid_property(connector, edid);
 	if (edid) {
+		if (drm_detect_hdmi_monitor(edid))
+			output_mode = SII902X_SYS_CTRL_OUTPUT_HDMI;
+
 		num = drm_add_edid_modes(connector, edid);
 		kfree(edid);
 	}
@@ -183,9 +288,19 @@ static int sii902x_get_modes(struct drm_connector *connector)
 	ret = drm_display_info_set_bus_formats(&connector->display_info,
 					       &bus_format, 1);
 	if (ret)
-		return ret;
+		goto error_out;
+
+	ret = regmap_update_bits(sii902x->regmap, SII902X_SYS_CTRL_DATA,
+				 SII902X_SYS_CTRL_OUTPUT_MODE, output_mode);
+	if (ret)
+		goto error_out;
 
-	return num;
+	ret = num;
+
+error_out:
+	mutex_unlock(&sii902x->mutex);
+
+	return ret;
 }
 
 static enum drm_mode_status sii902x_mode_valid(struct drm_connector *connector,
@@ -205,20 +320,28 @@ static void sii902x_bridge_disable(struct drm_bridge *bridge)
 {
 	struct sii902x *sii902x = bridge_to_sii902x(bridge);
 
+	mutex_lock(&sii902x->mutex);
+
 	regmap_update_bits(sii902x->regmap, SII902X_SYS_CTRL_DATA,
 			   SII902X_SYS_CTRL_PWR_DWN,
 			   SII902X_SYS_CTRL_PWR_DWN);
+
+	mutex_unlock(&sii902x->mutex);
 }
 
 static void sii902x_bridge_enable(struct drm_bridge *bridge)
 {
 	struct sii902x *sii902x = bridge_to_sii902x(bridge);
 
+	mutex_lock(&sii902x->mutex);
+
 	regmap_update_bits(sii902x->regmap, SII902X_PWR_STATE_CTRL,
 			   SII902X_AVI_POWER_STATE_MSK,
 			   SII902X_AVI_POWER_STATE_D(0));
 	regmap_update_bits(sii902x->regmap, SII902X_SYS_CTRL_DATA,
 			   SII902X_SYS_CTRL_PWR_DWN, 0);
+
+	mutex_unlock(&sii902x->mutex);
 }
 
 static void sii902x_bridge_mode_set(struct drm_bridge *bridge,
@@ -229,10 +352,11 @@ static void sii902x_bridge_mode_set(struct drm_bridge *bridge,
 	struct regmap *regmap = sii902x->regmap;
 	u8 buf[HDMI_INFOFRAME_SIZE(AVI)];
 	struct hdmi_avi_infoframe frame;
+	u16 pixel_clock_10kHz = adj->clock / 10;
 	int ret;
 
-	buf[0] = adj->clock;
-	buf[1] = adj->clock >> 8;
+	buf[0] = pixel_clock_10kHz & 0xff;
+	buf[1] = pixel_clock_10kHz >> 8;
 	buf[2] = adj->vrefresh;
 	buf[3] = 0x00;
 	buf[4] = adj->hdisplay;
@@ -244,27 +368,32 @@ static void sii902x_bridge_mode_set(struct drm_bridge *bridge,
 	buf[9] = SII902X_TPI_AVI_INPUT_RANGE_AUTO |
 		 SII902X_TPI_AVI_INPUT_COLORSPACE_RGB;
 
+	mutex_lock(&sii902x->mutex);
+
 	ret = regmap_bulk_write(regmap, SII902X_TPI_VIDEO_DATA, buf, 10);
 	if (ret)
-		return;
+		goto out;
 
 	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame,
 						       &sii902x->connector, adj);
 	if (ret < 0) {
 		DRM_ERROR("couldn't fill AVI infoframe\n");
-		return;
+		goto out;
 	}
 
 	ret = hdmi_avi_infoframe_pack(&frame, buf, sizeof(buf));
 	if (ret < 0) {
 		DRM_ERROR("failed to pack AVI infoframe: %d\n", ret);
-		return;
+		goto out;
 	}
 
 	/* Do not send the infoframe header, but keep the CRC field. */
 	regmap_bulk_write(regmap, SII902X_TPI_AVI_INFOFRAME,
 			  buf + HDMI_INFOFRAME_HEADER_SIZE - 1,
 			  HDMI_AVI_INFOFRAME_SIZE + 1);
+
+out:
+	mutex_unlock(&sii902x->mutex);
 }
 
 static int sii902x_bridge_attach(struct drm_bridge *bridge)
@@ -305,6 +434,335 @@ static const struct drm_bridge_funcs sii902x_bridge_funcs = {
 	.enable = sii902x_bridge_enable,
 };
 
+static int sii902x_mute(struct sii902x *sii902x, bool mute)
+{
+	struct device *dev = &sii902x->i2c->dev;
+	unsigned int val = mute ? SII902X_TPI_AUDIO_MUTE_ENABLE :
+		SII902X_TPI_AUDIO_MUTE_DISABLE;
+
+	dev_dbg(dev, "%s: %s\n", __func__, mute ? "Muted" : "Unmuted");
+
+	return regmap_update_bits(sii902x->regmap,
+				  SII902X_TPI_AUDIO_CONFIG_BYTE2_REG,
+				  SII902X_TPI_AUDIO_MUTE_ENABLE, val);
+}
+
+static const int sii902x_mclk_div_table[] = {
+	128, 256, 384, 512, 768, 1024, 1152, 192 };
+
+static int sii902x_select_mclk_div(u8 *i2s_config_reg, unsigned int rate,
+				   unsigned int mclk)
+{
+	int div = mclk / rate;
+	int distance = 100000;
+	u8 i, nearest = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sii902x_mclk_div_table); i++) {
+		unsigned int d = abs(div - sii902x_mclk_div_table[i]);
+
+		if (d >= distance)
+			continue;
+
+		nearest = i;
+		distance = d;
+		if (d == 0)
+			break;
+	}
+
+	*i2s_config_reg |= nearest << 4;
+
+	return sii902x_mclk_div_table[nearest];
+}
+
+static const struct sii902x_sample_freq {
+	u32 freq;
+	u8 val;
+} sii902x_sample_freq[] = {
+	{ .freq = 32000,	.val = SII902X_TPI_AUDIO_FREQ_32KHZ },
+	{ .freq = 44000,	.val = SII902X_TPI_AUDIO_FREQ_44KHZ },
+	{ .freq = 48000,	.val = SII902X_TPI_AUDIO_FREQ_48KHZ },
+	{ .freq = 88000,	.val = SII902X_TPI_AUDIO_FREQ_88KHZ },
+	{ .freq = 96000,	.val = SII902X_TPI_AUDIO_FREQ_96KHZ },
+	{ .freq = 176000,	.val = SII902X_TPI_AUDIO_FREQ_176KHZ },
+	{ .freq = 192000,	.val = SII902X_TPI_AUDIO_FREQ_192KHZ },
+};
+
+static int sii902x_audio_hw_params(struct device *dev, void *data,
+				   struct hdmi_codec_daifmt *daifmt,
+				   struct hdmi_codec_params *params)
+{
+	struct sii902x *sii902x = dev_get_drvdata(dev);
+	u8 i2s_config_reg = SII902X_TPI_I2S_SD_DIRECTION_MSB_FIRST;
+	u8 config_byte2_reg = (SII902X_TPI_AUDIO_INTERFACE_I2S |
+			       SII902X_TPI_AUDIO_MUTE_ENABLE |
+			       SII902X_TPI_AUDIO_CODING_PCM);
+	u8 config_byte3_reg = 0;
+	u8 infoframe_buf[HDMI_INFOFRAME_SIZE(AUDIO)];
+	unsigned long mclk_rate;
+	int i, ret;
+
+	if (daifmt->bit_clk_master || daifmt->frame_clk_master) {
+		dev_dbg(dev, "%s: I2S master mode not supported\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (daifmt->fmt) {
+	case HDMI_I2S:
+		i2s_config_reg |= SII902X_TPI_I2S_FIRST_BIT_SHIFT_YES |
+			SII902X_TPI_I2S_SD_JUSTIFY_LEFT;
+		break;
+	case HDMI_RIGHT_J:
+		i2s_config_reg |= SII902X_TPI_I2S_SD_JUSTIFY_RIGHT;
+		break;
+	case HDMI_LEFT_J:
+		i2s_config_reg |= SII902X_TPI_I2S_SD_JUSTIFY_LEFT;
+		break;
+	default:
+		dev_dbg(dev, "%s: Unsupported i2s format %u\n", __func__,
+			daifmt->fmt);
+		return -EINVAL;
+	}
+
+	if (daifmt->bit_clk_inv)
+		i2s_config_reg |= SII902X_TPI_I2S_SCK_EDGE_FALLING;
+	else
+		i2s_config_reg |= SII902X_TPI_I2S_SCK_EDGE_RISING;
+
+	if (daifmt->frame_clk_inv)
+		i2s_config_reg |= SII902X_TPI_I2S_WS_POLARITY_LOW;
+	else
+		i2s_config_reg |= SII902X_TPI_I2S_WS_POLARITY_HIGH;
+
+	if (params->channels > 2)
+		config_byte2_reg |= SII902X_TPI_AUDIO_LAYOUT_8_CHANNELS;
+	else
+		config_byte2_reg |= SII902X_TPI_AUDIO_LAYOUT_2_CHANNELS;
+
+	switch (params->sample_width) {
+	case 16:
+		config_byte3_reg |= SII902X_TPI_AUDIO_SAMPLE_SIZE_16;
+		break;
+	case 20:
+		config_byte3_reg |= SII902X_TPI_AUDIO_SAMPLE_SIZE_20;
+		break;
+	case 24:
+	case 32:
+		config_byte3_reg |= SII902X_TPI_AUDIO_SAMPLE_SIZE_24;
+		break;
+	default:
+		dev_err(dev, "%s: Unsupported sample width %u\n", __func__,
+			params->sample_width);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sii902x_sample_freq); i++) {
+		if (params->sample_rate == sii902x_sample_freq[i].freq) {
+			config_byte3_reg |= sii902x_sample_freq[i].val;
+			break;
+		}
+	}
+
+	ret = clk_prepare_enable(sii902x->audio.mclk);
+	if (ret) {
+		dev_err(dev, "Enabling mclk failed: %d\n", ret);
+		return ret;
+	}
+
+	mclk_rate = clk_get_rate(sii902x->audio.mclk);
+
+	ret = sii902x_select_mclk_div(&i2s_config_reg, params->sample_rate,
+				      mclk_rate);
+	if (mclk_rate != ret * params->sample_rate)
+		dev_dbg(dev, "Inaccurate reference clock (%ld/%d != %u)\n",
+			mclk_rate, ret, params->sample_rate);
+
+	mutex_lock(&sii902x->mutex);
+
+	ret = regmap_write(sii902x->regmap,
+			   SII902X_TPI_AUDIO_CONFIG_BYTE2_REG,
+			   config_byte2_reg);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_write(sii902x->regmap, SII902X_TPI_I2S_INPUT_CONFIG_REG,
+			   i2s_config_reg);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < ARRAY_SIZE(sii902x->audio.i2s_fifo_sequence) &&
+		    sii902x->audio.i2s_fifo_sequence[i]; i++)
+		regmap_write(sii902x->regmap,
+			     SII902X_TPI_I2S_ENABLE_MAPPING_REG,
+			     sii902x->audio.i2s_fifo_sequence[i]);
+
+	ret = regmap_write(sii902x->regmap, SII902X_TPI_AUDIO_CONFIG_BYTE3_REG,
+			   config_byte3_reg);
+	if (ret)
+		goto out;
+
+	ret = regmap_bulk_write(sii902x->regmap, SII902X_TPI_I2S_STRM_HDR_BASE,
+				params->iec.status,
+				min((size_t) SII902X_TPI_I2S_STRM_HDR_SIZE,
+				    sizeof(params->iec.status)));
+	if (ret)
+		goto out;
+
+	ret = hdmi_audio_infoframe_pack(&params->cea, infoframe_buf,
+					sizeof(infoframe_buf));
+	if (ret < 0) {
+		dev_err(dev, "%s: Failed to pack audio infoframe: %d\n",
+			__func__, ret);
+		goto out;
+	}
+
+	ret = regmap_bulk_write(sii902x->regmap,
+				SII902X_TPI_MISC_INFOFRAME_BASE,
+				infoframe_buf,
+				min(ret, SII902X_TPI_MISC_INFOFRAME_SIZE));
+	if (ret)
+		goto out;
+
+	/* Decode Level 0 Packets */
+	ret = regmap_write(sii902x->regmap, SII902X_IND_SET_PAGE, 0x02);
+	if (ret)
+		goto out;
+
+	ret = regmap_write(sii902x->regmap, SII902X_IND_OFFSET, 0x24);
+	if (ret)
+		goto out;
+
+	ret = regmap_write(sii902x->regmap, SII902X_IND_VALUE, 0x02);
+	if (ret)
+		goto out;
+
+	dev_dbg(dev, "%s: hdmi audio enabled\n", __func__);
+out:
+	mutex_unlock(&sii902x->mutex);
+
+	if (ret) {
+		clk_disable_unprepare(sii902x->audio.mclk);
+		dev_err(dev, "%s: hdmi audio enable failed: %d\n", __func__,
+			ret);
+	}
+
+	return ret;
+}
+
+static void sii902x_audio_shutdown(struct device *dev, void *data)
+{
+	struct sii902x *sii902x = dev_get_drvdata(dev);
+
+	mutex_lock(&sii902x->mutex);
+
+	regmap_write(sii902x->regmap, SII902X_TPI_AUDIO_CONFIG_BYTE2_REG,
+		     SII902X_TPI_AUDIO_INTERFACE_DISABLE);
+
+	mutex_unlock(&sii902x->mutex);
+
+	clk_disable_unprepare(sii902x->audio.mclk);
+}
+
+int sii902x_audio_digital_mute(struct device *dev, void *data, bool enable)
+{
+	struct sii902x *sii902x = dev_get_drvdata(dev);
+
+	mutex_lock(&sii902x->mutex);
+
+	sii902x_mute(sii902x, enable);
+
+	mutex_unlock(&sii902x->mutex);
+
+	return 0;
+}
+
+static int sii902x_audio_get_eld(struct device *dev, void *data,
+				 uint8_t *buf, size_t len)
+{
+	struct sii902x *sii902x = dev_get_drvdata(dev);
+
+	mutex_lock(&sii902x->mutex);
+
+	memcpy(buf, sii902x->connector.eld,
+	       min(sizeof(sii902x->connector.eld), len));
+
+	mutex_unlock(&sii902x->mutex);
+
+	return 0;
+}
+
+static const struct hdmi_codec_ops sii902x_audio_codec_ops = {
+	.hw_params = sii902x_audio_hw_params,
+	.audio_shutdown = sii902x_audio_shutdown,
+	.digital_mute = sii902x_audio_digital_mute,
+	.get_eld = sii902x_audio_get_eld,
+};
+
+static int sii902x_audio_codec_init(struct sii902x *sii902x,
+				    struct device *dev)
+{
+	static const u8 audio_fifo_id[] = {
+		SII902X_TPI_I2S_CONFIG_FIFO0,
+		SII902X_TPI_I2S_CONFIG_FIFO1,
+		SII902X_TPI_I2S_CONFIG_FIFO2,
+		SII902X_TPI_I2S_CONFIG_FIFO3,
+	};
+	static const u8 i2s_lane_id[] = {
+		SII902X_TPI_I2S_SELECT_SD0,
+		SII902X_TPI_I2S_SELECT_SD1,
+		SII902X_TPI_I2S_SELECT_SD2,
+		SII902X_TPI_I2S_SELECT_SD3,
+	};
+	struct hdmi_codec_pdata codec_data = {
+		.ops = &sii902x_audio_codec_ops,
+		.i2s = 1, /* Only i2s support for now. */
+		.spdif = 0,
+		.max_i2s_channels = 0,
+	};
+	u8 lanes[4];
+	int num_lanes, i;
+
+	if (!of_property_read_bool(dev->of_node, "#sound-dai-cells")) {
+		dev_dbg(dev, "%s: No \"#sound-dai-cells\", no audio\n",
+			__func__);
+		return 0;
+	}
+
+	num_lanes = of_property_read_variable_u8_array(dev->of_node,
+						       "sil,i2s-data-lanes",
+						       lanes, 1,
+						       ARRAY_SIZE(lanes));
+
+	if (num_lanes == -EINVAL) {
+		dev_dbg(dev,
+			"%s: No \"sil,i2s-data-lanes\", use default <0>\n",
+			__func__);
+		num_lanes = 1;
+		lanes[0] = 0;
+	} else if (num_lanes < 0) {
+		dev_err(dev,
+			"%s: Error gettin \"sil,i2s-data-lanes\": %d\n",
+			__func__, num_lanes);
+		return num_lanes;
+	}
+	codec_data.max_i2s_channels = 2 * num_lanes;
+
+	for (i = 0; i < num_lanes; i++)
+		sii902x->audio.i2s_fifo_sequence[i] |= audio_fifo_id[i] |
+			i2s_lane_id[lanes[i]] |	SII902X_TPI_I2S_FIFO_ENABLE;
+
+	if (IS_ERR(sii902x->audio.mclk)) {
+		dev_err(dev, "%s: No clock (audio mclk) found: %ld\n",
+			__func__, PTR_ERR(sii902x->audio.mclk));
+		return 0;
+	}
+
+	sii902x->audio.pdev = platform_device_register_data(
+		dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO,
+		&codec_data, sizeof(codec_data));
+
+	return PTR_ERR_OR_ZERO(sii902x->audio.pdev);
+}
+
 static const struct regmap_range sii902x_volatile_ranges[] = {
 	{ .range_min = 0, .range_max = 0xff },
 };
@@ -317,6 +775,8 @@ static const struct regmap_access_table sii902x_volatile_table = {
 static const struct regmap_config sii902x_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+	.disable_locking = true, /* struct sii902x mutex should be enough */
+	.max_register = SII902X_TPI_MISC_INFOFRAME_END,
 	.volatile_table = &sii902x_volatile_table,
 	.cache_type = REGCACHE_NONE,
 };
@@ -326,9 +786,13 @@ static irqreturn_t sii902x_interrupt(int irq, void *data)
 	struct sii902x *sii902x = data;
 	unsigned int status = 0;
 
+	mutex_lock(&sii902x->mutex);
+
 	regmap_read(sii902x->regmap, SII902X_INT_STATUS, &status);
 	regmap_write(sii902x->regmap, SII902X_INT_STATUS, status);
 
+	mutex_unlock(&sii902x->mutex);
+
 	if ((status & SII902X_HOTPLUG_EVENT) && sii902x->bridge.dev)
 		drm_helper_hpd_irq_event(sii902x->bridge.dev);
 
@@ -450,6 +914,12 @@ static int sii902x_i2c_bypass_deselect(struct i2c_mux_core *mux, u32 chan_id)
 	return 0;
 }
 
+static const struct drm_bridge_timings default_sii902x_timings = {
+	.input_bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE
+		 | DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE
+		 | DRM_BUS_FLAG_DE_HIGH,
+};
+
 static int sii902x_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -483,6 +953,8 @@ static int sii902x_probe(struct i2c_client *client,
 		return PTR_ERR(sii902x->reset_gpio);
 	}
 
+	mutex_init(&sii902x->mutex);
+
 	sii902x_reset(sii902x);
 
 	ret = regmap_write(sii902x->regmap, SII902X_REG_TPI_RQB, 0x0);
@@ -520,8 +992,11 @@ static int sii902x_probe(struct i2c_client *client,
 
 	sii902x->bridge.funcs = &sii902x_bridge_funcs;
 	sii902x->bridge.of_node = dev->of_node;
+	sii902x->bridge.timings = &default_sii902x_timings;
 	drm_bridge_add(&sii902x->bridge);
 
+	sii902x_audio_codec_init(sii902x, dev);
+
 	i2c_set_clientdata(client, sii902x);
 
 	sii902x->i2cmux = i2c_mux_alloc(client->adapter, dev,
diff --git a/drivers/gpu/drm/bridge/sii9234.c b/drivers/gpu/drm/bridge/sii9234.c
index b36bbafb0e43..25d4ad8c7ad6 100644
--- a/drivers/gpu/drm/bridge/sii9234.c
+++ b/drivers/gpu/drm/bridge/sii9234.c
@@ -815,7 +815,7 @@ static irqreturn_t sii9234_irq_thread(int irq, void *data)
 static int sii9234_init_resources(struct sii9234 *ctx,
 				  struct i2c_client *client)
 {
-	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct i2c_adapter *adapter = client->adapter;
 	int ret;
 
 	if (!ctx->dev->of_node) {
@@ -897,7 +897,7 @@ static const struct drm_bridge_funcs sii9234_bridge_funcs = {
 static int sii9234_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
-	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct i2c_adapter *adapter = client->adapter;
 	struct sii9234 *ctx;
 	struct device *dev = &client->dev;
 	int ret;
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 045b1b13fd0e..c6490949d9db 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -6,34 +6,36 @@
  * Copyright (C) 2011-2013 Freescale Semiconductor, Inc.
  * Copyright (C) 2010, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
  */
-#include <linux/module.h>
-#include <linux/irq.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/clk.h>
 #include <linux/hdmi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/regmap.h>
+#include <linux/dma-mapping.h>
 #include <linux/spinlock.h>
 
-#include <drm/drm_of.h>
-#include <drm/drmP.h>
+#include <media/cec-notifier.h>
+
+#include <uapi/linux/media-bus-format.h>
+#include <uapi/linux/videodev2.h>
+
+#include <drm/bridge/dw_hdmi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder_slave.h>
-#include <drm/drm_scdc_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/bridge/dw_hdmi.h>
-
-#include <uapi/linux/media-bus-format.h>
-#include <uapi/linux/videodev2.h>
+#include <drm/drm_scdc_helper.h>
 
-#include "dw-hdmi.h"
 #include "dw-hdmi-audio.h"
 #include "dw-hdmi-cec.h"
-
-#include <media/cec-notifier.h>
+#include "dw-hdmi.h"
 
 #define DDC_SEGMENT_ADDR	0x30
 
@@ -164,6 +166,10 @@ struct dw_hdmi {
 	bool sink_is_hdmi;
 	bool sink_has_audio;
 
+	struct pinctrl *pinctrl;
+	struct pinctrl_state *default_state;
+	struct pinctrl_state *unwedge_state;
+
 	struct mutex mutex;		/* for state below and previous_mode */
 	enum drm_connector_force force;	/* mutex-protected force state */
 	bool disabled;			/* DRM has disabled our bridge */
@@ -222,6 +228,13 @@ static void hdmi_mask_writeb(struct dw_hdmi *hdmi, u8 data, unsigned int reg,
 
 static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi)
 {
+	hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL,
+		    HDMI_PHY_I2CM_INT_ADDR);
+
+	hdmi_writeb(hdmi, HDMI_PHY_I2CM_CTLINT_ADDR_NAC_POL |
+		    HDMI_PHY_I2CM_CTLINT_ADDR_ARBITRATION_POL,
+		    HDMI_PHY_I2CM_CTLINT_ADDR);
+
 	/* Software reset */
 	hdmi_writeb(hdmi, 0x00, HDMI_I2CM_SOFTRSTZ);
 
@@ -242,11 +255,82 @@ static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi)
 		    HDMI_IH_MUTE_I2CM_STAT0);
 }
 
+static bool dw_hdmi_i2c_unwedge(struct dw_hdmi *hdmi)
+{
+	/* If no unwedge state then give up */
+	if (!hdmi->unwedge_state)
+		return false;
+
+	dev_info(hdmi->dev, "Attempting to unwedge stuck i2c bus\n");
+
+	/*
+	 * This is a huge hack to workaround a problem where the dw_hdmi i2c
+	 * bus could sometimes get wedged.  Once wedged there doesn't appear
+	 * to be any way to unwedge it (including the HDMI_I2CM_SOFTRSTZ)
+	 * other than pulsing the SDA line.
+	 *
+	 * We appear to be able to pulse the SDA line (in the eyes of dw_hdmi)
+	 * by:
+	 * 1. Remux the pin as a GPIO output, driven low.
+	 * 2. Wait a little while.  1 ms seems to work, but we'll do 10.
+	 * 3. Immediately jump to remux the pin as dw_hdmi i2c again.
+	 *
+	 * At the moment of remuxing, the line will still be low due to its
+	 * recent stint as an output, but then it will be pulled high by the
+	 * (presumed) external pullup.  dw_hdmi seems to see this as a rising
+	 * edge and that seems to get it out of its jam.
+	 *
+	 * This wedging was only ever seen on one TV, and only on one of
+	 * its HDMI ports.  It happened when the TV was powered on while the
+	 * device was plugged in.  A scope trace shows the TV bringing both SDA
+	 * and SCL low, then bringing them both back up at roughly the same
+	 * time.  Presumably this confuses dw_hdmi because it saw activity but
+	 * no real STOP (maybe it thinks there's another master on the bus?).
+	 * Giving it a clean rising edge of SDA while SCL is already high
+	 * presumably makes dw_hdmi see a STOP which seems to bring dw_hdmi out
+	 * of its stupor.
+	 *
+	 * Note that after coming back alive, transfers seem to immediately
+	 * resume, so if we unwedge due to a timeout we should wait a little
+	 * longer for our transfer to finish, since it might have just started
+	 * now.
+	 */
+	pinctrl_select_state(hdmi->pinctrl, hdmi->unwedge_state);
+	msleep(10);
+	pinctrl_select_state(hdmi->pinctrl, hdmi->default_state);
+
+	return true;
+}
+
+static int dw_hdmi_i2c_wait(struct dw_hdmi *hdmi)
+{
+	struct dw_hdmi_i2c *i2c = hdmi->i2c;
+	int stat;
+
+	stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
+	if (!stat) {
+		/* If we can't unwedge, return timeout */
+		if (!dw_hdmi_i2c_unwedge(hdmi))
+			return -EAGAIN;
+
+		/* We tried to unwedge; give it another chance */
+		stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
+		if (!stat)
+			return -EAGAIN;
+	}
+
+	/* Check for error condition on the bus */
+	if (i2c->stat & HDMI_IH_I2CM_STAT0_ERROR)
+		return -EIO;
+
+	return 0;
+}
+
 static int dw_hdmi_i2c_read(struct dw_hdmi *hdmi,
 			    unsigned char *buf, unsigned int length)
 {
 	struct dw_hdmi_i2c *i2c = hdmi->i2c;
-	int stat;
+	int ret;
 
 	if (!i2c->is_regaddr) {
 		dev_dbg(hdmi->dev, "set read register address to 0\n");
@@ -265,13 +349,9 @@ static int dw_hdmi_i2c_read(struct dw_hdmi *hdmi,
 			hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_READ,
 				    HDMI_I2CM_OPERATION);
 
-		stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
-		if (!stat)
-			return -EAGAIN;
-
-		/* Check for error condition on the bus */
-		if (i2c->stat & HDMI_IH_I2CM_STAT0_ERROR)
-			return -EIO;
+		ret = dw_hdmi_i2c_wait(hdmi);
+		if (ret)
+			return ret;
 
 		*buf++ = hdmi_readb(hdmi, HDMI_I2CM_DATAI);
 	}
@@ -284,7 +364,7 @@ static int dw_hdmi_i2c_write(struct dw_hdmi *hdmi,
 			     unsigned char *buf, unsigned int length)
 {
 	struct dw_hdmi_i2c *i2c = hdmi->i2c;
-	int stat;
+	int ret;
 
 	if (!i2c->is_regaddr) {
 		/* Use the first write byte as register address */
@@ -302,13 +382,9 @@ static int dw_hdmi_i2c_write(struct dw_hdmi *hdmi,
 		hdmi_writeb(hdmi, HDMI_I2CM_OPERATION_WRITE,
 			    HDMI_I2CM_OPERATION);
 
-		stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
-		if (!stat)
-			return -EAGAIN;
-
-		/* Check for error condition on the bus */
-		if (i2c->stat & HDMI_IH_I2CM_STAT0_ERROR)
-			return -EIO;
+		ret = dw_hdmi_i2c_wait(hdmi);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -1920,16 +1996,6 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 	return 0;
 }
 
-static void dw_hdmi_setup_i2c(struct dw_hdmi *hdmi)
-{
-	hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL,
-		    HDMI_PHY_I2CM_INT_ADDR);
-
-	hdmi_writeb(hdmi, HDMI_PHY_I2CM_CTLINT_ADDR_NAC_POL |
-		    HDMI_PHY_I2CM_CTLINT_ADDR_ARBITRATION_POL,
-		    HDMI_PHY_I2CM_CTLINT_ADDR);
-}
-
 static void initialize_hdmi_ih_mutes(struct dw_hdmi *hdmi)
 {
 	u8 ih_mute;
@@ -2430,6 +2496,21 @@ static const struct regmap_config hdmi_regmap_32bit_config = {
 	.max_register	= HDMI_I2CM_FS_SCL_LCNT_0_ADDR << 2,
 };
 
+static void dw_hdmi_init_hw(struct dw_hdmi *hdmi)
+{
+	initialize_hdmi_ih_mutes(hdmi);
+
+	/*
+	 * Reset HDMI DDC I2C master controller and mute I2CM interrupts.
+	 * Even if we are using a separate i2c adapter doing this doesn't
+	 * hurt.
+	 */
+	dw_hdmi_i2c_init(hdmi);
+
+	if (hdmi->phy.ops->setup_hpd)
+		hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data);
+}
+
 static struct dw_hdmi *
 __dw_hdmi_probe(struct platform_device *pdev,
 		const struct dw_hdmi_plat_data *plat_data)
@@ -2581,7 +2662,7 @@ __dw_hdmi_probe(struct platform_device *pdev,
 		 prod_id1 & HDMI_PRODUCT_ID1_HDCP ? "with" : "without",
 		 hdmi->phy.name);
 
-	initialize_hdmi_ih_mutes(hdmi);
+	dw_hdmi_init_hw(hdmi);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
@@ -2609,6 +2690,24 @@ __dw_hdmi_probe(struct platform_device *pdev,
 
 	/* If DDC bus is not specified, try to register HDMI I2C bus */
 	if (!hdmi->ddc) {
+		/* Look for (optional) stuff related to unwedging */
+		hdmi->pinctrl = devm_pinctrl_get(dev);
+		if (!IS_ERR(hdmi->pinctrl)) {
+			hdmi->unwedge_state =
+				pinctrl_lookup_state(hdmi->pinctrl, "unwedge");
+			hdmi->default_state =
+				pinctrl_lookup_state(hdmi->pinctrl, "default");
+
+			if (IS_ERR(hdmi->default_state) ||
+			    IS_ERR(hdmi->unwedge_state)) {
+				if (!IS_ERR(hdmi->unwedge_state))
+					dev_warn(dev,
+						 "Unwedge requires default pinctrl\n");
+				hdmi->default_state = NULL;
+				hdmi->unwedge_state = NULL;
+			}
+		}
+
 		hdmi->ddc = dw_hdmi_i2c_adapter(hdmi);
 		if (IS_ERR(hdmi->ddc))
 			hdmi->ddc = NULL;
@@ -2620,10 +2719,6 @@ __dw_hdmi_probe(struct platform_device *pdev,
 	hdmi->bridge.of_node = pdev->dev.of_node;
 #endif
 
-	dw_hdmi_setup_i2c(hdmi);
-	if (hdmi->phy.ops->setup_hpd)
-		hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data);
-
 	memset(&pdevinfo, 0, sizeof(pdevinfo));
 	pdevinfo.parent = dev;
 	pdevinfo.id = PLATFORM_DEVID_AUTO;
@@ -2676,10 +2771,6 @@ __dw_hdmi_probe(struct platform_device *pdev,
 		hdmi->cec = platform_device_register_full(&pdevinfo);
 	}
 
-	/* Reset HDMI DDC I2C master controller and mute I2CM interrupts */
-	if (hdmi->i2c)
-		dw_hdmi_i2c_init(hdmi);
-
 	return hdmi;
 
 err_iahb:
@@ -2783,6 +2874,12 @@ void dw_hdmi_unbind(struct dw_hdmi *hdmi)
 }
 EXPORT_SYMBOL_GPL(dw_hdmi_unbind);
 
+void dw_hdmi_resume(struct dw_hdmi *hdmi)
+{
+	dw_hdmi_init_hw(hdmi);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_resume);
+
 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
 MODULE_AUTHOR("Andy Yan <andy.yan@rock-chips.com>");
 MODULE_AUTHOR("Yakir Yang <ykk@rock-chips.com>");
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
index e915ae8c9a92..281c58bab1a1 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
@@ -15,15 +15,18 @@
 #include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
-#include <drm/drmP.h>
+
+#include <video/mipi_display.h>
+
+#include <drm/bridge/dw_mipi_dsi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
 #include <drm/drm_of.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/bridge/dw_mipi_dsi.h>
-#include <video/mipi_display.h>
 
 #define HWVER_131			0x31333100	/* IP version 1.31 */
 
@@ -775,6 +778,10 @@ static void dw_mipi_dsi_clear_err(struct dw_mipi_dsi *dsi)
 static void dw_mipi_dsi_bridge_post_disable(struct drm_bridge *bridge)
 {
 	struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge);
+	const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops;
+
+	if (phy_ops->power_off)
+		phy_ops->power_off(dsi->plat_data->priv_data);
 
 	/*
 	 * Switch to command mode before panel-bridge post_disable &
@@ -874,11 +881,15 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge,
 static void dw_mipi_dsi_bridge_enable(struct drm_bridge *bridge)
 {
 	struct dw_mipi_dsi *dsi = bridge_to_dsi(bridge);
+	const struct dw_mipi_dsi_phy_ops *phy_ops = dsi->plat_data->phy_ops;
 
 	/* Switch to video mode for panel-bridge enable & panel enable */
 	dw_mipi_dsi_set_mode(dsi, MIPI_DSI_MODE_VIDEO);
 	if (dsi->slave)
 		dw_mipi_dsi_set_mode(dsi->slave, MIPI_DSI_MODE_VIDEO);
+
+	if (phy_ops->power_on)
+		phy_ops->power_on(dsi->plat_data->priv_data);
 }
 
 static enum drm_mode_status
diff --git a/drivers/gpu/drm/bridge/tc358764.c b/drivers/gpu/drm/bridge/tc358764.c
index a20e454ddd64..170f162ffa55 100644
--- a/drivers/gpu/drm/bridge/tc358764.c
+++ b/drivers/gpu/drm/bridge/tc358764.c
@@ -7,18 +7,22 @@
  *	Maciej Purski <m.purski@samsung.com>
  */
 
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of_graph.h>
+#include <linux/regulator/consumer.h>
+
+#include <video/mipi_display.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drmP.h>
-#include <linux/gpio/consumer.h>
-#include <linux/of_graph.h>
-#include <linux/regulator/consumer.h>
-#include <video/mipi_display.h>
 
 #define FLD_MASK(start, end)    (((1 << ((start) - (end) + 1)) - 1) << (end))
 #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 4655bb1eb88f..13ade28a36a8 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -62,6 +62,7 @@
 
 /* System */
 #define TC_IDREG		0x0500
+#define SYSSTAT			0x0508
 #define SYSCTRL			0x0510
 #define DP0_AUDSRC_NO_INPUT		(0 << 3)
 #define DP0_AUDSRC_I2S_RX		(1 << 3)
@@ -69,6 +70,19 @@
 #define DP0_VIDSRC_DSI_RX		(1 << 0)
 #define DP0_VIDSRC_DPI_RX		(2 << 0)
 #define DP0_VIDSRC_COLOR_BAR		(3 << 0)
+#define GPIOM			0x0540
+#define GPIOC			0x0544
+#define GPIOO			0x0548
+#define GPIOI			0x054c
+#define INTCTL_G		0x0560
+#define INTSTS_G		0x0564
+
+#define INT_SYSERR		BIT(16)
+#define INT_GPIO_H(x)		(1 << (x == 0 ? 2 : 10))
+#define INT_GPIO_LC(x)		(1 << (x == 0 ? 3 : 11))
+
+#define INT_GP0_LCNT		0x0584
+#define INT_GP1_LCNT		0x0588
 
 /* Control */
 #define DP0CTL			0x0600
@@ -177,11 +191,8 @@ module_param_named(test, tc_test_pattern, bool, 0644);
 struct tc_edp_link {
 	struct drm_dp_link	base;
 	u8			assr;
-	int			scrambler_dis;
-	int			spread;
-	int			coding8b10b;
-	u8			swing;
-	u8			preemp;
+	bool			scrambler_dis;
+	bool			spread;
 };
 
 struct tc_data {
@@ -199,7 +210,7 @@ struct tc_data {
 	/* display edid */
 	struct edid		*edid;
 	/* current mode */
-	const struct drm_display_mode	*mode;
+	struct drm_display_mode	mode;
 
 	u32			rev;
 	u8			assr;
@@ -207,6 +218,12 @@ struct tc_data {
 	struct gpio_desc	*sd_gpio;
 	struct gpio_desc	*reset_gpio;
 	struct clk		*refclk;
+
+	/* do we have IRQ */
+	bool			have_irq;
+
+	/* HPD pin number (0 or 1) or -ENODEV */
+	int			hpd_pin;
 };
 
 static inline struct tc_data *aux_to_tc(struct drm_dp_aux *a)
@@ -277,14 +294,17 @@ static int tc_aux_get_status(struct tc_data *tc, u8 *reply)
 	ret = regmap_read(tc->regmap, DP0_AUXSTATUS, &value);
 	if (ret < 0)
 		return ret;
+
 	if (value & AUX_BUSY) {
-		if (value & AUX_TIMEOUT) {
-			dev_err(tc->dev, "i2c access timeout!\n");
-			return -ETIMEDOUT;
-		}
+		dev_err(tc->dev, "aux busy!\n");
 		return -EBUSY;
 	}
 
+	if (value & AUX_TIMEOUT) {
+		dev_err(tc->dev, "aux access timeout!\n");
+		return -ETIMEDOUT;
+	}
+
 	*reply = (value & AUX_STATUS_MASK) >> AUX_STATUS_SHIFT;
 	return 0;
 }
@@ -378,13 +398,10 @@ static u32 tc_srcctrl(struct tc_data *tc)
 	 * No training pattern, skew lane 1 data by two LSCLK cycles with
 	 * respect to lane 0 data, AutoCorrect Mode = 0
 	 */
-	u32 reg = DP0_SRCCTRL_NOTP | DP0_SRCCTRL_LANESKEW;
+	u32 reg = DP0_SRCCTRL_NOTP | DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_EN810B;
 
 	if (tc->link.scrambler_dis)
 		reg |= DP0_SRCCTRL_SCRMBLDIS;	/* Scrambler Disabled */
-	if (tc->link.coding8b10b)
-		/* Enable 8/10B Encoder (TxData[19:16] not used) */
-		reg |= DP0_SRCCTRL_EN810B;
 	if (tc->link.spread)
 		reg |= DP0_SRCCTRL_SSCG;	/* Spread Spectrum Enable */
 	if (tc->link.base.num_lanes == 2)
@@ -536,7 +553,6 @@ static int tc_aux_link_setup(struct tc_data *tc)
 	unsigned long rate;
 	u32 value;
 	int ret;
-	u32 dp_phy_ctrl;
 
 	rate = clk_get_rate(tc->refclk);
 	switch (rate) {
@@ -561,10 +577,7 @@ static int tc_aux_link_setup(struct tc_data *tc)
 	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
 	tc_write(SYS_PLLPARAM, value);
 
-	dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN;
-	if (tc->link.base.num_lanes == 2)
-		dp_phy_ctrl |= PHY_2LANE;
-	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+	tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | PHY_A0_EN);
 
 	/*
 	 * Initially PLLs are in bypass. Force PLL parameter update,
@@ -581,8 +594,9 @@ static int tc_aux_link_setup(struct tc_data *tc)
 	if (ret == -ETIMEDOUT) {
 		dev_err(tc->dev, "Timeout waiting for PHY to become ready");
 		return ret;
-	} else if (ret)
+	} else if (ret) {
 		goto err;
+	}
 
 	/* Setup AUX link */
 	tc_write(DP0_AUXCFG1, AUX_RX_FILTER_EN |
@@ -618,13 +632,13 @@ static int tc_get_display_props(struct tc_data *tc)
 	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp);
 	if (ret < 0)
 		goto err_dpcd_read;
-	tc->link.spread = tmp[0] & BIT(0); /* 0.5% down spread */
+	tc->link.spread = tmp[0] & DP_MAX_DOWNSPREAD_0_5;
 
 	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAIN_LINK_CHANNEL_CODING, tmp);
 	if (ret < 0)
 		goto err_dpcd_read;
-	tc->link.coding8b10b = tmp[0] & BIT(0);
-	tc->link.scrambler_dis = 0;
+
+	tc->link.scrambler_dis = false;
 	/* read assr */
 	ret = drm_dp_dpcd_readb(&tc->aux, DP_EDP_CONFIGURATION_SET, tmp);
 	if (ret < 0)
@@ -637,7 +651,9 @@ static int tc_get_display_props(struct tc_data *tc)
 		tc->link.base.num_lanes,
 		(tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) ?
 		"enhanced" : "non-enhanced");
-	dev_dbg(tc->dev, "ANSI 8B/10B: %d\n", tc->link.coding8b10b);
+	dev_dbg(tc->dev, "Downspread: %s, scrambler: %s\n",
+		tc->link.spread ? "0.5%" : "0.0%",
+		tc->link.scrambler_dis ? "disabled" : "enabled");
 	dev_dbg(tc->dev, "Display ASSR: %d, TC358767 ASSR: %d\n",
 		tc->link.assr, tc->assr);
 
@@ -735,89 +751,29 @@ err:
 	return ret;
 }
 
-static int tc_link_training(struct tc_data *tc, int pattern)
+static int tc_wait_link_training(struct tc_data *tc)
 {
-	const char * const *errors;
-	u32 srcctrl = tc_srcctrl(tc) | DP0_SRCCTRL_SCRMBLDIS |
-		      DP0_SRCCTRL_AUTOCORRECT;
-	int timeout;
-	int retry;
+	u32 timeout = 1000;
 	u32 value;
 	int ret;
 
-	if (pattern == DP_TRAINING_PATTERN_1) {
-		srcctrl |= DP0_SRCCTRL_TP1;
-		errors = training_pattern1_errors;
-	} else {
-		srcctrl |= DP0_SRCCTRL_TP2;
-		errors = training_pattern2_errors;
-	}
-
-	/* Set DPCD 0x102 for Training Part 1 or 2 */
-	tc_write(DP0_SNKLTCTRL, DP_LINK_SCRAMBLING_DISABLE | pattern);
-
-	tc_write(DP0_LTLOOPCTRL,
-		 (0x0f << 28) |	/* Defer Iteration Count */
-		 (0x0f << 24) |	/* Loop Iteration Count */
-		 (0x0d << 0));	/* Loop Timer Delay */
-
-	retry = 5;
 	do {
-		/* Set DP0 Training Pattern */
-		tc_write(DP0_SRCCTRL, srcctrl);
-
-		/* Enable DP0 to start Link Training */
-		tc_write(DP0CTL, DP_EN);
-
-		/* wait */
-		timeout = 1000;
-		do {
-			tc_read(DP0_LTSTAT, &value);
-			udelay(1);
-		} while ((!(value & LT_LOOPDONE)) && (--timeout));
-		if (timeout == 0) {
-			dev_err(tc->dev, "Link training timeout!\n");
-		} else {
-			int pattern = (value >> 11) & 0x3;
-			int error = (value >> 8) & 0x7;
-
-			dev_dbg(tc->dev,
-				"Link training phase %d done after %d uS: %s\n",
-				pattern, 1000 - timeout, errors[error]);
-			if (pattern == DP_TRAINING_PATTERN_1 && error == 0)
-				break;
-			if (pattern == DP_TRAINING_PATTERN_2) {
-				value &= LT_CHANNEL1_EQ_BITS |
-					 LT_INTERLANE_ALIGN_DONE |
-					 LT_CHANNEL0_EQ_BITS;
-				/* in case of two lanes */
-				if ((tc->link.base.num_lanes == 2) &&
-				    (value == (LT_CHANNEL1_EQ_BITS |
-					       LT_INTERLANE_ALIGN_DONE |
-					       LT_CHANNEL0_EQ_BITS)))
-					break;
-				/* in case of one line */
-				if ((tc->link.base.num_lanes == 1) &&
-				    (value == (LT_INTERLANE_ALIGN_DONE |
-					       LT_CHANNEL0_EQ_BITS)))
-					break;
-			}
-		}
-		/* restart */
-		tc_write(DP0CTL, 0);
-		usleep_range(10, 20);
-	} while (--retry);
-	if (retry == 0) {
-		dev_err(tc->dev, "Failed to finish training phase %d\n",
-			pattern);
+		udelay(1);
+		tc_read(DP0_LTSTAT, &value);
+	} while ((!(value & LT_LOOPDONE)) && (--timeout));
+
+	if (timeout == 0) {
+		dev_err(tc->dev, "Link training timeout waiting for LT_LOOPDONE!\n");
+		return -ETIMEDOUT;
 	}
 
-	return 0;
+	return (value >> 8) & 0x7;
+
 err:
 	return ret;
 }
 
-static int tc_main_link_setup(struct tc_data *tc)
+static int tc_main_link_enable(struct tc_data *tc)
 {
 	struct drm_dp_aux *aux = &tc->aux;
 	struct device *dev = tc->dev;
@@ -828,9 +784,11 @@ static int tc_main_link_setup(struct tc_data *tc)
 	int ret;
 	u8 tmp[8];
 
-	/* display mode should be set at this point */
-	if (!tc->mode)
-		return -EINVAL;
+	dev_dbg(tc->dev, "link enable\n");
+
+	tc_read(DP0CTL, &value);
+	if (WARN_ON(value & DP_EN))
+		tc_write(DP0CTL, 0);
 
 	tc_write(DP0_SRCCTRL, tc_srcctrl(tc));
 	/* SSCG and BW27 on DP1 must be set to the same as on DP0 */
@@ -863,7 +821,6 @@ static int tc_main_link_setup(struct tc_data *tc)
 	if (tc->link.base.num_lanes == 2)
 		dp_phy_ctrl |= PHY_2LANE;
 	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
-	msleep(100);
 
 	/* PLL setup */
 	tc_write(DP0_PLLCTRL, PLLUPDATE | PLLEN);
@@ -872,14 +829,6 @@ static int tc_main_link_setup(struct tc_data *tc)
 	tc_write(DP1_PLLCTRL, PLLUPDATE | PLLEN);
 	tc_wait_pll_lock(tc);
 
-	/* PXL PLL setup */
-	if (tc_test_pattern) {
-		ret = tc_pxl_pll_en(tc, clk_get_rate(tc->refclk),
-				    1000 * tc->mode->clock);
-		if (ret)
-			goto err;
-	}
-
 	/* Reset/Enable Main Links */
 	dp_phy_ctrl |= DP_PHY_RST | PHY_M1_RST | PHY_M0_RST;
 	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
@@ -925,9 +874,9 @@ static int tc_main_link_setup(struct tc_data *tc)
 
 		if (tmp[0] != tc->assr) {
 			dev_dbg(dev, "Failed to switch display ASSR to %d, falling back to unscrambled mode\n",
-				 tc->assr);
+				tc->assr);
 			/* trying with disabled scrambler */
-			tc->link.scrambler_dis = 1;
+			tc->link.scrambler_dis = true;
 		}
 	}
 
@@ -939,19 +888,82 @@ static int tc_main_link_setup(struct tc_data *tc)
 	/* DOWNSPREAD_CTRL */
 	tmp[0] = tc->link.spread ? DP_SPREAD_AMP_0_5 : 0x00;
 	/* MAIN_LINK_CHANNEL_CODING_SET */
-	tmp[1] =  tc->link.coding8b10b ? DP_SET_ANSI_8B10B : 0x00;
+	tmp[1] =  DP_SET_ANSI_8B10B;
 	ret = drm_dp_dpcd_write(aux, DP_DOWNSPREAD_CTRL, tmp, 2);
 	if (ret < 0)
 		goto err_dpcd_write;
 
-	ret = tc_link_training(tc, DP_TRAINING_PATTERN_1);
-	if (ret)
+	/* Reset voltage-swing & pre-emphasis */
+	tmp[0] = tmp[1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 |
+			  DP_TRAIN_PRE_EMPH_LEVEL_0;
+	ret = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, tmp, 2);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	/* Clock-Recovery */
+
+	/* Set DPCD 0x102 for Training Pattern 1 */
+	tc_write(DP0_SNKLTCTRL, DP_LINK_SCRAMBLING_DISABLE |
+		 DP_TRAINING_PATTERN_1);
+
+	tc_write(DP0_LTLOOPCTRL,
+		 (15 << 28) |	/* Defer Iteration Count */
+		 (15 << 24) |	/* Loop Iteration Count */
+		 (0xd << 0));	/* Loop Timer Delay */
+
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_SCRMBLDIS |
+		 DP0_SRCCTRL_AUTOCORRECT | DP0_SRCCTRL_TP1);
+
+	/* Enable DP0 to start Link Training */
+	tc_write(DP0CTL,
+		 ((tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) ? EF_EN : 0) |
+		 DP_EN);
+
+	/* wait */
+	ret = tc_wait_link_training(tc);
+	if (ret < 0)
 		goto err;
 
-	ret = tc_link_training(tc, DP_TRAINING_PATTERN_2);
-	if (ret)
+	if (ret) {
+		dev_err(tc->dev, "Link training phase 1 failed: %s\n",
+			training_pattern1_errors[ret]);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	/* Channel Equalization */
+
+	/* Set DPCD 0x102 for Training Pattern 2 */
+	tc_write(DP0_SNKLTCTRL, DP_LINK_SCRAMBLING_DISABLE |
+		 DP_TRAINING_PATTERN_2);
+
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_SCRMBLDIS |
+		 DP0_SRCCTRL_AUTOCORRECT | DP0_SRCCTRL_TP2);
+
+	/* wait */
+	ret = tc_wait_link_training(tc);
+	if (ret < 0)
 		goto err;
 
+	if (ret) {
+		dev_err(tc->dev, "Link training phase 2 failed: %s\n",
+			training_pattern2_errors[ret]);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	/*
+	 * Toshiba's documentation suggests to first clear DPCD 0x102, then
+	 * clear the training pattern bit in DP0_SRCCTRL. Testing shows
+	 * that the link sometimes drops if those steps are done in that order,
+	 * but if the steps are done in reverse order, the link stays up.
+	 *
+	 * So we do the steps differently than documented here.
+	 */
+
+	/* Clear Training Pattern, set AutoCorrect Mode = 1 */
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_AUTOCORRECT);
+
 	/* Clear DPCD 0x102 */
 	/* Note: Can Not use DP0_SNKLTCTRL (0x06E4) short cut */
 	tmp[0] = tc->link.scrambler_dis ? DP_LINK_SCRAMBLING_DISABLE : 0x00;
@@ -959,47 +971,43 @@ static int tc_main_link_setup(struct tc_data *tc)
 	if (ret < 0)
 		goto err_dpcd_write;
 
-	/* Clear Training Pattern, set AutoCorrect Mode = 1 */
-	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_AUTOCORRECT);
+	/* Check link status */
+	ret = drm_dp_dpcd_read_link_status(aux, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
 
-	/* Wait */
-	timeout = 100;
-	do {
-		udelay(1);
-		/* Read DPCD 0x202-0x207 */
-		ret = drm_dp_dpcd_read_link_status(aux, tmp + 2);
-		if (ret < 0)
-			goto err_dpcd_read;
-	} while ((--timeout) &&
-		 !(drm_dp_channel_eq_ok(tmp + 2,  tc->link.base.num_lanes)));
+	ret = 0;
 
-	if (timeout == 0) {
-		/* Read DPCD 0x200-0x201 */
-		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2);
-		if (ret < 0)
-			goto err_dpcd_read;
-		dev_err(dev, "channel(s) EQ not ok\n");
-		dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]);
-		dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n",
-			 tmp[1]);
-		dev_info(dev, "0x0202 LANE0_1_STATUS: 0x%02x\n", tmp[2]);
-		dev_info(dev, "0x0204 LANE_ALIGN_STATUS_UPDATED: 0x%02x\n",
-			 tmp[4]);
-		dev_info(dev, "0x0205 SINK_STATUS: 0x%02x\n", tmp[5]);
-		dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n",
-			 tmp[6]);
-
-		return -EAGAIN;
+	value = tmp[0] & DP_CHANNEL_EQ_BITS;
+
+	if (value != DP_CHANNEL_EQ_BITS) {
+		dev_err(tc->dev, "Lane 0 failed: %x\n", value);
+		ret = -ENODEV;
 	}
 
-	ret = tc_set_video_mode(tc, tc->mode);
-	if (ret)
-		goto err;
+	if (tc->link.base.num_lanes == 2) {
+		value = (tmp[0] >> 4) & DP_CHANNEL_EQ_BITS;
 
-	/* Set M/N */
-	ret = tc_stream_clock_calc(tc);
-	if (ret)
+		if (value != DP_CHANNEL_EQ_BITS) {
+			dev_err(tc->dev, "Lane 1 failed: %x\n", value);
+			ret = -ENODEV;
+		}
+
+		if (!(tmp[2] & DP_INTERLANE_ALIGN_DONE)) {
+			dev_err(tc->dev, "Interlane align failed\n");
+			ret = -ENODEV;
+		}
+	}
+
+	if (ret) {
+		dev_err(dev, "0x0202 LANE0_1_STATUS:            0x%02x\n", tmp[0]);
+		dev_err(dev, "0x0203 LANE2_3_STATUS             0x%02x\n", tmp[1]);
+		dev_err(dev, "0x0204 LANE_ALIGN_STATUS_UPDATED: 0x%02x\n", tmp[2]);
+		dev_err(dev, "0x0205 SINK_STATUS:               0x%02x\n", tmp[3]);
+		dev_err(dev, "0x0206 ADJUST_REQUEST_LANE0_1:    0x%02x\n", tmp[4]);
+		dev_err(dev, "0x0207 ADJUST_REQUEST_LANE2_3:    0x%02x\n", tmp[5]);
 		goto err;
+	}
 
 	return 0;
 err_dpcd_read:
@@ -1011,39 +1019,84 @@ err:
 	return ret;
 }
 
-static int tc_main_link_stream(struct tc_data *tc, int state)
+static int tc_main_link_disable(struct tc_data *tc)
+{
+	int ret;
+
+	dev_dbg(tc->dev, "link disable\n");
+
+	tc_write(DP0_SRCCTRL, 0);
+	tc_write(DP0CTL, 0);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_stream_enable(struct tc_data *tc)
 {
 	int ret;
 	u32 value;
 
-	dev_dbg(tc->dev, "stream: %d\n", state);
+	dev_dbg(tc->dev, "enable video stream\n");
 
-	if (state) {
-		value = VID_MN_GEN | DP_EN;
-		if (tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
-			value |= EF_EN;
-		tc_write(DP0CTL, value);
-		/*
-		 * VID_EN assertion should be delayed by at least N * LSCLK
-		 * cycles from the time VID_MN_GEN is enabled in order to
-		 * generate stable values for VID_M. LSCLK is 270 MHz or
-		 * 162 MHz, VID_N is set to 32768 in  tc_stream_clock_calc(),
-		 * so a delay of at least 203 us should suffice.
-		 */
-		usleep_range(500, 1000);
-		value |= VID_EN;
-		tc_write(DP0CTL, value);
-		/* Set input interface */
-		value = DP0_AUDSRC_NO_INPUT;
-		if (tc_test_pattern)
-			value |= DP0_VIDSRC_COLOR_BAR;
-		else
-			value |= DP0_VIDSRC_DPI_RX;
-		tc_write(SYSCTRL, value);
-	} else {
-		tc_write(DP0CTL, 0);
+	/* PXL PLL setup */
+	if (tc_test_pattern) {
+		ret = tc_pxl_pll_en(tc, clk_get_rate(tc->refclk),
+				    1000 * tc->mode.clock);
+		if (ret)
+			goto err;
 	}
 
+	ret = tc_set_video_mode(tc, &tc->mode);
+	if (ret)
+		return ret;
+
+	/* Set M/N */
+	ret = tc_stream_clock_calc(tc);
+	if (ret)
+		return ret;
+
+	value = VID_MN_GEN | DP_EN;
+	if (tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+		value |= EF_EN;
+	tc_write(DP0CTL, value);
+	/*
+	 * VID_EN assertion should be delayed by at least N * LSCLK
+	 * cycles from the time VID_MN_GEN is enabled in order to
+	 * generate stable values for VID_M. LSCLK is 270 MHz or
+	 * 162 MHz, VID_N is set to 32768 in  tc_stream_clock_calc(),
+	 * so a delay of at least 203 us should suffice.
+	 */
+	usleep_range(500, 1000);
+	value |= VID_EN;
+	tc_write(DP0CTL, value);
+	/* Set input interface */
+	value = DP0_AUDSRC_NO_INPUT;
+	if (tc_test_pattern)
+		value |= DP0_VIDSRC_COLOR_BAR;
+	else
+		value |= DP0_VIDSRC_DPI_RX;
+	tc_write(SYSCTRL, value);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_stream_disable(struct tc_data *tc)
+{
+	int ret;
+	u32 val;
+
+	dev_dbg(tc->dev, "disable video stream\n");
+
+	tc_read(DP0CTL, &val);
+	val &= ~VID_EN;
+	tc_write(DP0CTL, val);
+
+	tc_pxl_pll_dis(tc);
+
 	return 0;
 err:
 	return ret;
@@ -1061,15 +1114,22 @@ static void tc_bridge_enable(struct drm_bridge *bridge)
 	struct tc_data *tc = bridge_to_tc(bridge);
 	int ret;
 
-	ret = tc_main_link_setup(tc);
+	ret = tc_get_display_props(tc);
 	if (ret < 0) {
-		dev_err(tc->dev, "main link setup error: %d\n", ret);
+		dev_err(tc->dev, "failed to read display props: %d\n", ret);
 		return;
 	}
 
-	ret = tc_main_link_stream(tc, 1);
+	ret = tc_main_link_enable(tc);
+	if (ret < 0) {
+		dev_err(tc->dev, "main link enable error: %d\n", ret);
+		return;
+	}
+
+	ret = tc_stream_enable(tc);
 	if (ret < 0) {
 		dev_err(tc->dev, "main link stream start error: %d\n", ret);
+		tc_main_link_disable(tc);
 		return;
 	}
 
@@ -1083,9 +1143,13 @@ static void tc_bridge_disable(struct drm_bridge *bridge)
 
 	drm_panel_disable(tc->panel);
 
-	ret = tc_main_link_stream(tc, 0);
+	ret = tc_stream_disable(tc);
 	if (ret < 0)
 		dev_err(tc->dev, "main link stream stop error: %d\n", ret);
+
+	ret = tc_main_link_disable(tc);
+	if (ret < 0)
+		dev_err(tc->dev, "main link disable error: %d\n", ret);
 }
 
 static void tc_bridge_post_disable(struct drm_bridge *bridge)
@@ -1107,10 +1171,10 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge,
 	return true;
 }
 
-static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector,
-				   struct drm_display_mode *mode)
+static enum drm_mode_status tc_mode_valid(struct drm_bridge *bridge,
+					  const struct drm_display_mode *mode)
 {
-	struct tc_data *tc = connector_to_tc(connector);
+	struct tc_data *tc = bridge_to_tc(bridge);
 	u32 req, avail;
 	u32 bits_per_pixel = 24;
 
@@ -1133,7 +1197,7 @@ static void tc_bridge_mode_set(struct drm_bridge *bridge,
 {
 	struct tc_data *tc = bridge_to_tc(bridge);
 
-	tc->mode = mode;
+	tc->mode = *mode;
 }
 
 static int tc_connector_get_modes(struct drm_connector *connector)
@@ -1141,6 +1205,13 @@ static int tc_connector_get_modes(struct drm_connector *connector)
 	struct tc_data *tc = connector_to_tc(connector);
 	struct edid *edid;
 	unsigned int count;
+	int ret;
+
+	ret = tc_get_display_props(tc);
+	if (ret < 0) {
+		dev_err(tc->dev, "failed to read display props: %d\n", ret);
+		return 0;
+	}
 
 	if (tc->panel && tc->panel->funcs && tc->panel->funcs->get_modes) {
 		count = tc->panel->funcs->get_modes(tc->panel);
@@ -1161,29 +1232,40 @@ static int tc_connector_get_modes(struct drm_connector *connector)
 	return count;
 }
 
-static void tc_connector_set_polling(struct tc_data *tc,
-				     struct drm_connector *connector)
-{
-	/* TODO: add support for HPD */
-	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
-			    DRM_CONNECTOR_POLL_DISCONNECT;
-}
+static const struct drm_connector_helper_funcs tc_connector_helper_funcs = {
+	.get_modes = tc_connector_get_modes,
+};
 
-static struct drm_encoder *
-tc_connector_best_encoder(struct drm_connector *connector)
+static enum drm_connector_status tc_connector_detect(struct drm_connector *connector,
+						     bool force)
 {
 	struct tc_data *tc = connector_to_tc(connector);
+	bool conn;
+	u32 val;
+	int ret;
 
-	return tc->bridge.encoder;
-}
+	if (tc->hpd_pin < 0) {
+		if (tc->panel)
+			return connector_status_connected;
+		else
+			return connector_status_unknown;
+	}
 
-static const struct drm_connector_helper_funcs tc_connector_helper_funcs = {
-	.get_modes = tc_connector_get_modes,
-	.mode_valid = tc_connector_mode_valid,
-	.best_encoder = tc_connector_best_encoder,
-};
+	tc_read(GPIOI, &val);
+
+	conn = val & BIT(tc->hpd_pin);
+
+	if (conn)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+
+err:
+	return connector_status_unknown;
+}
 
 static const struct drm_connector_funcs tc_connector_funcs = {
+	.detect = tc_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = drm_connector_cleanup,
 	.reset = drm_atomic_helper_connector_reset,
@@ -1198,7 +1280,7 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 	struct drm_device *drm = bridge->dev;
 	int ret;
 
-	/* Create eDP connector */
+	/* Create DP/eDP connector */
 	drm_connector_helper_add(&tc->connector, &tc_connector_helper_funcs);
 	ret = drm_connector_init(drm, &tc->connector, &tc_connector_funcs,
 				 tc->panel ? DRM_MODE_CONNECTOR_eDP :
@@ -1206,6 +1288,15 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 	if (ret)
 		return ret;
 
+	/* Don't poll if don't have HPD connected */
+	if (tc->hpd_pin >= 0) {
+		if (tc->have_irq)
+			tc->connector.polled = DRM_CONNECTOR_POLL_HPD;
+		else
+			tc->connector.polled = DRM_CONNECTOR_POLL_CONNECT |
+					       DRM_CONNECTOR_POLL_DISCONNECT;
+	}
+
 	if (tc->panel)
 		drm_panel_attach(tc->panel, &tc->connector);
 
@@ -1222,6 +1313,7 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 
 static const struct drm_bridge_funcs tc_bridge_funcs = {
 	.attach = tc_bridge_attach,
+	.mode_valid = tc_mode_valid,
 	.mode_set = tc_bridge_mode_set,
 	.pre_enable = tc_bridge_pre_enable,
 	.enable = tc_bridge_enable,
@@ -1241,6 +1333,8 @@ static const struct regmap_range tc_volatile_ranges[] = {
 	regmap_reg_range(DP_PHY_CTRL, DP_PHY_CTRL),
 	regmap_reg_range(DP0_PLLCTRL, PXL_PLLCTRL),
 	regmap_reg_range(VFUEN0, VFUEN0),
+	regmap_reg_range(INTSTS_G, INTSTS_G),
+	regmap_reg_range(GPIOI, GPIOI),
 };
 
 static const struct regmap_access_table tc_volatile_table = {
@@ -1269,6 +1363,49 @@ static const struct regmap_config tc_regmap_config = {
 	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
+static irqreturn_t tc_irq_handler(int irq, void *arg)
+{
+	struct tc_data *tc = arg;
+	u32 val;
+	int r;
+
+	r = regmap_read(tc->regmap, INTSTS_G, &val);
+	if (r)
+		return IRQ_NONE;
+
+	if (!val)
+		return IRQ_NONE;
+
+	if (val & INT_SYSERR) {
+		u32 stat = 0;
+
+		regmap_read(tc->regmap, SYSSTAT, &stat);
+
+		dev_err(tc->dev, "syserr %x\n", stat);
+	}
+
+	if (tc->hpd_pin >= 0 && tc->bridge.dev) {
+		/*
+		 * H is triggered when the GPIO goes high.
+		 *
+		 * LC is triggered when the GPIO goes low and stays low for
+		 * the duration of LCNT
+		 */
+		bool h = val & INT_GPIO_H(tc->hpd_pin);
+		bool lc = val & INT_GPIO_LC(tc->hpd_pin);
+
+		dev_dbg(tc->dev, "GPIO%d: %s %s\n", tc->hpd_pin,
+			h ? "H" : "", lc ? "LC" : "");
+
+		if (h || lc)
+			drm_kms_helper_hotplug_event(tc->bridge.dev);
+	}
+
+	regmap_write(tc->regmap, INTSTS_G, val);
+
+	return IRQ_HANDLED;
+}
+
 static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
@@ -1320,6 +1457,33 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return ret;
 	}
 
+	ret = of_property_read_u32(dev->of_node, "toshiba,hpd-pin",
+				   &tc->hpd_pin);
+	if (ret) {
+		tc->hpd_pin = -ENODEV;
+	} else {
+		if (tc->hpd_pin < 0 || tc->hpd_pin > 1) {
+			dev_err(dev, "failed to parse HPD number\n");
+			return ret;
+		}
+	}
+
+	if (client->irq > 0) {
+		/* enable SysErr */
+		regmap_write(tc->regmap, INTCTL_G, INT_SYSERR);
+
+		ret = devm_request_threaded_irq(dev, client->irq,
+						NULL, tc_irq_handler,
+						IRQF_ONESHOT,
+						"tc358767-irq", tc);
+		if (ret) {
+			dev_err(dev, "failed to register dp interrupt\n");
+			return ret;
+		}
+
+		tc->have_irq = true;
+	}
+
 	ret = regmap_read(tc->regmap, TC_IDREG, &tc->rev);
 	if (ret) {
 		dev_err(tc->dev, "can not read device ID: %d\n", ret);
@@ -1333,6 +1497,22 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 	tc->assr = (tc->rev == 0x6601); /* Enable ASSR for eDP panels */
 
+	if (tc->hpd_pin >= 0) {
+		u32 lcnt_reg = tc->hpd_pin == 0 ? INT_GP0_LCNT : INT_GP1_LCNT;
+		u32 h_lc = INT_GPIO_H(tc->hpd_pin) | INT_GPIO_LC(tc->hpd_pin);
+
+		/* Set LCNT to 2ms */
+		regmap_write(tc->regmap, lcnt_reg,
+			     clk_get_rate(tc->refclk) * 2 / 1000);
+		/* We need the "alternate" mode for HPD */
+		regmap_write(tc->regmap, GPIOM, BIT(tc->hpd_pin));
+
+		if (tc->have_irq) {
+			/* enable H & LC */
+			regmap_update_bits(tc->regmap, INTCTL_G, h_lc, h_lc);
+		}
+	}
+
 	ret = tc_aux_link_setup(tc);
 	if (ret)
 		return ret;
@@ -1345,12 +1525,6 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (ret)
 		return ret;
 
-	ret = tc_get_display_props(tc);
-	if (ret)
-		goto err_unregister_aux;
-
-	tc_connector_set_polling(tc, &tc->connector);
-
 	tc->bridge.funcs = &tc_bridge_funcs;
 	tc->bridge.of_node = dev->of_node;
 	drm_bridge_add(&tc->bridge);
@@ -1358,9 +1532,6 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	i2c_set_clientdata(client, tc);
 
 	return 0;
-err_unregister_aux:
-	drm_dp_aux_unregister(&tc->aux);
-	return ret;
 }
 
 static int tc_remove(struct i2c_client *client)
@@ -1370,8 +1541,6 @@ static int tc_remove(struct i2c_client *client)
 	drm_bridge_remove(&tc->bridge);
 	drm_dp_aux_unregister(&tc->aux);
 
-	tc_pxl_pll_dis(tc);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c
index b083a740565c..3d74129b2995 100644
--- a/drivers/gpu/drm/bridge/thc63lvd1024.c
+++ b/drivers/gpu/drm/bridge/thc63lvd1024.c
@@ -5,15 +5,17 @@
  * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org>
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_bridge.h>
-#include <drm/drm_panel.h>
-
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/of_graph.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 
+#include <drm/drm_bridge.h>
+#include <drm/drm_panel.h>
+
 enum thc63_ports {
 	THC63_LVDS_IN0,
 	THC63_LVDS_IN1,
@@ -31,6 +33,8 @@ struct thc63_dev {
 
 	struct drm_bridge bridge;
 	struct drm_bridge *next;
+
+	struct drm_bridge_timings timings;
 };
 
 static inline struct thc63_dev *to_thc63(struct drm_bridge *bridge)
@@ -48,15 +52,28 @@ static int thc63_attach(struct drm_bridge *bridge)
 static enum drm_mode_status thc63_mode_valid(struct drm_bridge *bridge,
 					const struct drm_display_mode *mode)
 {
+	struct thc63_dev *thc63 = to_thc63(bridge);
+	unsigned int min_freq;
+	unsigned int max_freq;
+
 	/*
-	 * The THC63LVD1024 clock frequency range is 8 to 135 MHz in single-in
-	 * mode. Note that the limits are different in dual-in, single-out mode,
-	 * and will need to be adjusted accordingly.
+	 * The THC63LVD1024 pixel rate range is 8 to 135 MHz in all modes but
+	 * dual-in, single-out where it is 40 to 150 MHz. As dual-in, dual-out
+	 * isn't supported by the driver yet, simply derive the limits from the
+	 * input mode.
 	 */
-	if (mode->clock < 8000)
+	if (thc63->timings.dual_link) {
+		min_freq = 40000;
+		max_freq = 150000;
+	} else {
+		min_freq = 8000;
+		max_freq = 135000;
+	}
+
+	if (mode->clock < min_freq)
 		return MODE_CLOCK_LOW;
 
-	if (mode->clock > 135000)
+	if (mode->clock > max_freq)
 		return MODE_CLOCK_HIGH;
 
 	return MODE_OK;
@@ -101,19 +118,19 @@ static const struct drm_bridge_funcs thc63_bridge_func = {
 
 static int thc63_parse_dt(struct thc63_dev *thc63)
 {
-	struct device_node *thc63_out;
+	struct device_node *endpoint;
 	struct device_node *remote;
 
-	thc63_out = of_graph_get_endpoint_by_regs(thc63->dev->of_node,
-						  THC63_RGB_OUT0, -1);
-	if (!thc63_out) {
+	endpoint = of_graph_get_endpoint_by_regs(thc63->dev->of_node,
+						 THC63_RGB_OUT0, -1);
+	if (!endpoint) {
 		dev_err(thc63->dev, "Missing endpoint in port@%u\n",
 			THC63_RGB_OUT0);
 		return -ENODEV;
 	}
 
-	remote = of_graph_get_remote_port_parent(thc63_out);
-	of_node_put(thc63_out);
+	remote = of_graph_get_remote_port_parent(endpoint);
+	of_node_put(endpoint);
 	if (!remote) {
 		dev_err(thc63->dev, "Endpoint in port@%u unconnected\n",
 			THC63_RGB_OUT0);
@@ -132,6 +149,22 @@ static int thc63_parse_dt(struct thc63_dev *thc63)
 	if (!thc63->next)
 		return -EPROBE_DEFER;
 
+	endpoint = of_graph_get_endpoint_by_regs(thc63->dev->of_node,
+						 THC63_LVDS_IN1, -1);
+	if (endpoint) {
+		remote = of_graph_get_remote_port_parent(endpoint);
+		of_node_put(endpoint);
+
+		if (remote) {
+			if (of_device_is_available(remote))
+				thc63->timings.dual_link = true;
+			of_node_put(remote);
+		}
+	}
+
+	dev_dbg(thc63->dev, "operating in %s-link mode\n",
+		thc63->timings.dual_link ? "dual" : "single");
+
 	return 0;
 }
 
@@ -188,6 +221,7 @@ static int thc63_probe(struct platform_device *pdev)
 	thc63->bridge.driver_private = thc63;
 	thc63->bridge.of_node = pdev->dev.of_node;
 	thc63->bridge.funcs = &thc63_bridge_func;
+	thc63->bridge.timings = &thc63->timings;
 
 	drm_bridge_add(&thc63->bridge);
 
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index f72ee137e5f1..b77a52d05061 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -3,23 +3,25 @@
  * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_atomic.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_dp_helper.h>
-#include <drm/drm_mipi_dsi.h>
-#include <drm/drm_of.h>
-#include <drm/drm_panel.h>
-#include <drm/drm_probe_helper.h>
 #include <linux/clk.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/iopoll.h>
+#include <linux/module.h>
 #include <linux/of_graph.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+
 #define SN_DEVICE_REV_REG			0x08
 #define SN_DPPLL_SRC_REG			0x0A
 #define  DPPLL_CLK_SRC_DSICLK			BIT(0)
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index 8b0e71bd3ca7..4e76b2b27374 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -11,15 +11,15 @@
 #include <linux/delay.h>
 #include <linux/fwnode.h>
 #include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
 #include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/of_graph.h>
 #include <linux/platform_device.h>
-#include <linux/i2c.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 #define HOTPLUG_DEBOUNCE_MS		1100
@@ -70,7 +70,12 @@ static int tfp410_get_modes(struct drm_connector *connector)
 
 	drm_connector_update_edid_property(connector, edid);
 
-	return drm_add_edid_modes(connector, edid);
+	ret = drm_add_edid_modes(connector, edid);
+
+	kfree(edid);
+
+	return ret;
+
 fallback:
 	/* No EDID, fallback on the XGA standard modes */
 	ret = drm_add_modes_noedid(connector, 1920, 1200);
@@ -376,7 +381,8 @@ static int tfp410_fini(struct device *dev)
 {
 	struct tfp410 *dvi = dev_get_drvdata(dev);
 
-	cancel_delayed_work_sync(&dvi->hpd_work);
+	if (dvi->hpd_irq >= 0)
+		cancel_delayed_work_sync(&dvi->hpd_work);
 
 	drm_bridge_remove(&dvi->bridge);
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
deleted file mode 100644
index e6b98467a428..000000000000
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors: Dave Airlie <airlied@redhat.com>
- */
-#include <drm/drmP.h>
-#include <drm/ttm/ttm_page_alloc.h>
-
-#include "cirrus_drv.h"
-
-static inline struct cirrus_device *
-cirrus_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct cirrus_device, ttm.bdev);
-}
-
-static void cirrus_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct cirrus_bo *bo;
-
-	bo = container_of(tbo, struct cirrus_bo, bo);
-
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool cirrus_ttm_bo_is_cirrus_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &cirrus_bo_ttm_destroy)
-		return true;
-	return false;
-}
-
-static int
-cirrus_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-		     struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-cirrus_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct cirrus_bo *cirrusbo = cirrus_bo(bo);
-
-	if (!cirrus_ttm_bo_is_cirrus_bo(bo))
-		return;
-
-	cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_SYSTEM);
-	*pl = cirrusbo->placement;
-}
-
-static int cirrus_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
-{
-	struct cirrus_bo *cirrusbo = cirrus_bo(bo);
-
-	return drm_vma_node_verify_access(&cirrusbo->gem.vma_node,
-					  filp->private_data);
-}
-
-static int cirrus_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				  struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct cirrus_device *cirrus = cirrus_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = pci_resource_start(cirrus->dev->pdev, 0);
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-	return 0;
-}
-
-static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
-static void cirrus_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func cirrus_tt_backend_func = {
-	.destroy = &cirrus_ttm_backend_destroy,
-};
-
-
-static struct ttm_tt *cirrus_ttm_tt_create(struct ttm_buffer_object *bo,
-					   uint32_t page_flags)
-{
-	struct ttm_tt *tt;
-
-	tt = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
-	if (tt == NULL)
-		return NULL;
-	tt->func = &cirrus_tt_backend_func;
-	if (ttm_tt_init(tt, bo, page_flags)) {
-		kfree(tt);
-		return NULL;
-	}
-	return tt;
-}
-
-struct ttm_bo_driver cirrus_bo_driver = {
-	.ttm_tt_create = cirrus_ttm_tt_create,
-	.init_mem_type = cirrus_bo_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = cirrus_bo_evict_flags,
-	.move = NULL,
-	.verify_access = cirrus_bo_verify_access,
-	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
-	.io_mem_free = &cirrus_ttm_io_mem_free,
-};
-
-int cirrus_mm_init(struct cirrus_device *cirrus)
-{
-	int ret;
-	struct drm_device *dev = cirrus->dev;
-	struct ttm_bo_device *bdev = &cirrus->ttm.bdev;
-
-	ret = ttm_bo_device_init(&cirrus->ttm.bdev,
-				 &cirrus_bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("Error initialising bo driver; %d\n", ret);
-		return ret;
-	}
-
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
-			     cirrus->mc.vram_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
-		return ret;
-	}
-
-	arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
-				   pci_resource_len(dev->pdev, 0));
-
-	cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
-					   pci_resource_len(dev->pdev, 0));
-
-	cirrus->mm_inited = true;
-	return 0;
-}
-
-void cirrus_mm_fini(struct cirrus_device *cirrus)
-{
-	struct drm_device *dev = cirrus->dev;
-
-	if (!cirrus->mm_inited)
-		return;
-
-	ttm_bo_device_release(&cirrus->ttm.bdev);
-
-	arch_phys_wc_del(cirrus->fb_mtrr);
-	cirrus->fb_mtrr = 0;
-	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
-				pci_resource_len(dev->pdev, 0));
-}
-
-void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
-{
-	u32 c = 0;
-	unsigned i;
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
-	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	if (!c)
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	bo->placement.num_placement = c;
-	bo->placement.num_busy_placement = c;
-	for (i = 0; i < c; ++i) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-}
-
-int cirrus_bo_create(struct drm_device *dev, int size, int align,
-		  uint32_t flags, struct cirrus_bo **pcirrusbo)
-{
-	struct cirrus_device *cirrus = dev->dev_private;
-	struct cirrus_bo *cirrusbo;
-	size_t acc_size;
-	int ret;
-
-	cirrusbo = kzalloc(sizeof(struct cirrus_bo), GFP_KERNEL);
-	if (!cirrusbo)
-		return -ENOMEM;
-
-	ret = drm_gem_object_init(dev, &cirrusbo->gem, size);
-	if (ret) {
-		kfree(cirrusbo);
-		return ret;
-	}
-
-	cirrusbo->bo.bdev = &cirrus->ttm.bdev;
-
-	cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&cirrus->ttm.bdev, size,
-				       sizeof(struct cirrus_bo));
-
-	ret = ttm_bo_init(&cirrus->ttm.bdev, &cirrusbo->bo, size,
-			  ttm_bo_type_device, &cirrusbo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, cirrus_bo_ttm_destroy);
-	if (ret)
-		return ret;
-
-	*pcirrusbo = cirrusbo;
-	return 0;
-}
-
-static inline u64 cirrus_bo_gpu_offset(struct cirrus_bo *bo)
-{
-	return bo->bo.offset;
-}
-
-int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		if (gpu_addr)
-			*gpu_addr = cirrus_bo_gpu_offset(bo);
-	}
-
-	cirrus_ttm_placement(bo, pl_flag);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret)
-		return ret;
-
-	bo->pin_count = 1;
-	if (gpu_addr)
-		*gpu_addr = cirrus_bo_gpu_offset(bo);
-	return 0;
-}
-
-int cirrus_bo_push_sysram(struct cirrus_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	if (bo->kmap.virtual)
-		ttm_bo_kunmap(&bo->kmap);
-
-	cirrus_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret) {
-		DRM_ERROR("pushing to VRAM failed\n");
-		return ret;
-	}
-	return 0;
-}
-
-int cirrus_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct cirrus_device *cirrus = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &cirrus->ttm.bdev);
-}
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 40fba1c04dfc..117b8ee98243 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -31,13 +31,20 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
-#include "drm_legacy.h"
 
 #include <asm/agp.h>
 
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
+
+#include "drm_legacy.h"
+
 /**
  * Get AGP information.
  *
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index f4924cb7f495..419381abbdd1 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -26,13 +26,18 @@
  */
 
 
-#include <drm/drmP.h>
+#include <linux/sync_file.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_print.h>
 #include <drm/drm_writeback.h>
-#include <linux/sync_file.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -379,6 +384,7 @@ static void drm_atomic_crtc_print_state(struct drm_printer *p,
 	drm_printf(p, "crtc[%u]: %s\n", crtc->base.id, crtc->name);
 	drm_printf(p, "\tenable=%d\n", state->enable);
 	drm_printf(p, "\tactive=%d\n", state->active);
+	drm_printf(p, "\tself_refresh_active=%d\n", state->self_refresh_active);
 	drm_printf(p, "\tplanes_changed=%d\n", state->planes_changed);
 	drm_printf(p, "\tmode_changed=%d\n", state->mode_changed);
 	drm_printf(p, "\tactive_changed=%d\n", state->active_changed);
@@ -842,6 +848,75 @@ drm_atomic_get_new_private_obj_state(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_get_new_private_obj_state);
 
 /**
+ * drm_atomic_get_old_connector_for_encoder - Get old connector for an encoder
+ * @state: Atomic state
+ * @encoder: The encoder to fetch the connector state for
+ *
+ * This function finds and returns the connector that was connected to @encoder
+ * as specified by the @state.
+ *
+ * If there is no connector in @state which previously had @encoder connected to
+ * it, this function will return NULL. While this may seem like an invalid use
+ * case, it is sometimes useful to differentiate commits which had no prior
+ * connectors attached to @encoder vs ones that did (and to inspect their
+ * state). This is especially true in enable hooks because the pipeline has
+ * changed.
+ *
+ * Returns: The old connector connected to @encoder, or NULL if the encoder is
+ * not connected.
+ */
+struct drm_connector *
+drm_atomic_get_old_connector_for_encoder(struct drm_atomic_state *state,
+					 struct drm_encoder *encoder)
+{
+	struct drm_connector_state *conn_state;
+	struct drm_connector *connector;
+	unsigned int i;
+
+	for_each_old_connector_in_state(state, connector, conn_state, i) {
+		if (conn_state->best_encoder == encoder)
+			return connector;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_atomic_get_old_connector_for_encoder);
+
+/**
+ * drm_atomic_get_new_connector_for_encoder - Get new connector for an encoder
+ * @state: Atomic state
+ * @encoder: The encoder to fetch the connector state for
+ *
+ * This function finds and returns the connector that will be connected to
+ * @encoder as specified by the @state.
+ *
+ * If there is no connector in @state which will have @encoder connected to it,
+ * this function will return NULL. While this may seem like an invalid use case,
+ * it is sometimes useful to differentiate commits which have no connectors
+ * attached to @encoder vs ones that do (and to inspect their state). This is
+ * especially true in disable hooks because the pipeline will change.
+ *
+ * Returns: The new connector connected to @encoder, or NULL if the encoder is
+ * not connected.
+ */
+struct drm_connector *
+drm_atomic_get_new_connector_for_encoder(struct drm_atomic_state *state,
+					 struct drm_encoder *encoder)
+{
+	struct drm_connector_state *conn_state;
+	struct drm_connector *connector;
+	unsigned int i;
+
+	for_each_new_connector_in_state(state, connector, conn_state, i) {
+		if (conn_state->best_encoder == encoder)
+			return connector;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_atomic_get_new_connector_for_encoder);
+
+/**
  * drm_atomic_get_connector_state - get connector state
  * @state: global atomic state object
  * @connector: connector to get state object for
@@ -925,6 +1000,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p,
 
 	drm_printf(p, "connector[%u]: %s\n", connector->base.id, connector->name);
 	drm_printf(p, "\tcrtc=%s\n", state->crtc ? state->crtc->name : "(null)");
+	drm_printf(p, "\tself_refresh_aware=%d\n", state->self_refresh_aware);
 
 	if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
 		if (state->writeback_job && state->writeback_job->fb)
@@ -1174,6 +1250,174 @@ int drm_atomic_nonblocking_commit(struct drm_atomic_state *state)
 }
 EXPORT_SYMBOL(drm_atomic_nonblocking_commit);
 
+/* just used from drm-client and atomic-helper: */
+int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
+				      struct drm_plane_state *plane_state)
+{
+	int ret;
+
+	ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
+	if (ret != 0)
+		return ret;
+
+	drm_atomic_set_fb_for_plane(plane_state, NULL);
+	plane_state->crtc_x = 0;
+	plane_state->crtc_y = 0;
+	plane_state->crtc_w = 0;
+	plane_state->crtc_h = 0;
+	plane_state->src_x = 0;
+	plane_state->src_y = 0;
+	plane_state->src_w = 0;
+	plane_state->src_h = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_disable_plane);
+
+static int update_output_state(struct drm_atomic_state *state,
+			       struct drm_mode_set *set)
+{
+	struct drm_device *dev = set->crtc->dev;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_connector *connector;
+	struct drm_connector_state *new_conn_state;
+	int ret, i;
+
+	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+			       state->acquire_ctx);
+	if (ret)
+		return ret;
+
+	/* First disable all connectors on the target crtc. */
+	ret = drm_atomic_add_affected_connectors(state, set->crtc);
+	if (ret)
+		return ret;
+
+	for_each_new_connector_in_state(state, connector, new_conn_state, i) {
+		if (new_conn_state->crtc == set->crtc) {
+			ret = drm_atomic_set_crtc_for_connector(new_conn_state,
+								NULL);
+			if (ret)
+				return ret;
+
+			/* Make sure legacy setCrtc always re-trains */
+			new_conn_state->link_status = DRM_LINK_STATUS_GOOD;
+		}
+	}
+
+	/* Then set all connectors from set->connectors on the target crtc */
+	for (i = 0; i < set->num_connectors; i++) {
+		new_conn_state = drm_atomic_get_connector_state(state,
+								set->connectors[i]);
+		if (IS_ERR(new_conn_state))
+			return PTR_ERR(new_conn_state);
+
+		ret = drm_atomic_set_crtc_for_connector(new_conn_state,
+							set->crtc);
+		if (ret)
+			return ret;
+	}
+
+	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+		/*
+		 * Don't update ->enable for the CRTC in the set_config request,
+		 * since a mismatch would indicate a bug in the upper layers.
+		 * The actual modeset code later on will catch any
+		 * inconsistencies here.
+		 */
+		if (crtc == set->crtc)
+			continue;
+
+		if (!new_crtc_state->connector_mask) {
+			ret = drm_atomic_set_mode_prop_for_crtc(new_crtc_state,
+								NULL);
+			if (ret < 0)
+				return ret;
+
+			new_crtc_state->active = false;
+		}
+	}
+
+	return 0;
+}
+
+/* just used from drm-client and atomic-helper: */
+int __drm_atomic_helper_set_config(struct drm_mode_set *set,
+				   struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane_state *primary_state;
+	struct drm_crtc *crtc = set->crtc;
+	int hdisplay, vdisplay;
+	int ret;
+
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	primary_state = drm_atomic_get_plane_state(state, crtc->primary);
+	if (IS_ERR(primary_state))
+		return PTR_ERR(primary_state);
+
+	if (!set->mode) {
+		WARN_ON(set->fb);
+		WARN_ON(set->num_connectors);
+
+		ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL);
+		if (ret != 0)
+			return ret;
+
+		crtc_state->active = false;
+
+		ret = drm_atomic_set_crtc_for_plane(primary_state, NULL);
+		if (ret != 0)
+			return ret;
+
+		drm_atomic_set_fb_for_plane(primary_state, NULL);
+
+		goto commit;
+	}
+
+	WARN_ON(!set->fb);
+	WARN_ON(!set->num_connectors);
+
+	ret = drm_atomic_set_mode_for_crtc(crtc_state, set->mode);
+	if (ret != 0)
+		return ret;
+
+	crtc_state->active = true;
+
+	ret = drm_atomic_set_crtc_for_plane(primary_state, crtc);
+	if (ret != 0)
+		return ret;
+
+	drm_mode_get_hv_timing(set->mode, &hdisplay, &vdisplay);
+
+	drm_atomic_set_fb_for_plane(primary_state, set->fb);
+	primary_state->crtc_x = 0;
+	primary_state->crtc_y = 0;
+	primary_state->crtc_w = hdisplay;
+	primary_state->crtc_h = vdisplay;
+	primary_state->src_x = set->x << 16;
+	primary_state->src_y = set->y << 16;
+	if (drm_rotation_90_or_270(primary_state->rotation)) {
+		primary_state->src_w = vdisplay << 16;
+		primary_state->src_h = hdisplay << 16;
+	} else {
+		primary_state->src_w = hdisplay << 16;
+		primary_state->src_h = vdisplay << 16;
+	}
+
+commit:
+	ret = update_output_state(state, set);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_set_config);
+
 void drm_atomic_print_state(const struct drm_atomic_state *state)
 {
 	struct drm_printer p = drm_info_printer(state->dev->dev);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 22a5c617f670..aa16ea17ff9b 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -25,14 +25,18 @@
  * Daniel Vetter <daniel.vetter@ffwll.ch>
  */
 
-#include <drm/drmP.h>
+#include <linux/dma-fence.h>
+
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_device.h>
 #include <drm/drm_plane_helper.h>
-#include <drm/drm_atomic_helper.h>
+#include <drm/drm_print.h>
+#include <drm/drm_self_refresh_helper.h>
+#include <drm/drm_vblank.h>
 #include <drm/drm_writeback.h>
-#include <drm/drm_damage_helper.h>
-#include <linux/dma-fence.h>
 
 #include "drm_crtc_helper_internal.h"
 #include "drm_crtc_internal.h"
@@ -683,7 +687,7 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 		}
 
 		if (funcs->atomic_check)
-			ret = funcs->atomic_check(connector, new_connector_state);
+			ret = funcs->atomic_check(connector, state);
 		if (ret)
 			return ret;
 
@@ -725,7 +729,7 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 			continue;
 
 		if (funcs->atomic_check)
-			ret = funcs->atomic_check(connector, new_connector_state);
+			ret = funcs->atomic_check(connector, state);
 		if (ret)
 			return ret;
 	}
@@ -950,10 +954,33 @@ int drm_atomic_helper_check(struct drm_device *dev,
 	if (state->legacy_cursor_update)
 		state->async_update = !drm_atomic_helper_async_check(dev, state);
 
+	drm_self_refresh_helper_alter_state(state);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_atomic_helper_check);
 
+static bool
+crtc_needs_disable(struct drm_crtc_state *old_state,
+		   struct drm_crtc_state *new_state)
+{
+	/*
+	 * No new_state means the crtc is off, so the only criteria is whether
+	 * it's currently active or in self refresh mode.
+	 */
+	if (!new_state)
+		return drm_atomic_crtc_effectively_active(old_state);
+
+	/*
+	 * We need to run through the crtc_funcs->disable() function if the crtc
+	 * is currently on, if it's transitioning to self refresh mode, or if
+	 * it's in self refresh mode and needs to be fully disabled.
+	 */
+	return old_state->active ||
+	       (old_state->self_refresh_active && !new_state->enable) ||
+	       new_state->self_refresh_active;
+}
+
 static void
 disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 {
@@ -974,7 +1001,14 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 
 		old_crtc_state = drm_atomic_get_old_crtc_state(old_state, old_conn_state->crtc);
 
-		if (!old_crtc_state->active ||
+		if (new_conn_state->crtc)
+			new_crtc_state = drm_atomic_get_new_crtc_state(
+						old_state,
+						new_conn_state->crtc);
+		else
+			new_crtc_state = NULL;
+
+		if (!crtc_needs_disable(old_crtc_state, new_crtc_state) ||
 		    !drm_atomic_crtc_needs_modeset(old_conn_state->crtc->state))
 			continue;
 
@@ -995,11 +1029,13 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 		 * Each encoder has at most one connector (since we always steal
 		 * it away), so we won't call disable hooks twice.
 		 */
-		drm_bridge_disable(encoder->bridge);
+		drm_atomic_bridge_disable(encoder->bridge, old_state);
 
 		/* Right function depends upon target state. */
 		if (funcs) {
-			if (new_conn_state->crtc && funcs->prepare)
+			if (funcs->atomic_disable)
+				funcs->atomic_disable(encoder, old_state);
+			else if (new_conn_state->crtc && funcs->prepare)
 				funcs->prepare(encoder);
 			else if (funcs->disable)
 				funcs->disable(encoder);
@@ -1007,7 +1043,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 				funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
 		}
 
-		drm_bridge_post_disable(encoder->bridge);
+		drm_atomic_bridge_post_disable(encoder->bridge, old_state);
 	}
 
 	for_each_oldnew_crtc_in_state(old_state, crtc, old_crtc_state, new_crtc_state, i) {
@@ -1018,7 +1054,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			continue;
 
-		if (!old_crtc_state->active)
+		if (!crtc_needs_disable(old_crtc_state, new_crtc_state))
 			continue;
 
 		funcs = crtc->helper_private;
@@ -1305,16 +1341,18 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 		 * Each encoder has at most one connector (since we always steal
 		 * it away), so we won't call enable hooks twice.
 		 */
-		drm_bridge_pre_enable(encoder->bridge);
+		drm_atomic_bridge_pre_enable(encoder->bridge, old_state);
 
 		if (funcs) {
-			if (funcs->enable)
+			if (funcs->atomic_enable)
+				funcs->atomic_enable(encoder, old_state);
+			else if (funcs->enable)
 				funcs->enable(encoder);
 			else if (funcs->commit)
 				funcs->commit(encoder);
 		}
 
-		drm_bridge_enable(encoder->bridge);
+		drm_atomic_bridge_enable(encoder->bridge, old_state);
 	}
 
 	drm_atomic_helper_commit_writebacks(dev, old_state);
@@ -1423,7 +1461,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 		ret = wait_event_timeout(dev->vblank[i].queue,
 				old_state->crtcs[i].last_vblank_count !=
 					drm_crtc_vblank_count(crtc),
-				msecs_to_jiffies(50));
+				msecs_to_jiffies(100));
 
 		WARN(!ret, "[CRTC:%d:%s] vblank wait timed out\n",
 		     crtc->base.id, crtc->name);
@@ -2843,95 +2881,6 @@ fail:
 }
 EXPORT_SYMBOL(drm_atomic_helper_disable_plane);
 
-/* just used from fb-helper and atomic-helper: */
-int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
-		struct drm_plane_state *plane_state)
-{
-	int ret;
-
-	ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
-	if (ret != 0)
-		return ret;
-
-	drm_atomic_set_fb_for_plane(plane_state, NULL);
-	plane_state->crtc_x = 0;
-	plane_state->crtc_y = 0;
-	plane_state->crtc_w = 0;
-	plane_state->crtc_h = 0;
-	plane_state->src_x = 0;
-	plane_state->src_y = 0;
-	plane_state->src_w = 0;
-	plane_state->src_h = 0;
-
-	return 0;
-}
-
-static int update_output_state(struct drm_atomic_state *state,
-			       struct drm_mode_set *set)
-{
-	struct drm_device *dev = set->crtc->dev;
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *new_crtc_state;
-	struct drm_connector *connector;
-	struct drm_connector_state *new_conn_state;
-	int ret, i;
-
-	ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
-			       state->acquire_ctx);
-	if (ret)
-		return ret;
-
-	/* First disable all connectors on the target crtc. */
-	ret = drm_atomic_add_affected_connectors(state, set->crtc);
-	if (ret)
-		return ret;
-
-	for_each_new_connector_in_state(state, connector, new_conn_state, i) {
-		if (new_conn_state->crtc == set->crtc) {
-			ret = drm_atomic_set_crtc_for_connector(new_conn_state,
-								NULL);
-			if (ret)
-				return ret;
-
-			/* Make sure legacy setCrtc always re-trains */
-			new_conn_state->link_status = DRM_LINK_STATUS_GOOD;
-		}
-	}
-
-	/* Then set all connectors from set->connectors on the target crtc */
-	for (i = 0; i < set->num_connectors; i++) {
-		new_conn_state = drm_atomic_get_connector_state(state,
-							    set->connectors[i]);
-		if (IS_ERR(new_conn_state))
-			return PTR_ERR(new_conn_state);
-
-		ret = drm_atomic_set_crtc_for_connector(new_conn_state,
-							set->crtc);
-		if (ret)
-			return ret;
-	}
-
-	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
-		/* Don't update ->enable for the CRTC in the set_config request,
-		 * since a mismatch would indicate a bug in the upper layers.
-		 * The actual modeset code later on will catch any
-		 * inconsistencies here. */
-		if (crtc == set->crtc)
-			continue;
-
-		if (!new_crtc_state->connector_mask) {
-			ret = drm_atomic_set_mode_prop_for_crtc(new_crtc_state,
-								NULL);
-			if (ret < 0)
-				return ret;
-
-			new_crtc_state->active = false;
-		}
-	}
-
-	return 0;
-}
-
 /**
  * drm_atomic_helper_set_config - set a new config from userspace
  * @set: mode set configuration
@@ -2976,81 +2925,6 @@ fail:
 }
 EXPORT_SYMBOL(drm_atomic_helper_set_config);
 
-/* just used from fb-helper and atomic-helper: */
-int __drm_atomic_helper_set_config(struct drm_mode_set *set,
-		struct drm_atomic_state *state)
-{
-	struct drm_crtc_state *crtc_state;
-	struct drm_plane_state *primary_state;
-	struct drm_crtc *crtc = set->crtc;
-	int hdisplay, vdisplay;
-	int ret;
-
-	crtc_state = drm_atomic_get_crtc_state(state, crtc);
-	if (IS_ERR(crtc_state))
-		return PTR_ERR(crtc_state);
-
-	primary_state = drm_atomic_get_plane_state(state, crtc->primary);
-	if (IS_ERR(primary_state))
-		return PTR_ERR(primary_state);
-
-	if (!set->mode) {
-		WARN_ON(set->fb);
-		WARN_ON(set->num_connectors);
-
-		ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL);
-		if (ret != 0)
-			return ret;
-
-		crtc_state->active = false;
-
-		ret = drm_atomic_set_crtc_for_plane(primary_state, NULL);
-		if (ret != 0)
-			return ret;
-
-		drm_atomic_set_fb_for_plane(primary_state, NULL);
-
-		goto commit;
-	}
-
-	WARN_ON(!set->fb);
-	WARN_ON(!set->num_connectors);
-
-	ret = drm_atomic_set_mode_for_crtc(crtc_state, set->mode);
-	if (ret != 0)
-		return ret;
-
-	crtc_state->active = true;
-
-	ret = drm_atomic_set_crtc_for_plane(primary_state, crtc);
-	if (ret != 0)
-		return ret;
-
-	drm_mode_get_hv_timing(set->mode, &hdisplay, &vdisplay);
-
-	drm_atomic_set_fb_for_plane(primary_state, set->fb);
-	primary_state->crtc_x = 0;
-	primary_state->crtc_y = 0;
-	primary_state->crtc_w = hdisplay;
-	primary_state->crtc_h = vdisplay;
-	primary_state->src_x = set->x << 16;
-	primary_state->src_y = set->y << 16;
-	if (drm_rotation_90_or_270(primary_state->rotation)) {
-		primary_state->src_w = vdisplay << 16;
-		primary_state->src_h = hdisplay << 16;
-	} else {
-		primary_state->src_w = hdisplay << 16;
-		primary_state->src_h = vdisplay << 16;
-	}
-
-commit:
-	ret = update_output_state(state, set);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 /**
  * drm_atomic_helper_disable_all - disable all currently active outputs
  * @dev: DRM device
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 59ffb6b9c745..46dc264a248b 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -24,12 +24,13 @@
  * Daniel Vetter <daniel.vetter@ffwll.ch>
  */
 
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_state_helper.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_plane.h>
 #include <drm/drm_connector.h>
-#include <drm/drm_atomic.h>
+#include <drm/drm_crtc.h>
 #include <drm/drm_device.h>
+#include <drm/drm_plane.h>
+#include <drm/drm_print.h>
 #include <drm/drm_writeback.h>
 
 #include <linux/slab.h>
@@ -57,6 +58,29 @@
  */
 
 /**
+ * __drm_atomic_helper_crtc_reset - reset state on CRTC
+ * @crtc: drm CRTC
+ * @crtc_state: CRTC state to assign
+ *
+ * Initializes the newly allocated @crtc_state and assigns it to
+ * the &drm_crtc->state pointer of @crtc, usually required when
+ * initializing the drivers or when called from the &drm_crtc_funcs.reset
+ * hook.
+ *
+ * This is useful for drivers that subclass the CRTC state.
+ */
+void
+__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
+			       struct drm_crtc_state *crtc_state)
+{
+	if (crtc_state)
+		crtc_state->crtc = crtc;
+
+	crtc->state = crtc_state;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_crtc_reset);
+
+/**
  * drm_atomic_helper_crtc_reset - default &drm_crtc_funcs.reset hook for CRTCs
  * @crtc: drm CRTC
  *
@@ -65,14 +89,13 @@
  */
 void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc)
 {
-	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-
-	kfree(crtc->state);
-	crtc->state = kzalloc(sizeof(*crtc->state), GFP_KERNEL);
+	struct drm_crtc_state *crtc_state =
+		kzalloc(sizeof(*crtc->state), GFP_KERNEL);
 
 	if (crtc->state)
-		crtc->state->crtc = crtc;
+		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, crtc_state);
 }
 EXPORT_SYMBOL(drm_atomic_helper_crtc_reset);
 
@@ -106,6 +129,10 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
 	state->commit = NULL;
 	state->event = NULL;
 	state->pageflip_flags = 0;
+
+	/* Self refresh should be canceled when a new update is available */
+	state->active = drm_atomic_crtc_effectively_active(state);
+	state->self_refresh_active = false;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_crtc_duplicate_state);
 
@@ -314,7 +341,7 @@ EXPORT_SYMBOL(drm_atomic_helper_plane_destroy_state);
  * @conn_state: connector state to assign
  *
  * Initializes the newly allocated @conn_state and assigns it to
- * the &drm_conector->state pointer of @connector, usually required when
+ * the &drm_connector->state pointer of @connector, usually required when
  * initializing the drivers or when called from the &drm_connector_funcs.reset
  * hook.
  *
@@ -353,6 +380,24 @@ void drm_atomic_helper_connector_reset(struct drm_connector *connector)
 EXPORT_SYMBOL(drm_atomic_helper_connector_reset);
 
 /**
+ * drm_atomic_helper_connector_tv_reset - Resets TV connector properties
+ * @connector: DRM connector
+ *
+ * Resets the TV-related properties attached to a connector.
+ */
+void drm_atomic_helper_connector_tv_reset(struct drm_connector *connector)
+{
+	struct drm_cmdline_mode *cmdline = &connector->cmdline_mode;
+	struct drm_connector_state *state = connector->state;
+
+	state->tv.margins.left = cmdline->tv_margins.left;
+	state->tv.margins.right = cmdline->tv_margins.right;
+	state->tv.margins.top = cmdline->tv_margins.top;
+	state->tv.margins.bottom = cmdline->tv_margins.bottom;
+}
+EXPORT_SYMBOL(drm_atomic_helper_connector_tv_reset);
+
+/**
  * __drm_atomic_helper_connector_duplicate_state - copy atomic connector state
  * @connector: connector object
  * @state: atomic connector state
@@ -369,6 +414,9 @@ __drm_atomic_helper_connector_duplicate_state(struct drm_connector *connector,
 		drm_connector_get(connector);
 	state->commit = NULL;
 
+	if (state->hdr_output_metadata)
+		drm_property_blob_get(state->hdr_output_metadata);
+
 	/* Don't copy over a writeback job, they are used only once */
 	state->writeback_job = NULL;
 }
@@ -416,6 +464,8 @@ __drm_atomic_helper_connector_destroy_state(struct drm_connector_state *state)
 
 	if (state->writeback_job)
 		drm_writeback_cleanup_job(state->writeback_job);
+
+	drm_property_blob_put(state->hdr_output_metadata);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_connector_destroy_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 428d82662dc4..abe38bdf85ae 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -490,7 +490,7 @@ drm_atomic_crtc_get_property(struct drm_crtc *crtc,
 	struct drm_mode_config *config = &dev->mode_config;
 
 	if (property == config->prop_active)
-		*val = state->active;
+		*val = drm_atomic_crtc_effectively_active(state);
 	else if (property == config->prop_mode_id)
 		*val = (state->mode_blob) ? state->mode_blob->base.id : 0;
 	else if (property == config->prop_vrr_enabled)
@@ -676,6 +676,8 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_mode_config *config = &dev->mode_config;
+	bool replaced = false;
+	int ret;
 
 	if (property == config->prop_crtc_id) {
 		struct drm_crtc *crtc = drm_crtc_find(dev, file_priv, val);
@@ -726,13 +728,20 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector,
 		 */
 		if (state->link_status != DRM_LINK_STATUS_GOOD)
 			state->link_status = val;
+	} else if (property == config->hdr_output_metadata_property) {
+		ret = drm_atomic_replace_property_blob_from_id(dev,
+				&state->hdr_output_metadata,
+				val,
+				sizeof(struct hdr_output_metadata), -1,
+				&replaced);
+		return ret;
 	} else if (property == config->aspect_ratio_property) {
 		state->picture_aspect_ratio = val;
 	} else if (property == config->content_type_property) {
 		state->content_type = val;
 	} else if (property == connector->scaling_mode_property) {
 		state->scaling_mode = val;
-	} else if (property == connector->content_protection_property) {
+	} else if (property == config->content_protection_property) {
 		if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) {
 			DRM_DEBUG_KMS("only drivers can set CP Enabled\n");
 			return -EINVAL;
@@ -779,7 +788,10 @@ drm_atomic_connector_get_property(struct drm_connector *connector,
 	if (property == config->prop_crtc_id) {
 		*val = (state->crtc) ? state->crtc->base.id : 0;
 	} else if (property == config->dpms_property) {
-		*val = connector->dpms;
+		if (state->crtc && state->crtc->state->self_refresh_active)
+			*val = DRM_MODE_DPMS_ON;
+		else
+			*val = connector->dpms;
 	} else if (property == config->tv_select_subconnector_property) {
 		*val = state->tv.subconnector;
 	} else if (property == config->tv_left_margin_property) {
@@ -814,7 +826,10 @@ drm_atomic_connector_get_property(struct drm_connector *connector,
 		*val = state->colorspace;
 	} else if (property == connector->scaling_mode_property) {
 		*val = state->scaling_mode;
-	} else if (property == connector->content_protection_property) {
+	} else if (property == config->hdr_output_metadata_property) {
+		*val = state->hdr_output_metadata ?
+			state->hdr_output_metadata->base.id : 0;
+	} else if (property == config->content_protection_property) {
 		*val = state->content_protection;
 	} else if (property == config->writeback_fb_id_property) {
 		/* Writeback framebuffer is one-shot, write and forget */
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 22c7a104b802..cc9acd986c68 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -28,10 +28,16 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
+#include <linux/slab.h>
+
+#include <drm/drm_auth.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_lease.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
 #include "drm_legacy.h"
-#include <drm/drm_lease.h>
 
 /**
  * DOC: master and authentication
@@ -351,3 +357,23 @@ void drm_master_put(struct drm_master **master)
 	*master = NULL;
 }
 EXPORT_SYMBOL(drm_master_put);
+
+/* Used by drm_client and drm_fb_helper */
+bool drm_master_internal_acquire(struct drm_device *dev)
+{
+	mutex_lock(&dev->master_mutex);
+	if (dev->master) {
+		mutex_unlock(&dev->master_mutex);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(drm_master_internal_acquire);
+
+/* Used by drm_client and drm_fb_helper */
+void drm_master_internal_release(struct drm_device *dev)
+{
+	mutex_unlock(&dev->master_mutex);
+}
+EXPORT_SYMBOL(drm_master_internal_release);
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 0c78ca386cbe..37ac168fcb60 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -23,13 +23,16 @@
  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  * OF THIS SOFTWARE.
  */
-#include <drm/drmP.h>
-#include <drm/drm_atomic.h>
-#include <drm/drm_blend.h>
+
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
 
+#include <drm/drm_atomic.h>
+#include <drm/drm_blend.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+
 #include "drm_crtc_internal.h"
 
 /**
diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c
index 138b2711d389..cba537c99e43 100644
--- a/drivers/gpu/drm/drm_bridge.c
+++ b/drivers/gpu/drm/drm_bridge.c
@@ -352,6 +352,116 @@ void drm_bridge_enable(struct drm_bridge *bridge)
 }
 EXPORT_SYMBOL(drm_bridge_enable);
 
+/**
+ * drm_atomic_bridge_disable - disables all bridges in the encoder chain
+ * @bridge: bridge control structure
+ * @state: atomic state being committed
+ *
+ * Calls &drm_bridge_funcs.atomic_disable (falls back on
+ * &drm_bridge_funcs.disable) op for all the bridges in the encoder chain,
+ * starting from the last bridge to the first. These are called before calling
+ * &drm_encoder_helper_funcs.atomic_disable
+ *
+ * Note: the bridge passed should be the one closest to the encoder
+ */
+void drm_atomic_bridge_disable(struct drm_bridge *bridge,
+			       struct drm_atomic_state *state)
+{
+	if (!bridge)
+		return;
+
+	drm_atomic_bridge_disable(bridge->next, state);
+
+	if (bridge->funcs->atomic_disable)
+		bridge->funcs->atomic_disable(bridge, state);
+	else if (bridge->funcs->disable)
+		bridge->funcs->disable(bridge);
+}
+EXPORT_SYMBOL(drm_atomic_bridge_disable);
+
+/**
+ * drm_atomic_bridge_post_disable - cleans up after disabling all bridges in the
+ *				    encoder chain
+ * @bridge: bridge control structure
+ * @state: atomic state being committed
+ *
+ * Calls &drm_bridge_funcs.atomic_post_disable (falls back on
+ * &drm_bridge_funcs.post_disable) op for all the bridges in the encoder chain,
+ * starting from the first bridge to the last. These are called after completing
+ * &drm_encoder_helper_funcs.atomic_disable
+ *
+ * Note: the bridge passed should be the one closest to the encoder
+ */
+void drm_atomic_bridge_post_disable(struct drm_bridge *bridge,
+				    struct drm_atomic_state *state)
+{
+	if (!bridge)
+		return;
+
+	if (bridge->funcs->atomic_post_disable)
+		bridge->funcs->atomic_post_disable(bridge, state);
+	else if (bridge->funcs->post_disable)
+		bridge->funcs->post_disable(bridge);
+
+	drm_atomic_bridge_post_disable(bridge->next, state);
+}
+EXPORT_SYMBOL(drm_atomic_bridge_post_disable);
+
+/**
+ * drm_atomic_bridge_pre_enable - prepares for enabling all bridges in the
+ *				  encoder chain
+ * @bridge: bridge control structure
+ * @state: atomic state being committed
+ *
+ * Calls &drm_bridge_funcs.atomic_pre_enable (falls back on
+ * &drm_bridge_funcs.pre_enable) op for all the bridges in the encoder chain,
+ * starting from the last bridge to the first. These are called before calling
+ * &drm_encoder_helper_funcs.atomic_enable
+ *
+ * Note: the bridge passed should be the one closest to the encoder
+ */
+void drm_atomic_bridge_pre_enable(struct drm_bridge *bridge,
+				  struct drm_atomic_state *state)
+{
+	if (!bridge)
+		return;
+
+	drm_atomic_bridge_pre_enable(bridge->next, state);
+
+	if (bridge->funcs->atomic_pre_enable)
+		bridge->funcs->atomic_pre_enable(bridge, state);
+	else if (bridge->funcs->pre_enable)
+		bridge->funcs->pre_enable(bridge);
+}
+EXPORT_SYMBOL(drm_atomic_bridge_pre_enable);
+
+/**
+ * drm_atomic_bridge_enable - enables all bridges in the encoder chain
+ * @bridge: bridge control structure
+ * @state: atomic state being committed
+ *
+ * Calls &drm_bridge_funcs.atomic_enable (falls back on
+ * &drm_bridge_funcs.enable) op for all the bridges in the encoder chain,
+ * starting from the first bridge to the last. These are called after completing
+ * &drm_encoder_helper_funcs.atomic_enable
+ *
+ * Note: the bridge passed should be the one closest to the encoder
+ */
+void drm_atomic_bridge_enable(struct drm_bridge *bridge,
+			      struct drm_atomic_state *state)
+{
+	if (!bridge)
+		return;
+
+	if (bridge->funcs->atomic_enable)
+		bridge->funcs->atomic_enable(bridge, state);
+	else if (bridge->funcs->enable)
+		bridge->funcs->enable(bridge);
+
+	drm_atomic_bridge_enable(bridge->next, state);
+}
+EXPORT_SYMBOL(drm_atomic_bridge_enable);
+
 #ifdef CONFIG_OF
 /**
  * of_drm_find_bridge - find the bridge corresponding to the device node in
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index bfc419ed9d6c..68dacf8422c6 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -28,15 +28,26 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/vmalloc.h>
-#include <linux/slab.h>
-#include <linux/log2.h>
 #include <linux/export.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/nospec.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
 #include <asm/shmparam.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_print.h>
+
 #include "drm_legacy.h"
 
-#include <linux/nospec.h>
 
 static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
 						  struct drm_local_map *map)
diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index f20d1dda3961..410572f14257 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -15,10 +15,10 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_print.h>
-#include <drm/drmP.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -27,7 +27,6 @@
  * DOC: overview
  *
  * This library provides support for clients running in the kernel like fbdev and bootsplash.
- * Currently it's only partially implemented, just enough to support fbdev.
  *
  * GEM drivers which provide a GEM based dumb buffer with a virtual address are supported.
  */
@@ -92,14 +91,20 @@ int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
 	client->name = name;
 	client->funcs = funcs;
 
-	ret = drm_client_open(client);
+	ret = drm_client_modeset_create(client);
 	if (ret)
 		goto err_put_module;
 
+	ret = drm_client_open(client);
+	if (ret)
+		goto err_free;
+
 	drm_dev_get(dev);
 
 	return 0;
 
+err_free:
+	drm_client_modeset_free(client);
 err_put_module:
 	if (funcs)
 		module_put(funcs->owner);
@@ -148,6 +153,7 @@ void drm_client_release(struct drm_client_dev *client)
 
 	DRM_DEV_DEBUG_KMS(dev->dev, "%s\n", client->name);
 
+	drm_client_modeset_free(client);
 	drm_client_close(client);
 	drm_dev_put(dev);
 	if (client->funcs)
@@ -243,6 +249,7 @@ static void drm_client_buffer_delete(struct drm_client_buffer *buffer)
 static struct drm_client_buffer *
 drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format)
 {
+	const struct drm_format_info *info = drm_format_info(format);
 	struct drm_mode_create_dumb dumb_args = { };
 	struct drm_device *dev = client->dev;
 	struct drm_client_buffer *buffer;
@@ -258,7 +265,7 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
 
 	dumb_args.width = width;
 	dumb_args.height = height;
-	dumb_args.bpp = drm_format_plane_cpp(format, 0) * 8;
+	dumb_args.bpp = info->cpp[0] * 8;
 	ret = drm_mode_create_dumb(dev, &dumb_args, client->file);
 	if (ret)
 		goto err_delete;
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
new file mode 100644
index 000000000000..e95fceac8f8b
--- /dev/null
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -0,0 +1,1125 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2018 Noralf Trønnes
+ * Copyright (c) 2006-2009 Red Hat Inc.
+ * Copyright (c) 2006-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ */
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_client.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_print.h>
+
+#include "drm_crtc_internal.h"
+#include "drm_internal.h"
+
+#define DRM_CLIENT_MAX_CLONED_CONNECTORS	8
+
+struct drm_client_offset {
+	int x, y;
+};
+
+int drm_client_modeset_create(struct drm_client_dev *client)
+{
+	struct drm_device *dev = client->dev;
+	unsigned int num_crtc = dev->mode_config.num_crtc;
+	unsigned int max_connector_count = 1;
+	struct drm_mode_set *modeset;
+	struct drm_crtc *crtc;
+	unsigned int i = 0;
+
+	/* Add terminating zero entry to enable index less iteration */
+	client->modesets = kcalloc(num_crtc + 1, sizeof(*client->modesets), GFP_KERNEL);
+	if (!client->modesets)
+		return -ENOMEM;
+
+	mutex_init(&client->modeset_mutex);
+
+	drm_for_each_crtc(crtc, dev)
+		client->modesets[i++].crtc = crtc;
+
+	/* Cloning is only supported in the single crtc case. */
+	if (num_crtc == 1)
+		max_connector_count = DRM_CLIENT_MAX_CLONED_CONNECTORS;
+
+	for (modeset = client->modesets; modeset->crtc; modeset++) {
+		modeset->connectors = kcalloc(max_connector_count,
+					      sizeof(*modeset->connectors), GFP_KERNEL);
+		if (!modeset->connectors)
+			goto err_free;
+	}
+
+	return 0;
+
+err_free:
+	drm_client_modeset_free(client);
+
+	return -ENOMEM;
+}
+
+static void drm_client_modeset_release(struct drm_client_dev *client)
+{
+	struct drm_mode_set *modeset;
+	unsigned int i;
+
+	drm_client_for_each_modeset(modeset, client) {
+		drm_mode_destroy(client->dev, modeset->mode);
+		modeset->mode = NULL;
+		modeset->fb = NULL;
+
+		for (i = 0; i < modeset->num_connectors; i++) {
+			drm_connector_put(modeset->connectors[i]);
+			modeset->connectors[i] = NULL;
+		}
+		modeset->num_connectors = 0;
+	}
+}
+
+void drm_client_modeset_free(struct drm_client_dev *client)
+{
+	struct drm_mode_set *modeset;
+
+	mutex_lock(&client->modeset_mutex);
+
+	drm_client_modeset_release(client);
+
+	drm_client_for_each_modeset(modeset, client)
+		kfree(modeset->connectors);
+
+	mutex_unlock(&client->modeset_mutex);
+
+	mutex_destroy(&client->modeset_mutex);
+	kfree(client->modesets);
+}
+
+static struct drm_mode_set *
+drm_client_find_modeset(struct drm_client_dev *client, struct drm_crtc *crtc)
+{
+	struct drm_mode_set *modeset;
+
+	drm_client_for_each_modeset(modeset, client)
+		if (modeset->crtc == crtc)
+			return modeset;
+
+	return NULL;
+}
+
+static struct drm_display_mode *
+drm_connector_has_preferred_mode(struct drm_connector *connector, int width, int height)
+{
+	struct drm_display_mode *mode;
+
+	list_for_each_entry(mode, &connector->modes, head) {
+		if (mode->hdisplay > width ||
+		    mode->vdisplay > height)
+			continue;
+		if (mode->type & DRM_MODE_TYPE_PREFERRED)
+			return mode;
+	}
+	return NULL;
+}
+
+static struct drm_display_mode *
+drm_connector_pick_cmdline_mode(struct drm_connector *connector)
+{
+	struct drm_cmdline_mode *cmdline_mode;
+	struct drm_display_mode *mode;
+	bool prefer_non_interlace;
+
+	cmdline_mode = &connector->cmdline_mode;
+	if (cmdline_mode->specified == false)
+		return NULL;
+
+	/* attempt to find a matching mode in the list of modes
+	 *  we have gotten so far, if not add a CVT mode that conforms
+	 */
+	if (cmdline_mode->rb || cmdline_mode->margins)
+		goto create_mode;
+
+	prefer_non_interlace = !cmdline_mode->interlace;
+again:
+	list_for_each_entry(mode, &connector->modes, head) {
+		/* Check (optional) mode name first */
+		if (!strcmp(mode->name, cmdline_mode->name))
+			return mode;
+
+		/* check width/height */
+		if (mode->hdisplay != cmdline_mode->xres ||
+		    mode->vdisplay != cmdline_mode->yres)
+			continue;
+
+		if (cmdline_mode->refresh_specified) {
+			if (mode->vrefresh != cmdline_mode->refresh)
+				continue;
+		}
+
+		if (cmdline_mode->interlace) {
+			if (!(mode->flags & DRM_MODE_FLAG_INTERLACE))
+				continue;
+		} else if (prefer_non_interlace) {
+			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+				continue;
+		}
+		return mode;
+	}
+
+	if (prefer_non_interlace) {
+		prefer_non_interlace = false;
+		goto again;
+	}
+
+create_mode:
+	mode = drm_mode_create_from_cmdline_mode(connector->dev, cmdline_mode);
+	list_add(&mode->head, &connector->modes);
+
+	return mode;
+}
+
+static bool drm_connector_enabled(struct drm_connector *connector, bool strict)
+{
+	bool enable;
+
+	if (connector->display_info.non_desktop)
+		return false;
+
+	if (strict)
+		enable = connector->status == connector_status_connected;
+	else
+		enable = connector->status != connector_status_disconnected;
+
+	return enable;
+}
+
+static void drm_client_connectors_enabled(struct drm_connector **connectors,
+					  unsigned int connector_count,
+					  bool *enabled)
+{
+	bool any_enabled = false;
+	struct drm_connector *connector;
+	int i = 0;
+
+	for (i = 0; i < connector_count; i++) {
+		connector = connectors[i];
+		enabled[i] = drm_connector_enabled(connector, true);
+		DRM_DEBUG_KMS("connector %d enabled? %s\n", connector->base.id,
+			      connector->display_info.non_desktop ? "non desktop" : enabled[i] ? "yes" : "no");
+
+		any_enabled |= enabled[i];
+	}
+
+	if (any_enabled)
+		return;
+
+	for (i = 0; i < connector_count; i++)
+		enabled[i] = drm_connector_enabled(connectors[i], false);
+}
+
+static bool drm_client_target_cloned(struct drm_device *dev,
+				     struct drm_connector **connectors,
+				     unsigned int connector_count,
+				     struct drm_display_mode **modes,
+				     struct drm_client_offset *offsets,
+				     bool *enabled, int width, int height)
+{
+	int count, i, j;
+	bool can_clone = false;
+	struct drm_display_mode *dmt_mode, *mode;
+
+	/* only contemplate cloning in the single crtc case */
+	if (dev->mode_config.num_crtc > 1)
+		return false;
+
+	count = 0;
+	for (i = 0; i < connector_count; i++) {
+		if (enabled[i])
+			count++;
+	}
+
+	/* only contemplate cloning if more than one connector is enabled */
+	if (count <= 1)
+		return false;
+
+	/* check the command line or if nothing common pick 1024x768 */
+	can_clone = true;
+	for (i = 0; i < connector_count; i++) {
+		if (!enabled[i])
+			continue;
+		modes[i] = drm_connector_pick_cmdline_mode(connectors[i]);
+		if (!modes[i]) {
+			can_clone = false;
+			break;
+		}
+		for (j = 0; j < i; j++) {
+			if (!enabled[j])
+				continue;
+			if (!drm_mode_match(modes[j], modes[i],
+					    DRM_MODE_MATCH_TIMINGS |
+					    DRM_MODE_MATCH_CLOCK |
+					    DRM_MODE_MATCH_FLAGS |
+					    DRM_MODE_MATCH_3D_FLAGS))
+				can_clone = false;
+		}
+	}
+
+	if (can_clone) {
+		DRM_DEBUG_KMS("can clone using command line\n");
+		return true;
+	}
+
+	/* try and find a 1024x768 mode on each connector */
+	can_clone = true;
+	dmt_mode = drm_mode_find_dmt(dev, 1024, 768, 60, false);
+
+	for (i = 0; i < connector_count; i++) {
+		if (!enabled[i])
+			continue;
+
+		list_for_each_entry(mode, &connectors[i]->modes, head) {
+			if (drm_mode_match(mode, dmt_mode,
+					   DRM_MODE_MATCH_TIMINGS |
+					   DRM_MODE_MATCH_CLOCK |
+					   DRM_MODE_MATCH_FLAGS |
+					   DRM_MODE_MATCH_3D_FLAGS))
+				modes[i] = mode;
+		}
+		if (!modes[i])
+			can_clone = false;
+	}
+
+	if (can_clone) {
+		DRM_DEBUG_KMS("can clone using 1024x768\n");
+		return true;
+	}
+	DRM_INFO("kms: can't enable cloning when we probably wanted to.\n");
+	return false;
+}
+
+static int drm_client_get_tile_offsets(struct drm_connector **connectors,
+				       unsigned int connector_count,
+				       struct drm_display_mode **modes,
+				       struct drm_client_offset *offsets,
+				       int idx,
+				       int h_idx, int v_idx)
+{
+	struct drm_connector *connector;
+	int i;
+	int hoffset = 0, voffset = 0;
+
+	for (i = 0; i < connector_count; i++) {
+		connector = connectors[i];
+		if (!connector->has_tile)
+			continue;
+
+		if (!modes[i] && (h_idx || v_idx)) {
+			DRM_DEBUG_KMS("no modes for connector tiled %d %d\n", i,
+				      connector->base.id);
+			continue;
+		}
+		if (connector->tile_h_loc < h_idx)
+			hoffset += modes[i]->hdisplay;
+
+		if (connector->tile_v_loc < v_idx)
+			voffset += modes[i]->vdisplay;
+	}
+	offsets[idx].x = hoffset;
+	offsets[idx].y = voffset;
+	DRM_DEBUG_KMS("returned %d %d for %d %d\n", hoffset, voffset, h_idx, v_idx);
+	return 0;
+}
+
+static bool drm_client_target_preferred(struct drm_connector **connectors,
+					unsigned int connector_count,
+					struct drm_display_mode **modes,
+					struct drm_client_offset *offsets,
+					bool *enabled, int width, int height)
+{
+	const u64 mask = BIT_ULL(connector_count) - 1;
+	struct drm_connector *connector;
+	u64 conn_configured = 0;
+	int tile_pass = 0;
+	int i;
+
+retry:
+	for (i = 0; i < connector_count; i++) {
+		connector = connectors[i];
+
+		if (conn_configured & BIT_ULL(i))
+			continue;
+
+		if (enabled[i] == false) {
+			conn_configured |= BIT_ULL(i);
+			continue;
+		}
+
+		/* first pass over all the untiled connectors */
+		if (tile_pass == 0 && connector->has_tile)
+			continue;
+
+		if (tile_pass == 1) {
+			if (connector->tile_h_loc != 0 ||
+			    connector->tile_v_loc != 0)
+				continue;
+
+		} else {
+			if (connector->tile_h_loc != tile_pass - 1 &&
+			    connector->tile_v_loc != tile_pass - 1)
+			/* if this tile_pass doesn't cover any of the tiles - keep going */
+				continue;
+
+			/*
+			 * find the tile offsets for this pass - need to find
+			 * all tiles left and above
+			 */
+			drm_client_get_tile_offsets(connectors, connector_count, modes, offsets, i,
+						    connector->tile_h_loc, connector->tile_v_loc);
+		}
+		DRM_DEBUG_KMS("looking for cmdline mode on connector %d\n",
+			      connector->base.id);
+
+		/* got for command line mode first */
+		modes[i] = drm_connector_pick_cmdline_mode(connector);
+		if (!modes[i]) {
+			DRM_DEBUG_KMS("looking for preferred mode on connector %d %d\n",
+				      connector->base.id, connector->tile_group ? connector->tile_group->id : 0);
+			modes[i] = drm_connector_has_preferred_mode(connector, width, height);
+		}
+		/* No preferred modes, pick one off the list */
+		if (!modes[i] && !list_empty(&connector->modes)) {
+			list_for_each_entry(modes[i], &connector->modes, head)
+				break;
+		}
+		DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name :
+			  "none");
+		conn_configured |= BIT_ULL(i);
+	}
+
+	if ((conn_configured & mask) != mask) {
+		tile_pass++;
+		goto retry;
+	}
+	return true;
+}
+
+static bool connector_has_possible_crtc(struct drm_connector *connector,
+					struct drm_crtc *crtc)
+{
+	struct drm_encoder *encoder;
+	int i;
+
+	drm_connector_for_each_possible_encoder(connector, encoder, i) {
+		if (encoder->possible_crtcs & drm_crtc_mask(crtc))
+			return true;
+	}
+
+	return false;
+}
+
+static int drm_client_pick_crtcs(struct drm_client_dev *client,
+				 struct drm_connector **connectors,
+				 unsigned int connector_count,
+				 struct drm_crtc **best_crtcs,
+				 struct drm_display_mode **modes,
+				 int n, int width, int height)
+{
+	struct drm_device *dev = client->dev;
+	struct drm_connector *connector;
+	int my_score, best_score, score;
+	struct drm_crtc **crtcs, *crtc;
+	struct drm_mode_set *modeset;
+	int o;
+
+	if (n == connector_count)
+		return 0;
+
+	connector = connectors[n];
+
+	best_crtcs[n] = NULL;
+	best_score = drm_client_pick_crtcs(client, connectors, connector_count,
+					   best_crtcs, modes, n + 1, width, height);
+	if (modes[n] == NULL)
+		return best_score;
+
+	crtcs = kcalloc(connector_count, sizeof(*crtcs), GFP_KERNEL);
+	if (!crtcs)
+		return best_score;
+
+	my_score = 1;
+	if (connector->status == connector_status_connected)
+		my_score++;
+	if (connector->cmdline_mode.specified)
+		my_score++;
+	if (drm_connector_has_preferred_mode(connector, width, height))
+		my_score++;
+
+	/*
+	 * select a crtc for this connector and then attempt to configure
+	 * remaining connectors
+	 */
+	drm_client_for_each_modeset(modeset, client) {
+		crtc = modeset->crtc;
+
+		if (!connector_has_possible_crtc(connector, crtc))
+			continue;
+
+		for (o = 0; o < n; o++)
+			if (best_crtcs[o] == crtc)
+				break;
+
+		if (o < n) {
+			/* ignore cloning unless only a single crtc */
+			if (dev->mode_config.num_crtc > 1)
+				continue;
+
+			if (!drm_mode_equal(modes[o], modes[n]))
+				continue;
+		}
+
+		crtcs[n] = crtc;
+		memcpy(crtcs, best_crtcs, n * sizeof(*crtcs));
+		score = my_score + drm_client_pick_crtcs(client, connectors, connector_count,
+							 crtcs, modes, n + 1, width, height);
+		if (score > best_score) {
+			best_score = score;
+			memcpy(best_crtcs, crtcs, connector_count * sizeof(*crtcs));
+		}
+	}
+
+	kfree(crtcs);
+	return best_score;
+}
+
+/* Try to read the BIOS display configuration and use it for the initial config */
+static bool drm_client_firmware_config(struct drm_client_dev *client,
+				       struct drm_connector **connectors,
+				       unsigned int connector_count,
+				       struct drm_crtc **crtcs,
+				       struct drm_display_mode **modes,
+				       struct drm_client_offset *offsets,
+				       bool *enabled, int width, int height)
+{
+	unsigned int count = min_t(unsigned int, connector_count, BITS_PER_LONG);
+	unsigned long conn_configured, conn_seq, mask;
+	struct drm_device *dev = client->dev;
+	int i, j;
+	bool *save_enabled;
+	bool fallback = true, ret = true;
+	int num_connectors_enabled = 0;
+	int num_connectors_detected = 0;
+	struct drm_modeset_acquire_ctx ctx;
+
+	if (!drm_drv_uses_atomic_modeset(dev))
+		return false;
+
+	save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
+	if (!save_enabled)
+		return false;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+	while (drm_modeset_lock_all_ctx(dev, &ctx) != 0)
+		drm_modeset_backoff(&ctx);
+
+	memcpy(save_enabled, enabled, count);
+	mask = GENMASK(count - 1, 0);
+	conn_configured = 0;
+retry:
+	conn_seq = conn_configured;
+	for (i = 0; i < count; i++) {
+		struct drm_connector *connector;
+		struct drm_encoder *encoder;
+		struct drm_crtc *new_crtc;
+
+		connector = connectors[i];
+
+		if (conn_configured & BIT(i))
+			continue;
+
+		if (conn_seq == 0 && !connector->has_tile)
+			continue;
+
+		if (connector->status == connector_status_connected)
+			num_connectors_detected++;
+
+		if (!enabled[i]) {
+			DRM_DEBUG_KMS("connector %s not enabled, skipping\n",
+				      connector->name);
+			conn_configured |= BIT(i);
+			continue;
+		}
+
+		if (connector->force == DRM_FORCE_OFF) {
+			DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n",
+				      connector->name);
+			enabled[i] = false;
+			continue;
+		}
+
+		encoder = connector->state->best_encoder;
+		if (!encoder || WARN_ON(!connector->state->crtc)) {
+			if (connector->force > DRM_FORCE_OFF)
+				goto bail;
+
+			DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n",
+				      connector->name);
+			enabled[i] = false;
+			conn_configured |= BIT(i);
+			continue;
+		}
+
+		num_connectors_enabled++;
+
+		new_crtc = connector->state->crtc;
+
+		/*
+		 * Make sure we're not trying to drive multiple connectors
+		 * with a single CRTC, since our cloning support may not
+		 * match the BIOS.
+		 */
+		for (j = 0; j < count; j++) {
+			if (crtcs[j] == new_crtc) {
+				DRM_DEBUG_KMS("fallback: cloned configuration\n");
+				goto bail;
+			}
+		}
+
+		DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n",
+			      connector->name);
+
+		/* go for command line mode first */
+		modes[i] = drm_connector_pick_cmdline_mode(connector);
+
+		/* try for preferred next */
+		if (!modes[i]) {
+			DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n",
+				      connector->name, connector->has_tile);
+			modes[i] = drm_connector_has_preferred_mode(connector, width, height);
+		}
+
+		/* No preferred mode marked by the EDID? Are there any modes? */
+		if (!modes[i] && !list_empty(&connector->modes)) {
+			DRM_DEBUG_KMS("using first mode listed on connector %s\n",
+				      connector->name);
+			modes[i] = list_first_entry(&connector->modes,
+						    struct drm_display_mode,
+						    head);
+		}
+
+		/* last resort: use current mode */
+		if (!modes[i]) {
+			/*
+			 * IMPORTANT: We want to use the adjusted mode (i.e.
+			 * after the panel fitter upscaling) as the initial
+			 * config, not the input mode, which is what crtc->mode
+			 * usually contains. But since our current
+			 * code puts a mode derived from the post-pfit timings
+			 * into crtc->mode this works out correctly.
+			 *
+			 * This is crtc->mode and not crtc->state->mode for the
+			 * fastboot check to work correctly.
+			 */
+			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
+				      connector->name);
+			modes[i] = &connector->state->crtc->mode;
+		}
+		crtcs[i] = new_crtc;
+
+		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
+			      connector->name,
+			      connector->state->crtc->base.id,
+			      connector->state->crtc->name,
+			      modes[i]->hdisplay, modes[i]->vdisplay,
+			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" : "");
+
+		fallback = false;
+		conn_configured |= BIT(i);
+	}
+
+	if ((conn_configured & mask) != mask && conn_configured != conn_seq)
+		goto retry;
+
+	/*
+	 * If the BIOS didn't enable everything it could, fall back to have the
+	 * same user experiencing of lighting up as much as possible like the
+	 * fbdev helper library.
+	 */
+	if (num_connectors_enabled != num_connectors_detected &&
+	    num_connectors_enabled < dev->mode_config.num_crtc) {
+		DRM_DEBUG_KMS("fallback: Not all outputs enabled\n");
+		DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled,
+			      num_connectors_detected);
+		fallback = true;
+	}
+
+	if (fallback) {
+bail:
+		DRM_DEBUG_KMS("Not using firmware configuration\n");
+		memcpy(enabled, save_enabled, count);
+		ret = false;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
+	kfree(save_enabled);
+	return ret;
+}
+
+/**
+ * drm_client_modeset_probe() - Probe for displays
+ * @client: DRM client
+ * @width: Maximum display mode width (optional)
+ * @height: Maximum display mode height (optional)
+ *
+ * This function sets up display pipelines for enabled connectors and stores the
+ * config in the client's modeset array.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int height)
+{
+	struct drm_connector *connector, **connectors = NULL;
+	struct drm_connector_list_iter conn_iter;
+	struct drm_device *dev = client->dev;
+	unsigned int total_modes_count = 0;
+	struct drm_client_offset *offsets;
+	unsigned int connector_count = 0;
+	struct drm_display_mode **modes;
+	struct drm_crtc **crtcs;
+	int i, ret = 0;
+	bool *enabled;
+
+	DRM_DEBUG_KMS("\n");
+
+	if (!width)
+		width = dev->mode_config.max_width;
+	if (!height)
+		height = dev->mode_config.max_height;
+
+	drm_connector_list_iter_begin(dev, &conn_iter);
+	drm_client_for_each_connector_iter(connector, &conn_iter) {
+		struct drm_connector **tmp;
+
+		tmp = krealloc(connectors, (connector_count + 1) * sizeof(*connectors), GFP_KERNEL);
+		if (!tmp) {
+			ret = -ENOMEM;
+			goto free_connectors;
+		}
+
+		connectors = tmp;
+		drm_connector_get(connector);
+		connectors[connector_count++] = connector;
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	if (!connector_count)
+		return 0;
+
+	crtcs = kcalloc(connector_count, sizeof(*crtcs), GFP_KERNEL);
+	modes = kcalloc(connector_count, sizeof(*modes), GFP_KERNEL);
+	offsets = kcalloc(connector_count, sizeof(*offsets), GFP_KERNEL);
+	enabled = kcalloc(connector_count, sizeof(bool), GFP_KERNEL);
+	if (!crtcs || !modes || !enabled || !offsets) {
+		DRM_ERROR("Memory allocation failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	mutex_lock(&client->modeset_mutex);
+
+	mutex_lock(&dev->mode_config.mutex);
+	for (i = 0; i < connector_count; i++)
+		total_modes_count += connectors[i]->funcs->fill_modes(connectors[i], width, height);
+	if (!total_modes_count)
+		DRM_DEBUG_KMS("No connectors reported connected with modes\n");
+	drm_client_connectors_enabled(connectors, connector_count, enabled);
+
+	if (!drm_client_firmware_config(client, connectors, connector_count, crtcs,
+					modes, offsets, enabled, width, height)) {
+		memset(modes, 0, connector_count * sizeof(*modes));
+		memset(crtcs, 0, connector_count * sizeof(*crtcs));
+		memset(offsets, 0, connector_count * sizeof(*offsets));
+
+		if (!drm_client_target_cloned(dev, connectors, connector_count, modes,
+					      offsets, enabled, width, height) &&
+		    !drm_client_target_preferred(connectors, connector_count, modes,
+						 offsets, enabled, width, height))
+			DRM_ERROR("Unable to find initial modes\n");
+
+		DRM_DEBUG_KMS("picking CRTCs for %dx%d config\n",
+			      width, height);
+
+		drm_client_pick_crtcs(client, connectors, connector_count,
+				      crtcs, modes, 0, width, height);
+	}
+	mutex_unlock(&dev->mode_config.mutex);
+
+	drm_client_modeset_release(client);
+
+	for (i = 0; i < connector_count; i++) {
+		struct drm_display_mode *mode = modes[i];
+		struct drm_crtc *crtc = crtcs[i];
+		struct drm_client_offset *offset = &offsets[i];
+
+		if (mode && crtc) {
+			struct drm_mode_set *modeset = drm_client_find_modeset(client, crtc);
+			struct drm_connector *connector = connectors[i];
+
+			DRM_DEBUG_KMS("desired mode %s set on crtc %d (%d,%d)\n",
+				      mode->name, crtc->base.id, offset->x, offset->y);
+
+			if (WARN_ON_ONCE(modeset->num_connectors == DRM_CLIENT_MAX_CLONED_CONNECTORS ||
+					 (dev->mode_config.num_crtc > 1 && modeset->num_connectors == 1))) {
+				ret = -EINVAL;
+				break;
+			}
+
+			modeset->mode = drm_mode_duplicate(dev, mode);
+			drm_connector_get(connector);
+			modeset->connectors[modeset->num_connectors++] = connector;
+			modeset->x = offset->x;
+			modeset->y = offset->y;
+		}
+	}
+
+	mutex_unlock(&client->modeset_mutex);
+out:
+	kfree(crtcs);
+	kfree(modes);
+	kfree(offsets);
+	kfree(enabled);
+free_connectors:
+	for (i = 0; i < connector_count; i++)
+		drm_connector_put(connectors[i]);
+	kfree(connectors);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_client_modeset_probe);
+
+/**
+ * drm_client_rotation() - Check the initial rotation value
+ * @modeset: DRM modeset
+ * @rotation: Returned rotation value
+ *
+ * This function checks if the primary plane in @modeset can hw rotate
+ * to match the rotation needed on its connector.
+ *
+ * Note: Currently only 0 and 180 degrees are supported.
+ *
+ * Return:
+ * True if the plane can do the rotation, false otherwise.
+ */
+bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation)
+{
+	struct drm_connector *connector = modeset->connectors[0];
+	struct drm_plane *plane = modeset->crtc->primary;
+	struct drm_cmdline_mode *cmdline;
+	u64 valid_mask = 0;
+	unsigned int i;
+
+	if (!modeset->num_connectors)
+		return false;
+
+	switch (connector->display_info.panel_orientation) {
+	case DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP:
+		*rotation = DRM_MODE_ROTATE_180;
+		break;
+	case DRM_MODE_PANEL_ORIENTATION_LEFT_UP:
+		*rotation = DRM_MODE_ROTATE_90;
+		break;
+	case DRM_MODE_PANEL_ORIENTATION_RIGHT_UP:
+		*rotation = DRM_MODE_ROTATE_270;
+		break;
+	default:
+		*rotation = DRM_MODE_ROTATE_0;
+	}
+
+	/**
+	 * The panel already defined the default rotation
+	 * through its orientation. Whatever has been provided
+	 * on the command line needs to be added to that.
+	 *
+	 * Unfortunately, the rotations are at different bit
+	 * indices, so the math to add them up are not as
+	 * trivial as they could.
+	 *
+	 * Reflections on the other hand are pretty trivial to deal with, a
+	 * simple XOR between the two handle the addition nicely.
+	 */
+	cmdline = &connector->cmdline_mode;
+	if (cmdline->specified) {
+		unsigned int cmdline_rest, panel_rest;
+		unsigned int cmdline_rot, panel_rot;
+		unsigned int sum_rot, sum_rest;
+
+		panel_rot = ilog2(*rotation & DRM_MODE_ROTATE_MASK);
+		cmdline_rot = ilog2(cmdline->rotation_reflection & DRM_MODE_ROTATE_MASK);
+		sum_rot = (panel_rot + cmdline_rot) % 4;
+
+		panel_rest = *rotation & ~DRM_MODE_ROTATE_MASK;
+		cmdline_rest = cmdline->rotation_reflection & ~DRM_MODE_ROTATE_MASK;
+		sum_rest = panel_rest ^ cmdline_rest;
+
+		*rotation = (1 << sum_rot) | sum_rest;
+	}
+
+	/*
+	 * TODO: support 90 / 270 degree hardware rotation,
+	 * depending on the hardware this may require the framebuffer
+	 * to be in a specific tiling format.
+	 */
+	if ((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180 ||
+	    !plane->rotation_property)
+		return false;
+
+	for (i = 0; i < plane->rotation_property->num_values; i++)
+		valid_mask |= (1ULL << plane->rotation_property->values[i]);
+
+	if (!(*rotation & valid_mask))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(drm_client_rotation);
+
+static int drm_client_modeset_commit_atomic(struct drm_client_dev *client, bool active)
+{
+	struct drm_device *dev = client->dev;
+	struct drm_plane *plane;
+	struct drm_atomic_state *state;
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_mode_set *mode_set;
+	int ret;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+	state = drm_atomic_state_alloc(dev);
+	if (!state) {
+		ret = -ENOMEM;
+		goto out_ctx;
+	}
+
+	state->acquire_ctx = &ctx;
+retry:
+	drm_for_each_plane(plane, dev) {
+		struct drm_plane_state *plane_state;
+
+		plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state)) {
+			ret = PTR_ERR(plane_state);
+			goto out_state;
+		}
+
+		plane_state->rotation = DRM_MODE_ROTATE_0;
+
+		/* disable non-primary: */
+		if (plane->type == DRM_PLANE_TYPE_PRIMARY)
+			continue;
+
+		ret = __drm_atomic_helper_disable_plane(plane, plane_state);
+		if (ret != 0)
+			goto out_state;
+	}
+
+	drm_client_for_each_modeset(mode_set, client) {
+		struct drm_plane *primary = mode_set->crtc->primary;
+		unsigned int rotation;
+
+		if (drm_client_rotation(mode_set, &rotation)) {
+			struct drm_plane_state *plane_state;
+
+			/* Cannot fail as we've already gotten the plane state above */
+			plane_state = drm_atomic_get_new_plane_state(state, primary);
+			plane_state->rotation = rotation;
+		}
+
+		ret = __drm_atomic_helper_set_config(mode_set, state);
+		if (ret != 0)
+			goto out_state;
+
+		/*
+		 * __drm_atomic_helper_set_config() sets active when a
+		 * mode is set, unconditionally clear it if we force DPMS off
+		 */
+		if (!active) {
+			struct drm_crtc *crtc = mode_set->crtc;
+			struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
+
+			crtc_state->active = false;
+		}
+	}
+
+	ret = drm_atomic_commit(state);
+
+out_state:
+	if (ret == -EDEADLK)
+		goto backoff;
+
+	drm_atomic_state_put(state);
+out_ctx:
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
+	return ret;
+
+backoff:
+	drm_atomic_state_clear(state);
+	drm_modeset_backoff(&ctx);
+
+	goto retry;
+}
+
+static int drm_client_modeset_commit_legacy(struct drm_client_dev *client)
+{
+	struct drm_device *dev = client->dev;
+	struct drm_mode_set *mode_set;
+	struct drm_plane *plane;
+	int ret = 0;
+
+	drm_modeset_lock_all(dev);
+	drm_for_each_plane(plane, dev) {
+		if (plane->type != DRM_PLANE_TYPE_PRIMARY)
+			drm_plane_force_disable(plane);
+
+		if (plane->rotation_property)
+			drm_mode_plane_set_obj_prop(plane,
+						    plane->rotation_property,
+						    DRM_MODE_ROTATE_0);
+	}
+
+	drm_client_for_each_modeset(mode_set, client) {
+		struct drm_crtc *crtc = mode_set->crtc;
+
+		if (crtc->funcs->cursor_set2) {
+			ret = crtc->funcs->cursor_set2(crtc, NULL, 0, 0, 0, 0, 0);
+			if (ret)
+				goto out;
+		} else if (crtc->funcs->cursor_set) {
+			ret = crtc->funcs->cursor_set(crtc, NULL, 0, 0, 0);
+			if (ret)
+				goto out;
+		}
+
+		ret = drm_mode_set_config_internal(mode_set);
+		if (ret)
+			goto out;
+	}
+out:
+	drm_modeset_unlock_all(dev);
+
+	return ret;
+}
+
+/**
+ * drm_client_modeset_commit_force() - Force commit CRTC configuration
+ * @client: DRM client
+ *
+ * Commit modeset configuration to crtcs without checking if there is a DRM master.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_client_modeset_commit_force(struct drm_client_dev *client)
+{
+	struct drm_device *dev = client->dev;
+	int ret;
+
+	mutex_lock(&client->modeset_mutex);
+	if (drm_drv_uses_atomic_modeset(dev))
+		ret = drm_client_modeset_commit_atomic(client, true);
+	else
+		ret = drm_client_modeset_commit_legacy(client);
+	mutex_unlock(&client->modeset_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_client_modeset_commit_force);
+
+/**
+ * drm_client_modeset_commit() - Commit CRTC configuration
+ * @client: DRM client
+ *
+ * Commit modeset configuration to crtcs.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_client_modeset_commit(struct drm_client_dev *client)
+{
+	struct drm_device *dev = client->dev;
+	int ret;
+
+	if (!drm_master_internal_acquire(dev))
+		return -EBUSY;
+
+	ret = drm_client_modeset_commit_force(client);
+
+	drm_master_internal_release(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_client_modeset_commit);
+
+static void drm_client_modeset_dpms_legacy(struct drm_client_dev *client, int dpms_mode)
+{
+	struct drm_device *dev = client->dev;
+	struct drm_connector *connector;
+	struct drm_mode_set *modeset;
+	int j;
+
+	drm_modeset_lock_all(dev);
+	drm_client_for_each_modeset(modeset, client) {
+		if (!modeset->crtc->enabled)
+			continue;
+
+		for (j = 0; j < modeset->num_connectors; j++) {
+			connector = modeset->connectors[j];
+			connector->funcs->dpms(connector, dpms_mode);
+			drm_object_property_set_value(&connector->base,
+				dev->mode_config.dpms_property, dpms_mode);
+		}
+	}
+	drm_modeset_unlock_all(dev);
+}
+
+/**
+ * drm_client_modeset_dpms() - Set DPMS mode
+ * @client: DRM client
+ * @mode: DPMS mode
+ *
+ * Note: For atomic drivers @mode is reduced to on/off.
+ *
+ * Returns:
+ * Zero on success or negative error code on failure.
+ */
+int drm_client_modeset_dpms(struct drm_client_dev *client, int mode)
+{
+	struct drm_device *dev = client->dev;
+	int ret = 0;
+
+	if (!drm_master_internal_acquire(dev))
+		return -EBUSY;
+
+	mutex_lock(&client->modeset_mutex);
+	if (drm_drv_uses_atomic_modeset(dev))
+		ret = drm_client_modeset_commit_atomic(client, mode == DRM_MODE_DPMS_ON);
+	else
+		drm_client_modeset_dpms_legacy(client, mode);
+	mutex_unlock(&client->modeset_mutex);
+
+	drm_master_internal_release(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_client_modeset_dpms);
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c
index d5d34d0c79c7..4ce5c6d8de99 100644
--- a/drivers/gpu/drm/drm_color_mgmt.c
+++ b/drivers/gpu/drm/drm_color_mgmt.c
@@ -20,9 +20,13 @@
  * OF THIS SOFTWARE.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
+#include <linux/uaccess.h>
+
 #include <drm/drm_color_mgmt.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
 
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index b34c3d38bf15..3afed5677946 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -139,8 +139,9 @@ static void drm_connector_get_cmdline_mode(struct drm_connector *connector)
 		connector->force = mode->force;
 	}
 
-	DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
+	DRM_DEBUG_KMS("cmdline mode for connector %s %s %dx%d@%dHz%s%s%s\n",
 		      connector->name,
+		      mode->name ? mode->name : "",
 		      mode->xres, mode->yres,
 		      mode->refresh_specified ? mode->refresh : 60,
 		      mode->rb ? " reduced blanking" : "",
@@ -464,10 +465,7 @@ int drm_connector_register(struct drm_connector *connector)
 	if (ret)
 		goto unlock;
 
-	ret = drm_debugfs_connector_add(connector);
-	if (ret) {
-		goto err_sysfs;
-	}
+	drm_debugfs_connector_add(connector);
 
 	if (connector->funcs->late_register) {
 		ret = connector->funcs->late_register(connector);
@@ -482,7 +480,6 @@ int drm_connector_register(struct drm_connector *connector)
 
 err_debugfs:
 	drm_debugfs_connector_remove(connector);
-err_sysfs:
 	drm_sysfs_connector_remove(connector);
 unlock:
 	mutex_unlock(&connector->mutex);
@@ -823,13 +820,6 @@ static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = {
 DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name,
 		 drm_tv_subconnector_enum_list)
 
-static struct drm_prop_enum_list drm_cp_enum_list[] = {
-	{ DRM_MODE_CONTENT_PROTECTION_UNDESIRED, "Undesired" },
-	{ DRM_MODE_CONTENT_PROTECTION_DESIRED, "Desired" },
-	{ DRM_MODE_CONTENT_PROTECTION_ENABLED, "Enabled" },
-};
-DRM_ENUM_NAME_FN(drm_get_content_protection_name, drm_cp_enum_list)
-
 static const struct drm_prop_enum_list hdmi_colorspaces[] = {
 	/* For Default case, driver will set the colorspace */
 	{ DRM_MODE_COLORIMETRY_DEFAULT, "Default" },
@@ -963,6 +953,47 @@ static const struct drm_prop_enum_list hdmi_colorspaces[] = {
  *	  is no longer protected and userspace should take appropriate action
  *	  (whatever that might be).
  *
+ * HDR_OUTPUT_METADATA:
+ *	Connector property to enable userspace to send HDR Metadata to
+ *	driver. This metadata is based on the composition and blending
+ *	policies decided by user, taking into account the hardware and
+ *	sink capabilities. The driver gets this metadata and creates a
+ *	Dynamic Range and Mastering Infoframe (DRM) in case of HDMI,
+ *	SDP packet (Non-audio INFOFRAME SDP v1.3) for DP. This is then
+ *	sent to sink. This notifies the sink of the upcoming frame's Color
+ *	Encoding and Luminance parameters.
+ *
+ *	Userspace first need to detect the HDR capabilities of sink by
+ *	reading and parsing the EDID. Details of HDR metadata for HDMI
+ *	are added in CTA 861.G spec. For DP , its defined in VESA DP
+ *	Standard v1.4. It needs to then get the metadata information
+ *	of the video/game/app content which are encoded in HDR (basically
+ *	using HDR transfer functions). With this information it needs to
+ *	decide on a blending policy and compose the relevant
+ *	layers/overlays into a common format. Once this blending is done,
+ *	userspace will be aware of the metadata of the composed frame to
+ *	be send to sink. It then uses this property to communicate this
+ *	metadata to driver which then make a Infoframe packet and sends
+ *	to sink based on the type of encoder connected.
+ *
+ *	Userspace will be responsible to do Tone mapping operation in case:
+ *		- Some layers are HDR and others are SDR
+ *		- HDR layers luminance is not same as sink
+ *
+ *	It will even need to do colorspace conversion and get all layers
+ *	to one common colorspace for blending. It can use either GL, Media
+ *	or display engine to get this done based on the capabilties of the
+ *	associated hardware.
+ *
+ *	Driver expects metadata to be put in &struct hdr_output_metadata
+ *	structure from userspace. This is received as blob and stored in
+ *	&drm_connector_state.hdr_output_metadata. It parses EDID and saves the
+ *	sink metadata in &struct hdr_sink_metadata, as
+ *	&drm_connector.hdr_sink_metadata.  Driver uses
+ *	drm_hdmi_infoframe_set_hdr_metadata() helper to set the HDR metadata,
+ *	hdmi_drm_infoframe_pack() to pack the infoframe as per spec, in case of
+ *	HDMI encoder.
+ *
  * max bpc:
  *	This range property is used by userspace to limit the bit depth. When
  *	used the driver would limit the bpc in accordance with the valid range
@@ -1058,6 +1089,12 @@ int drm_connector_create_standard_properties(struct drm_device *dev)
 		return -ENOMEM;
 	dev->mode_config.non_desktop_property = prop;
 
+	prop = drm_property_create(dev, DRM_MODE_PROP_BLOB,
+				   "HDR_OUTPUT_METADATA", 0);
+	if (!prop)
+		return -ENOMEM;
+	dev->mode_config.hdr_output_metadata_property = prop;
+
 	return 0;
 }
 
@@ -1510,42 +1547,6 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector,
 EXPORT_SYMBOL(drm_connector_attach_scaling_mode_property);
 
 /**
- * drm_connector_attach_content_protection_property - attach content protection
- * property
- *
- * @connector: connector to attach CP property on.
- *
- * This is used to add support for content protection on select connectors.
- * Content Protection is intentionally vague to allow for different underlying
- * technologies, however it is most implemented by HDCP.
- *
- * The content protection will be set to &drm_connector_state.content_protection
- *
- * Returns:
- * Zero on success, negative errno on failure.
- */
-int drm_connector_attach_content_protection_property(
-		struct drm_connector *connector)
-{
-	struct drm_device *dev = connector->dev;
-	struct drm_property *prop;
-
-	prop = drm_property_create_enum(dev, 0, "Content Protection",
-					drm_cp_enum_list,
-					ARRAY_SIZE(drm_cp_enum_list));
-	if (!prop)
-		return -ENOMEM;
-
-	drm_object_attach_property(&connector->base, prop,
-				   DRM_MODE_CONTENT_PROTECTION_UNDESIRED);
-
-	connector->content_protection_property = prop;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_connector_attach_content_protection_property);
-
-/**
  * drm_mode_create_aspect_ratio_property - create aspect ratio property
  * @dev: DRM device
  *
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index 6e8e1a9fcae3..1f802d8e5681 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -28,7 +28,13 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
+
 #include "drm_legacy.h"
 
 struct drm_ctx_list {
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 790ba5941954..4936e1080e41 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -122,9 +122,7 @@ int drm_crtc_register_all(struct drm_device *dev)
 	int ret = 0;
 
 	drm_for_each_crtc(crtc, dev) {
-		if (drm_debugfs_crtc_add(crtc))
-			DRM_ERROR("Failed to initialize debugfs entry for CRTC '%s'.\n",
-				  crtc->name);
+		drm_debugfs_crtc_add(crtc);
 
 		if (crtc->funcs->late_register)
 			ret = crtc->funcs->late_register(crtc);
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 747661f63fbb..6dd49a60deac 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -29,21 +29,23 @@
  *      Jesse Barnes <jesse.barnes@intel.com>
  */
 
-#include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/moduleparam.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_encoder.h>
-#include <drm/drm_fourcc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_encoder.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_plane_helper.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_edid.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 /**
  * DOC: overview
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 0719a235d6cc..c7d5e4c21423 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -31,14 +31,32 @@
  * and are not exported to drivers.
  */
 
-enum drm_mode_status;
+#include <linux/types.h>
+
+enum drm_color_encoding;
+enum drm_color_range;
 enum drm_connector_force;
+enum drm_mode_status;
 
-struct drm_display_mode;
-struct work_struct;
-struct drm_connector;
+struct drm_atomic_state;
 struct drm_bridge;
+struct drm_connector;
+struct drm_crtc;
+struct drm_device;
+struct drm_display_mode;
+struct drm_file;
+struct drm_framebuffer;
+struct drm_mode_create_dumb;
+struct drm_mode_fb_cmd2;
+struct drm_mode_fb_cmd;
+struct drm_mode_object;
+struct drm_mode_set;
+struct drm_plane;
+struct drm_plane_state;
+struct drm_property;
 struct edid;
+struct kref;
+struct work_struct;
 
 /* drm_crtc.c */
 int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj,
@@ -207,6 +225,11 @@ struct drm_minor;
 int drm_atomic_debugfs_init(struct drm_minor *minor);
 #endif
 
+int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
+				      struct drm_plane_state *plane_state);
+int __drm_atomic_helper_set_config(struct drm_mode_set *set,
+				   struct drm_atomic_state *state);
+
 void drm_atomic_print_state(const struct drm_atomic_state *state);
 
 /* drm_atomic_uapi.c */
diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index ee67c96841fa..8230dac01a89 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -286,7 +286,7 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter,
 	iter->plane_src.y2 = (state->src.y2 >> 16) + !!(state->src.y2 & 0xFFFF);
 
 	if (!iter->clips || !drm_rect_equals(&state->src, &old_state->src)) {
-		iter->clips = 0;
+		iter->clips = NULL;
 		iter->num_clips = 0;
 		iter->full_update = true;
 	}
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index f8468eae0503..eab0f2687cd6 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -24,20 +24,23 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/export.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <linux/export.h>
+#include <linux/uaccess.h>
 
+#include <drm/drm_atomic.h>
+#include <drm/drm_auth.h>
 #include <drm/drm_client.h>
 #include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_atomic.h>
-#include <drm/drm_auth.h>
+#include <drm/drm_file.h>
 #include <drm/drm_gem.h>
-#include <drm/drmP.h>
 
-#include "drm_internal.h"
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
 
 #if defined(CONFIG_DEBUG_FS)
 
@@ -173,9 +176,8 @@ int drm_debugfs_create_files(const struct drm_info_list *files, int count,
 			     struct dentry *root, struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
-	struct dentry *ent;
 	struct drm_info_node *tmp;
-	int i, ret;
+	int i;
 
 	for (i = 0; i < count; i++) {
 		u32 features = files[i].driver_features;
@@ -185,22 +187,13 @@ int drm_debugfs_create_files(const struct drm_info_list *files, int count,
 			continue;
 
 		tmp = kmalloc(sizeof(struct drm_info_node), GFP_KERNEL);
-		if (tmp == NULL) {
-			ret = -1;
-			goto fail;
-		}
-		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
-					  root, tmp, &drm_debugfs_fops);
-		if (!ent) {
-			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%pd/%s\n",
-				  root, files[i].name);
-			kfree(tmp);
-			ret = -1;
-			goto fail;
-		}
+		if (tmp == NULL)
+			continue;
 
 		tmp->minor = minor;
-		tmp->dent = ent;
+		tmp->dent = debugfs_create_file(files[i].name,
+						S_IFREG | S_IRUGO, root, tmp,
+						&drm_debugfs_fops);
 		tmp->info_ent = &files[i];
 
 		mutex_lock(&minor->debugfs_lock);
@@ -208,10 +201,6 @@ int drm_debugfs_create_files(const struct drm_info_list *files, int count,
 		mutex_unlock(&minor->debugfs_lock);
 	}
 	return 0;
-
-fail:
-	drm_debugfs_remove_files(files, count, minor);
-	return ret;
 }
 EXPORT_SYMBOL(drm_debugfs_create_files);
 
@@ -226,10 +215,6 @@ int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 	mutex_init(&minor->debugfs_lock);
 	sprintf(name, "%d", minor_id);
 	minor->debugfs_root = debugfs_create_dir(name, root);
-	if (!minor->debugfs_root) {
-		DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s\n", name);
-		return -1;
-	}
 
 	ret = drm_debugfs_create_files(drm_debugfs_list, DRM_DEBUGFS_ENTRIES,
 				       minor->debugfs_root, minor);
@@ -310,17 +295,15 @@ static void drm_debugfs_remove_all_files(struct drm_minor *minor)
 	mutex_unlock(&minor->debugfs_lock);
 }
 
-int drm_debugfs_cleanup(struct drm_minor *minor)
+void drm_debugfs_cleanup(struct drm_minor *minor)
 {
 	if (!minor->debugfs_root)
-		return 0;
+		return;
 
 	drm_debugfs_remove_all_files(minor);
 
 	debugfs_remove_recursive(minor->debugfs_root);
 	minor->debugfs_root = NULL;
-
-	return 0;
 }
 
 static int connector_show(struct seq_file *m, void *data)
@@ -438,38 +421,24 @@ static const struct file_operations drm_connector_fops = {
 	.write = connector_write
 };
 
-int drm_debugfs_connector_add(struct drm_connector *connector)
+void drm_debugfs_connector_add(struct drm_connector *connector)
 {
 	struct drm_minor *minor = connector->dev->primary;
-	struct dentry *root, *ent;
+	struct dentry *root;
 
 	if (!minor->debugfs_root)
-		return -1;
+		return;
 
 	root = debugfs_create_dir(connector->name, minor->debugfs_root);
-	if (!root)
-		return -ENOMEM;
-
 	connector->debugfs_entry = root;
 
 	/* force */
-	ent = debugfs_create_file("force", S_IRUGO | S_IWUSR, root, connector,
-				  &drm_connector_fops);
-	if (!ent)
-		goto error;
+	debugfs_create_file("force", S_IRUGO | S_IWUSR, root, connector,
+			    &drm_connector_fops);
 
 	/* edid */
-	ent = debugfs_create_file("edid_override", S_IRUGO | S_IWUSR, root,
-				  connector, &drm_edid_fops);
-	if (!ent)
-		goto error;
-
-	return 0;
-
-error:
-	debugfs_remove_recursive(connector->debugfs_entry);
-	connector->debugfs_entry = NULL;
-	return -ENOMEM;
+	debugfs_create_file("edid_override", S_IRUGO | S_IWUSR, root, connector,
+			    &drm_edid_fops);
 }
 
 void drm_debugfs_connector_remove(struct drm_connector *connector)
@@ -482,7 +451,7 @@ void drm_debugfs_connector_remove(struct drm_connector *connector)
 	connector->debugfs_entry = NULL;
 }
 
-int drm_debugfs_crtc_add(struct drm_crtc *crtc)
+void drm_debugfs_crtc_add(struct drm_crtc *crtc)
 {
 	struct drm_minor *minor = crtc->dev->primary;
 	struct dentry *root;
@@ -490,23 +459,14 @@ int drm_debugfs_crtc_add(struct drm_crtc *crtc)
 
 	name = kasprintf(GFP_KERNEL, "crtc-%d", crtc->index);
 	if (!name)
-		return -ENOMEM;
+		return;
 
 	root = debugfs_create_dir(name, minor->debugfs_root);
 	kfree(name);
-	if (!root)
-		return -ENOMEM;
 
 	crtc->debugfs_entry = root;
 
-	if (drm_debugfs_crtc_crc_add(crtc))
-		goto error;
-
-	return 0;
-
-error:
-	drm_debugfs_crtc_remove(crtc);
-	return -ENOMEM;
+	drm_debugfs_crtc_crc_add(crtc);
 }
 
 void drm_debugfs_crtc_remove(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c
index 00e743153e94..7ca486d750e9 100644
--- a/drivers/gpu/drm/drm_debugfs_crc.c
+++ b/drivers/gpu/drm/drm_debugfs_crc.c
@@ -29,7 +29,14 @@
 #include <linux/circ_buf.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
-#include <drm/drmP.h>
+#include <linux/poll.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_debugfs_crc.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
 
 /**
@@ -344,33 +351,19 @@ static const struct file_operations drm_crtc_crc_data_fops = {
 	.release = crtc_crc_release,
 };
 
-int drm_debugfs_crtc_crc_add(struct drm_crtc *crtc)
+void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc)
 {
-	struct dentry *crc_ent, *ent;
+	struct dentry *crc_ent;
 
 	if (!crtc->funcs->set_crc_source || !crtc->funcs->verify_crc_source)
-		return 0;
+		return;
 
 	crc_ent = debugfs_create_dir("crc", crtc->debugfs_entry);
-	if (!crc_ent)
-		return -ENOMEM;
-
-	ent = debugfs_create_file("control", S_IRUGO, crc_ent, crtc,
-				  &drm_crtc_crc_control_fops);
-	if (!ent)
-		goto error;
-
-	ent = debugfs_create_file("data", S_IRUGO, crc_ent, crtc,
-				  &drm_crtc_crc_data_fops);
-	if (!ent)
-		goto error;
-
-	return 0;
-
-error:
-	debugfs_remove_recursive(crc_ent);
 
-	return -ENOMEM;
+	debugfs_create_file("control", S_IRUGO, crc_ent, crtc,
+			    &drm_crtc_crc_control_fops);
+	debugfs_create_file("data", S_IRUGO, crc_ent, crtc,
+			    &drm_crtc_crc_data_fops);
 }
 
 /**
@@ -389,12 +382,13 @@ int drm_crtc_add_crc_entry(struct drm_crtc *crtc, bool has_frame,
 	struct drm_crtc_crc *crc = &crtc->crc;
 	struct drm_crtc_crc_entry *entry;
 	int head, tail;
+	unsigned long flags;
 
-	spin_lock(&crc->lock);
+	spin_lock_irqsave(&crc->lock, flags);
 
 	/* Caller may not have noticed yet that userspace has stopped reading */
 	if (!crc->entries) {
-		spin_unlock(&crc->lock);
+		spin_unlock_irqrestore(&crc->lock, flags);
 		return -EINVAL;
 	}
 
@@ -405,7 +399,7 @@ int drm_crtc_add_crc_entry(struct drm_crtc *crtc, bool has_frame,
 		bool was_overflow = crc->overflow;
 
 		crc->overflow = true;
-		spin_unlock(&crc->lock);
+		spin_unlock_irqrestore(&crc->lock, flags);
 
 		if (!was_overflow)
 			DRM_ERROR("Overflow of CRC buffer, userspace reads too slow.\n");
@@ -421,7 +415,7 @@ int drm_crtc_add_crc_entry(struct drm_crtc *crtc, bool has_frame,
 	head = (head + 1) & (DRM_CRC_ENTRIES_NR - 1);
 	crc->head = head;
 
-	spin_unlock(&crc->lock);
+	spin_unlock_irqrestore(&crc->lock, flags);
 
 	wake_up_interruptible(&crc->wq);
 
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
index 3f83e2ca80ad..5ef0227eaa0e 100644
--- a/drivers/gpu/drm/drm_dma.c
+++ b/drivers/gpu/drm/drm_dma.c
@@ -34,7 +34,11 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_print.h>
+
 #include "drm_legacy.h"
 
 /**
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0e4f25d63fd2..5be28e3295f3 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -27,15 +27,17 @@
 
 #include <linux/device.h>
 #include <linux/fs.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/uio.h>
-#include <drm/drm_dp_helper.h>
+
 #include <drm/drm_crtc.h>
-#include <drm/drmP.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_helper_internal.h"
 
diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
index e7f4fe2848a5..1c9ea9f7fdaf 100644
--- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
@@ -20,13 +20,15 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+
 #include <drm/drm_dp_dual_mode_helper.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
 
 /**
  * DOC: dp dual mode helpers
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 54a6414c5d96..0b994d083a89 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -20,16 +20,18 @@
  * OF THIS SOFTWARE.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
+
 #include <drm/drm_dp_helper.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "drm_crtc_helper_internal.h"
 
@@ -1278,7 +1280,9 @@ static const struct dpcd_quirk dpcd_quirk_list[] = {
 	/* LG LP140WF6-SPM1 eDP panel */
 	{ OUI(0x00, 0x22, 0xb9), DEVICE_ID('s', 'i', 'v', 'a', 'r', 'T'), false, BIT(DP_DPCD_QUIRK_CONSTANT_N) },
 	/* Apple panels need some additional handling to support PSR */
-	{ OUI(0x00, 0x10, 0xfa), DEVICE_ID_ANY, false, BIT(DP_DPCD_QUIRK_NO_PSR) }
+	{ OUI(0x00, 0x10, 0xfa), DEVICE_ID_ANY, false, BIT(DP_DPCD_QUIRK_NO_PSR) },
+	/* CH7511 seems to leave SINK_COUNT zeroed */
+	{ OUI(0x00, 0x00, 0x00), DEVICE_ID('C', 'H', '7', '5', '1', '1'), false, BIT(DP_DPCD_QUIRK_NO_SINK_COUNT) },
 };
 
 #undef OUI
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index c630ed157994..0984b9a34d55 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -20,19 +20,20 @@
  * OF THIS SOFTWARE.
  */
 
-#include <linux/kernel.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
-#include <linux/i2c.h>
-#include <drm/drm_dp_mst_helper.h>
-#include <drm/drmP.h>
 
-#include <drm/drm_fixed.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_dp_mst_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 /**
@@ -1995,7 +1996,11 @@ static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
 	if (ret != 1)
 		DRM_DEBUG_KMS("failed to send msg in q %d\n", ret);
 
-	txmsg->dst->tx_slots[txmsg->seqno] = NULL;
+	if (txmsg->seqno != -1) {
+		WARN_ON((unsigned int)txmsg->seqno >
+			ARRAY_SIZE(txmsg->dst->tx_slots));
+		txmsg->dst->tx_slots[txmsg->seqno] = NULL;
+	}
 }
 
 static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 862621494a93..fe0ce86c280f 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -35,16 +35,19 @@
 #include <linux/srcu.h>
 
 #include <drm/drm_client.h>
+#include <drm/drm_color_mgmt.h>
 #include <drm/drm_drv.h>
-#include <drm/drmP.h>
+#include <drm/drm_file.h>
+#include <drm/drm_mode_object.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
-#include "drm_legacy.h"
 #include "drm_internal.h"
+#include "drm_legacy.h"
 
 /*
  * drm_debug: Enable debug output.
- * Bitmask of DRM_UT_x. See include/drm/drmP.h for details.
+ * Bitmask of DRM_UT_x. See include/drm/drm_print.h for details.
  */
 unsigned int drm_debug = 0;
 EXPORT_SYMBOL(drm_debug);
@@ -1161,11 +1164,6 @@ static int __init drm_core_init(void)
 	}
 
 	drm_debugfs_root = debugfs_create_dir("dri", NULL);
-	if (!drm_debugfs_root) {
-		ret = -ENOMEM;
-		DRM_ERROR("Cannot create debugfs-root: %d\n", ret);
-		goto error;
-	}
 
 	ret = register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c
index 81dfdd33753a..d18a740fe0f1 100644
--- a/drivers/gpu/drm/drm_dumb_buffers.c
+++ b/drivers/gpu/drm/drm_dumb_buffers.c
@@ -23,8 +23,10 @@
  * OF THIS SOFTWARE.
  */
 
-#include <drm/drmP.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_gem.h>
+#include <drm/drm_mode.h>
 
 #include "drm_crtc_internal.h"
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 649cfd8b4200..82a4ceed3fcf 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -27,16 +27,19 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#include <linux/kernel.h>
-#include <linux/slab.h>
+
 #include <linux/hdmi.h>
 #include <linux/i2c.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/vga_switcheroo.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_displayid.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_displayid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_scdc_helper.h>
 
 #include "drm_crtc_internal.h"
@@ -1339,6 +1342,7 @@ MODULE_PARM_DESC(edid_fixup,
 
 static void drm_get_displayid(struct drm_connector *connector,
 			      struct edid *edid);
+static int validate_displayid(u8 *displayid, int length, int idx);
 
 static int drm_edid_block_checksum(const u8 *raw_edid)
 {
@@ -1570,6 +1574,50 @@ static void connector_bad_edid(struct drm_connector *connector,
 	}
 }
 
+/* Get override or firmware EDID */
+static struct edid *drm_get_override_edid(struct drm_connector *connector)
+{
+	struct edid *override = NULL;
+
+	if (connector->override_edid)
+		override = drm_edid_duplicate(connector->edid_blob_ptr->data);
+
+	if (!override)
+		override = drm_load_edid_firmware(connector);
+
+	return IS_ERR(override) ? NULL : override;
+}
+
+/**
+ * drm_add_override_edid_modes - add modes from override/firmware EDID
+ * @connector: connector we're probing
+ *
+ * Add modes from the override/firmware EDID, if available. Only to be used from
+ * drm_helper_probe_single_connector_modes() as a fallback for when DDC probe
+ * failed during drm_get_edid() and caused the override/firmware EDID to be
+ * skipped.
+ *
+ * Return: The number of modes added or 0 if we couldn't find any.
+ */
+int drm_add_override_edid_modes(struct drm_connector *connector)
+{
+	struct edid *override;
+	int num_modes = 0;
+
+	override = drm_get_override_edid(connector);
+	if (override) {
+		drm_connector_update_edid_property(connector, override);
+		num_modes = drm_add_edid_modes(connector, override);
+		kfree(override);
+
+		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] adding %d modes via fallback override/firmware EDID\n",
+			      connector->base.id, connector->name, num_modes);
+	}
+
+	return num_modes;
+}
+EXPORT_SYMBOL(drm_add_override_edid_modes);
+
 /**
  * drm_do_get_edid - get EDID data using a custom EDID block read function
  * @connector: connector we're probing
@@ -1597,15 +1645,10 @@ struct edid *drm_do_get_edid(struct drm_connector *connector,
 {
 	int i, j = 0, valid_extensions = 0;
 	u8 *edid, *new;
-	struct edid *override = NULL;
+	struct edid *override;
 
-	if (connector->override_edid)
-		override = drm_edid_duplicate(connector->edid_blob_ptr->data);
-
-	if (!override)
-		override = drm_load_edid_firmware(connector);
-
-	if (!IS_ERR_OR_NULL(override))
+	override = drm_get_override_edid(connector);
+	if (override)
 		return override;
 
 	if ((edid = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
@@ -2849,6 +2892,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
 #define VIDEO_BLOCK     0x02
 #define VENDOR_BLOCK    0x03
 #define SPEAKER_BLOCK	0x04
+#define HDR_STATIC_METADATA_BLOCK	0x6
 #define USE_EXTENDED_TAG 0x07
 #define EXT_VIDEO_CAPABILITY_BLOCK 0x00
 #define EXT_VIDEO_DATA_BLOCK_420	0x0E
@@ -2883,16 +2927,46 @@ static u8 *drm_find_edid_extension(const struct edid *edid, int ext_id)
 	return edid_ext;
 }
 
-static u8 *drm_find_cea_extension(const struct edid *edid)
-{
-	return drm_find_edid_extension(edid, CEA_EXT);
-}
 
 static u8 *drm_find_displayid_extension(const struct edid *edid)
 {
 	return drm_find_edid_extension(edid, DISPLAYID_EXT);
 }
 
+static u8 *drm_find_cea_extension(const struct edid *edid)
+{
+	int ret;
+	int idx = 1;
+	int length = EDID_LENGTH;
+	struct displayid_block *block;
+	u8 *cea;
+	u8 *displayid;
+
+	/* Look for a top level CEA extension block */
+	cea = drm_find_edid_extension(edid, CEA_EXT);
+	if (cea)
+		return cea;
+
+	/* CEA blocks can also be found embedded in a DisplayID block */
+	displayid = drm_find_displayid_extension(edid);
+	if (!displayid)
+		return NULL;
+
+	ret = validate_displayid(displayid, length, idx);
+	if (ret)
+		return NULL;
+
+	idx += sizeof(struct displayid_hdr);
+	for_each_displayid_db(displayid, block, idx, length) {
+		if (block->tag == DATA_BLOCK_CTA) {
+			cea = (u8 *)block;
+			break;
+		}
+	}
+
+	return cea;
+}
+
 /*
  * Calculate the alternate clock for the CEA mode
  * (60Hz vs. 59.94Hz etc.)
@@ -3616,13 +3690,38 @@ cea_revision(const u8 *cea)
 static int
 cea_db_offsets(const u8 *cea, int *start, int *end)
 {
-	/* Data block offset in CEA extension block */
-	*start = 4;
-	*end = cea[2];
-	if (*end == 0)
-		*end = 127;
-	if (*end < 4 || *end > 127)
-		return -ERANGE;
+	/* DisplayID CTA extension blocks and top-level CEA EDID
+	 * block header definitions differ in the following bytes:
+	 *   1) Byte 2 of the header specifies length differently,
+	 *   2) Byte 3 is only present in the CEA top level block.
+	 *
+	 * The different definitions for byte 2 follow.
+	 *
+	 * DisplayID CTA extension block defines byte 2 as:
+	 *   Number of payload bytes
+	 *
+	 * CEA EDID block defines byte 2 as:
+	 *   Byte number (decimal) within this block where the 18-byte
+	 *   DTDs begin. If no non-DTD data is present in this extension
+	 *   block, the value should be set to 04h (the byte after next).
+	 *   If set to 00h, there are no DTDs present in this block and
+	 *   no non-DTD data.
+	 */
+	if (cea[0] == DATA_BLOCK_CTA) {
+		*start = 3;
+		*end = *start + cea[2];
+	} else if (cea[0] == CEA_EXT) {
+		/* Data block offset in CEA extension block */
+		*start = 4;
+		*end = cea[2];
+		if (*end == 0)
+			*end = 127;
+		if (*end < 4 || *end > 127)
+			return -ERANGE;
+	} else {
+		return -ENOTSUPP;
+	}
+
 	return 0;
 }
 
@@ -3831,6 +3930,55 @@ static void fixup_detailed_cea_mode_clock(struct drm_display_mode *mode)
 	mode->clock = clock;
 }
 
+static bool cea_db_is_hdmi_hdr_metadata_block(const u8 *db)
+{
+	if (cea_db_tag(db) != USE_EXTENDED_TAG)
+		return false;
+
+	if (db[1] != HDR_STATIC_METADATA_BLOCK)
+		return false;
+
+	if (cea_db_payload_len(db) < 3)
+		return false;
+
+	return true;
+}
+
+static uint8_t eotf_supported(const u8 *edid_ext)
+{
+	return edid_ext[2] &
+		(BIT(HDMI_EOTF_TRADITIONAL_GAMMA_SDR) |
+		 BIT(HDMI_EOTF_TRADITIONAL_GAMMA_HDR) |
+		 BIT(HDMI_EOTF_SMPTE_ST2084) |
+		 BIT(HDMI_EOTF_BT_2100_HLG));
+}
+
+static uint8_t hdr_metadata_type(const u8 *edid_ext)
+{
+	return edid_ext[3] &
+		BIT(HDMI_STATIC_METADATA_TYPE1);
+}
+
+static void
+drm_parse_hdr_metadata_block(struct drm_connector *connector, const u8 *db)
+{
+	u16 len;
+
+	len = cea_db_payload_len(db);
+
+	connector->hdr_sink_metadata.hdmi_type1.eotf =
+						eotf_supported(db);
+	connector->hdr_sink_metadata.hdmi_type1.metadata_type =
+						hdr_metadata_type(db);
+
+	if (len >= 4)
+		connector->hdr_sink_metadata.hdmi_type1.max_cll = db[4];
+	if (len >= 5)
+		connector->hdr_sink_metadata.hdmi_type1.max_fall = db[5];
+	if (len >= 6)
+		connector->hdr_sink_metadata.hdmi_type1.min_cll = db[6];
+}
+
 static void
 drm_parse_hdmi_vsdb_audio(struct drm_connector *connector, const u8 *db)
 {
@@ -4458,6 +4606,8 @@ static void drm_parse_cea_ext(struct drm_connector *connector,
 			drm_parse_y420cmdb_bitmap(connector, db);
 		if (cea_db_is_vcdb(db))
 			drm_parse_vcdb(connector, db);
+		if (cea_db_is_hdmi_hdr_metadata_block(db))
+			drm_parse_hdr_metadata_block(connector, db);
 	}
 }
 
@@ -4514,8 +4664,8 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi
 	 * tells us to assume 8 bpc color depth if the EDID doesn't have
 	 * extensions which tell otherwise.
 	 */
-	if ((info->bpc == 0) && (edid->revision < 4) &&
-	    (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) {
+	if (info->bpc == 0 && edid->revision == 3 &&
+	    edid->input & DRM_EDID_DIGITAL_DFP_1_X) {
 		info->bpc = 8;
 		DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n",
 			  connector->name, info->bpc);
@@ -4674,11 +4824,7 @@ static int add_displayid_detailed_modes(struct drm_connector *connector,
 		return 0;
 
 	idx += sizeof(struct displayid_hdr);
-	while (block = (struct displayid_block *)&displayid[idx],
-	       idx + sizeof(struct displayid_block) <= length &&
-	       idx + sizeof(struct displayid_block) + block->num_bytes <= length &&
-	       block->num_bytes > 0) {
-		idx += block->num_bytes + sizeof(struct displayid_block);
+	for_each_displayid_db(displayid, block, idx, length) {
 		switch (block->tag) {
 		case DATA_BLOCK_TYPE_1_DETAILED_TIMING:
 			num_modes += add_displayid_detailed_1_modes(connector, block);
@@ -4850,6 +4996,78 @@ static bool is_hdmi2_sink(struct drm_connector *connector)
 		connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB420;
 }
 
+static inline bool is_eotf_supported(u8 output_eotf, u8 sink_eotf)
+{
+	return sink_eotf & BIT(output_eotf);
+}
+
+/**
+ * drm_hdmi_infoframe_set_hdr_metadata() - fill an HDMI DRM infoframe with
+ *                                         HDR metadata from userspace
+ * @frame: HDMI DRM infoframe
+ * @conn_state: Connector state containing HDR metadata
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int
+drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame,
+				    const struct drm_connector_state *conn_state)
+{
+	struct drm_connector *connector;
+	struct hdr_output_metadata *hdr_metadata;
+	int err;
+
+	if (!frame || !conn_state)
+		return -EINVAL;
+
+	connector = conn_state->connector;
+
+	if (!conn_state->hdr_output_metadata)
+		return -EINVAL;
+
+	hdr_metadata = conn_state->hdr_output_metadata->data;
+
+	if (!hdr_metadata || !connector)
+		return -EINVAL;
+
+	/* Sink EOTF is Bit map while infoframe is absolute values */
+	if (!is_eotf_supported(hdr_metadata->hdmi_metadata_type1.eotf,
+	    connector->hdr_sink_metadata.hdmi_type1.eotf)) {
+		DRM_DEBUG_KMS("EOTF Not Supported\n");
+		return -EINVAL;
+	}
+
+	err = hdmi_drm_infoframe_init(frame);
+	if (err < 0)
+		return err;
+
+	frame->eotf = hdr_metadata->hdmi_metadata_type1.eotf;
+	frame->metadata_type = hdr_metadata->hdmi_metadata_type1.metadata_type;
+
+	BUILD_BUG_ON(sizeof(frame->display_primaries) !=
+		     sizeof(hdr_metadata->hdmi_metadata_type1.display_primaries));
+	BUILD_BUG_ON(sizeof(frame->white_point) !=
+		     sizeof(hdr_metadata->hdmi_metadata_type1.white_point));
+
+	memcpy(&frame->display_primaries,
+	       &hdr_metadata->hdmi_metadata_type1.display_primaries,
+	       sizeof(frame->display_primaries));
+
+	memcpy(&frame->white_point,
+	       &hdr_metadata->hdmi_metadata_type1.white_point,
+	       sizeof(frame->white_point));
+
+	frame->max_display_mastering_luminance =
+		hdr_metadata->hdmi_metadata_type1.max_display_mastering_luminance;
+	frame->min_display_mastering_luminance =
+		hdr_metadata->hdmi_metadata_type1.min_display_mastering_luminance;
+	frame->max_fall = hdr_metadata->hdmi_metadata_type1.max_fall;
+	frame->max_cll = hdr_metadata->hdmi_metadata_type1.max_cll;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_hdmi_infoframe_set_hdr_metadata);
+
 /**
  * drm_hdmi_avi_infoframe_from_display_mode() - fill an HDMI AVI infoframe with
  *                                              data from a DRM display mode
@@ -5223,11 +5441,7 @@ static int drm_parse_display_id(struct drm_connector *connector,
 		return ret;
 
 	idx += sizeof(struct displayid_hdr);
-	while (block = (struct displayid_block *)&displayid[idx],
-	       idx + sizeof(struct displayid_block) <= length &&
-	       idx + sizeof(struct displayid_block) + block->num_bytes <= length &&
-	       block->num_bytes > 0) {
-		idx += block->num_bytes + sizeof(struct displayid_block);
+	for_each_displayid_db(displayid, block, idx, length) {
 		DRM_DEBUG_KMS("block id 0x%x, rev %d, len %d\n",
 			      block->tag, block->rev, block->num_bytes);
 
@@ -5240,6 +5454,9 @@ static int drm_parse_display_id(struct drm_connector *connector,
 		case DATA_BLOCK_TYPE_1_DETAILED_TIMING:
 			/* handled in mode gathering code. */
 			break;
+		case DATA_BLOCK_CTA:
+			/* handled in the cea parser code. */
+			break;
 		default:
 			DRM_DEBUG_KMS("found DisplayID tag 0x%x, unhandled\n", block->tag);
 			break;
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index 1e5593575d23..d38b3b255926 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -7,12 +7,15 @@
 
 */
 
-#include <linux/module.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 
 static char edid_firmware[PATH_MAX];
 module_param_string(edid_firmware, edid_firmware, sizeof(edid_firmware), 0644);
@@ -278,6 +281,8 @@ struct edid *drm_load_edid_firmware(struct drm_connector *connector)
 	 * the last one found one as a fallback.
 	 */
 	fwstr = kstrdup(edid_firmware, GFP_KERNEL);
+	if (!fwstr)
+		return ERR_PTR(-ENOMEM);
 	edidstr = fwstr;
 
 	while ((edidname = strsep(&edidstr, ","))) {
diff --git a/drivers/gpu/drm/drm_encoder.c b/drivers/gpu/drm/drm_encoder.c
index b694fb57eaa4..7fb47b7b8b44 100644
--- a/drivers/gpu/drm/drm_encoder.c
+++ b/drivers/gpu/drm/drm_encoder.c
@@ -21,7 +21,9 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
 
 #include "drm_crtc_internal.h"
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 498f95c3e81d..1984e5c54d58 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -32,18 +32,21 @@
 #include <linux/console.h>
 #include <linux/dma-buf.h>
 #include <linux/kernel.h>
-#include <linux/sysrq.h>
-#include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_atomic.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_atomic.h>
-#include <drm/drm_atomic_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
-#include "drm_crtc_internal.h"
-#include "drm_crtc_helper_internal.h"
+#include "drm_internal.h"
 
 static bool drm_fbdev_emulation = true;
 module_param_named(fbdev_emulation, drm_fbdev_emulation, bool, 0600);
@@ -92,12 +95,6 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * Setup fbdev emulation by calling drm_fb_helper_fbdev_setup() and tear it
  * down by calling drm_fb_helper_fbdev_teardown().
  *
- * Drivers that need to handle connector hotplugging (e.g. dp mst) can't use
- * the setup helper and will need to do the whole four-step setup process with
- * drm_fb_helper_prepare(), drm_fb_helper_init(),
- * drm_fb_helper_single_add_all_connectors(), enable hotplugging and
- * drm_fb_helper_initial_config() to avoid a possible race window.
- *
  * At runtime drivers should restore the fbdev console by using
  * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback.
  * They should also notify the fb helper code from updates to the output
@@ -120,8 +117,7 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * encoders and connectors. To finish up the fbdev helper initialization, the
  * drm_fb_helper_init() function is called. To probe for all attached displays
  * and set up an initial configuration using the detected hardware, drivers
- * should call drm_fb_helper_single_add_all_connectors() followed by
- * drm_fb_helper_initial_config().
+ * should call drm_fb_helper_initial_config().
  *
  * If &drm_framebuffer_funcs.dirty is set, the
  * drm_fb_helper_{cfb,sys}_{write,fillrect,copyarea,imageblit} functions will
@@ -134,165 +130,6 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * deferred I/O (coupled with drm_fb_helper_fbdev_teardown()).
  */
 
-#define drm_fb_helper_for_each_connector(fbh, i__) \
-	for (({ lockdep_assert_held(&(fbh)->lock); }), \
-	     i__ = 0; i__ < (fbh)->connector_count; i__++)
-
-static int __drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper,
-					     struct drm_connector *connector)
-{
-	struct drm_fb_helper_connector *fb_conn;
-	struct drm_fb_helper_connector **temp;
-	unsigned int count;
-
-	if (!drm_fbdev_emulation)
-		return 0;
-
-	lockdep_assert_held(&fb_helper->lock);
-
-	count = fb_helper->connector_count + 1;
-
-	if (count > fb_helper->connector_info_alloc_count) {
-		size_t size = count * sizeof(fb_conn);
-
-		temp = krealloc(fb_helper->connector_info, size, GFP_KERNEL);
-		if (!temp)
-			return -ENOMEM;
-
-		fb_helper->connector_info_alloc_count = count;
-		fb_helper->connector_info = temp;
-	}
-
-	fb_conn = kzalloc(sizeof(*fb_conn), GFP_KERNEL);
-	if (!fb_conn)
-		return -ENOMEM;
-
-	drm_connector_get(connector);
-	fb_conn->connector = connector;
-	fb_helper->connector_info[fb_helper->connector_count++] = fb_conn;
-
-	return 0;
-}
-
-int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper,
-				    struct drm_connector *connector)
-{
-	int err;
-
-	if (!fb_helper)
-		return 0;
-
-	mutex_lock(&fb_helper->lock);
-	err = __drm_fb_helper_add_one_connector(fb_helper, connector);
-	mutex_unlock(&fb_helper->lock);
-
-	return err;
-}
-EXPORT_SYMBOL(drm_fb_helper_add_one_connector);
-
-/**
- * drm_fb_helper_single_add_all_connectors() - add all connectors to fbdev
- * 					       emulation helper
- * @fb_helper: fbdev initialized with drm_fb_helper_init, can be NULL
- *
- * This functions adds all the available connectors for use with the given
- * fb_helper. This is a separate step to allow drivers to freely assign
- * connectors to the fbdev, e.g. if some are reserved for special purposes or
- * not adequate to be used for the fbcon.
- *
- * This function is protected against concurrent connector hotadds/removals
- * using drm_fb_helper_add_one_connector() and
- * drm_fb_helper_remove_one_connector().
- */
-int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
-{
-	struct drm_device *dev;
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
-	int i, ret = 0;
-
-	if (!drm_fbdev_emulation || !fb_helper)
-		return 0;
-
-	dev = fb_helper->dev;
-
-	mutex_lock(&fb_helper->lock);
-	drm_connector_list_iter_begin(dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
-			continue;
-
-		ret = __drm_fb_helper_add_one_connector(fb_helper, connector);
-		if (ret)
-			goto fail;
-	}
-	goto out;
-
-fail:
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		struct drm_fb_helper_connector *fb_helper_connector =
-			fb_helper->connector_info[i];
-
-		drm_connector_put(fb_helper_connector->connector);
-
-		kfree(fb_helper_connector);
-		fb_helper->connector_info[i] = NULL;
-	}
-	fb_helper->connector_count = 0;
-out:
-	drm_connector_list_iter_end(&conn_iter);
-	mutex_unlock(&fb_helper->lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(drm_fb_helper_single_add_all_connectors);
-
-static int __drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
-						struct drm_connector *connector)
-{
-	struct drm_fb_helper_connector *fb_helper_connector;
-	int i, j;
-
-	if (!drm_fbdev_emulation)
-		return 0;
-
-	lockdep_assert_held(&fb_helper->lock);
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		if (fb_helper->connector_info[i]->connector == connector)
-			break;
-	}
-
-	if (i == fb_helper->connector_count)
-		return -EINVAL;
-	fb_helper_connector = fb_helper->connector_info[i];
-	drm_connector_put(fb_helper_connector->connector);
-
-	for (j = i + 1; j < fb_helper->connector_count; j++)
-		fb_helper->connector_info[j - 1] = fb_helper->connector_info[j];
-
-	fb_helper->connector_count--;
-	kfree(fb_helper_connector);
-
-	return 0;
-}
-
-int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
-				       struct drm_connector *connector)
-{
-	int err;
-
-	if (!fb_helper)
-		return 0;
-
-	mutex_lock(&fb_helper->lock);
-	err = __drm_fb_helper_remove_one_connector(fb_helper, connector);
-	mutex_unlock(&fb_helper->lock);
-
-	return err;
-}
-EXPORT_SYMBOL(drm_fb_helper_remove_one_connector);
-
 static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc)
 {
 	uint16_t *r_base, *g_base, *b_base;
@@ -316,13 +153,11 @@ int drm_fb_helper_debug_enter(struct fb_info *info)
 {
 	struct drm_fb_helper *helper = info->par;
 	const struct drm_crtc_helper_funcs *funcs;
-	int i;
+	struct drm_mode_set *mode_set;
 
 	list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) {
-		for (i = 0; i < helper->crtc_count; i++) {
-			struct drm_mode_set *mode_set =
-				&helper->crtc_info[i].mode_set;
-
+		mutex_lock(&helper->client.modeset_mutex);
+		drm_client_for_each_modeset(mode_set, &helper->client) {
 			if (!mode_set->crtc->enabled)
 				continue;
 
@@ -339,6 +174,7 @@ int drm_fb_helper_debug_enter(struct fb_info *info)
 						    mode_set->y,
 						    ENTER_ATOMIC_MODE_SET);
 		}
+		mutex_unlock(&helper->client.modeset_mutex);
 	}
 
 	return 0;
@@ -352,14 +188,14 @@ EXPORT_SYMBOL(drm_fb_helper_debug_enter);
 int drm_fb_helper_debug_leave(struct fb_info *info)
 {
 	struct drm_fb_helper *helper = info->par;
+	struct drm_client_dev *client = &helper->client;
 	struct drm_crtc *crtc;
 	const struct drm_crtc_helper_funcs *funcs;
+	struct drm_mode_set *mode_set;
 	struct drm_framebuffer *fb;
-	int i;
-
-	for (i = 0; i < helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set;
 
+	mutex_lock(&client->modeset_mutex);
+	drm_client_for_each_modeset(mode_set, client) {
 		crtc = mode_set->crtc;
 		if (drm_drv_uses_atomic_modeset(crtc->dev))
 			continue;
@@ -382,143 +218,12 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 		funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x,
 					    crtc->y, LEAVE_ATOMIC_MODE_SET);
 	}
+	mutex_unlock(&client->modeset_mutex);
 
 	return 0;
 }
 EXPORT_SYMBOL(drm_fb_helper_debug_leave);
 
-static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper, bool active)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_plane_state *plane_state;
-	struct drm_plane *plane;
-	struct drm_atomic_state *state;
-	int i, ret;
-	struct drm_modeset_acquire_ctx ctx;
-
-	drm_modeset_acquire_init(&ctx, 0);
-
-	state = drm_atomic_state_alloc(dev);
-	if (!state) {
-		ret = -ENOMEM;
-		goto out_ctx;
-	}
-
-	state->acquire_ctx = &ctx;
-retry:
-	drm_for_each_plane(plane, dev) {
-		plane_state = drm_atomic_get_plane_state(state, plane);
-		if (IS_ERR(plane_state)) {
-			ret = PTR_ERR(plane_state);
-			goto out_state;
-		}
-
-		plane_state->rotation = DRM_MODE_ROTATE_0;
-
-		/* disable non-primary: */
-		if (plane->type == DRM_PLANE_TYPE_PRIMARY)
-			continue;
-
-		ret = __drm_atomic_helper_disable_plane(plane, plane_state);
-		if (ret != 0)
-			goto out_state;
-	}
-
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
-		struct drm_plane *primary = mode_set->crtc->primary;
-
-		/* Cannot fail as we've already gotten the plane state above */
-		plane_state = drm_atomic_get_new_plane_state(state, primary);
-		plane_state->rotation = fb_helper->crtc_info[i].rotation;
-
-		ret = __drm_atomic_helper_set_config(mode_set, state);
-		if (ret != 0)
-			goto out_state;
-
-		/*
-		 * __drm_atomic_helper_set_config() sets active when a
-		 * mode is set, unconditionally clear it if we force DPMS off
-		 */
-		if (!active) {
-			struct drm_crtc *crtc = mode_set->crtc;
-			struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
-
-			crtc_state->active = false;
-		}
-	}
-
-	ret = drm_atomic_commit(state);
-
-out_state:
-	if (ret == -EDEADLK)
-		goto backoff;
-
-	drm_atomic_state_put(state);
-out_ctx:
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
-
-	return ret;
-
-backoff:
-	drm_atomic_state_clear(state);
-	drm_modeset_backoff(&ctx);
-
-	goto retry;
-}
-
-static int restore_fbdev_mode_legacy(struct drm_fb_helper *fb_helper)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_plane *plane;
-	int i, ret = 0;
-
-	drm_modeset_lock_all(fb_helper->dev);
-	drm_for_each_plane(plane, dev) {
-		if (plane->type != DRM_PLANE_TYPE_PRIMARY)
-			drm_plane_force_disable(plane);
-
-		if (plane->rotation_property)
-			drm_mode_plane_set_obj_prop(plane,
-						    plane->rotation_property,
-						    DRM_MODE_ROTATE_0);
-	}
-
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
-		struct drm_crtc *crtc = mode_set->crtc;
-
-		if (crtc->funcs->cursor_set2) {
-			ret = crtc->funcs->cursor_set2(crtc, NULL, 0, 0, 0, 0, 0);
-			if (ret)
-				goto out;
-		} else if (crtc->funcs->cursor_set) {
-			ret = crtc->funcs->cursor_set(crtc, NULL, 0, 0, 0);
-			if (ret)
-				goto out;
-		}
-
-		ret = drm_mode_set_config_internal(mode_set);
-		if (ret)
-			goto out;
-	}
-out:
-	drm_modeset_unlock_all(fb_helper->dev);
-
-	return ret;
-}
-
-static int restore_fbdev_mode(struct drm_fb_helper *fb_helper)
-{
-	struct drm_device *dev = fb_helper->dev;
-
-	if (drm_drv_uses_atomic_modeset(dev))
-		return restore_fbdev_mode_atomic(fb_helper, true);
-	else
-		return restore_fbdev_mode_legacy(fb_helper);
-}
-
 /**
  * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration
  * @fb_helper: driver-allocated fbdev helper, can be NULL
@@ -542,7 +247,17 @@ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper)
 		return 0;
 
 	mutex_lock(&fb_helper->lock);
-	ret = restore_fbdev_mode(fb_helper);
+	/*
+	 * TODO:
+	 * We should bail out here if there is a master by dropping _force.
+	 * Currently these igt tests fail if we do that:
+	 * - kms_fbcon_fbt@psr
+	 * - kms_fbcon_fbt@psr-suspend
+	 *
+	 * So first these tests need to be fixed so they drop master or don't
+	 * have an fd open.
+	 */
+	ret = drm_client_modeset_commit_force(&fb_helper->client);
 
 	do_delayed = fb_helper->delayed_hotplug;
 	if (do_delayed)
@@ -556,34 +271,6 @@ int drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode_unlocked);
 
-static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_crtc *crtc;
-	int bound = 0, crtcs_bound = 0;
-
-	/*
-	 * Sometimes user space wants everything disabled, so don't steal the
-	 * display if there's a master.
-	 */
-	if (READ_ONCE(dev->master))
-		return false;
-
-	drm_for_each_crtc(crtc, dev) {
-		drm_modeset_lock(&crtc->mutex, NULL);
-		if (crtc->primary->fb)
-			crtcs_bound++;
-		if (crtc->primary->fb == fb_helper->fb)
-			bound++;
-		drm_modeset_unlock(&crtc->mutex);
-	}
-
-	if (bound < crtcs_bound)
-		return false;
-
-	return true;
-}
-
 #ifdef CONFIG_MAGIC_SYSRQ
 /*
  * restore fbcon display for all kms driver's using this helper, used for sysrq
@@ -604,7 +291,7 @@ static bool drm_fb_helper_force_kernel_mode(void)
 			continue;
 
 		mutex_lock(&helper->lock);
-		ret = restore_fbdev_mode(helper);
+		ret = drm_client_modeset_commit_force(&helper->client);
 		if (ret)
 			error = true;
 		mutex_unlock(&helper->lock);
@@ -636,47 +323,12 @@ static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = {
 static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { };
 #endif
 
-static void dpms_legacy(struct drm_fb_helper *fb_helper, int dpms_mode)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_connector *connector;
-	struct drm_mode_set *modeset;
-	int i, j;
-
-	drm_modeset_lock_all(dev);
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		modeset = &fb_helper->crtc_info[i].mode_set;
-
-		if (!modeset->crtc->enabled)
-			continue;
-
-		for (j = 0; j < modeset->num_connectors; j++) {
-			connector = modeset->connectors[j];
-			connector->funcs->dpms(connector, dpms_mode);
-			drm_object_property_set_value(&connector->base,
-				dev->mode_config.dpms_property, dpms_mode);
-		}
-	}
-	drm_modeset_unlock_all(dev);
-}
-
 static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode)
 {
 	struct drm_fb_helper *fb_helper = info->par;
 
-	/*
-	 * For each CRTC in this fb, turn the connectors on/off.
-	 */
 	mutex_lock(&fb_helper->lock);
-	if (!drm_fb_helper_is_bound(fb_helper)) {
-		mutex_unlock(&fb_helper->lock);
-		return;
-	}
-
-	if (drm_drv_uses_atomic_modeset(fb_helper->dev))
-		restore_fbdev_mode_atomic(fb_helper, dpms_mode == DRM_MODE_DPMS_ON);
-	else
-		dpms_legacy(fb_helper, dpms_mode);
+	drm_client_modeset_dpms(&fb_helper->client, dpms_mode);
 	mutex_unlock(&fb_helper->lock);
 }
 
@@ -716,43 +368,6 @@ int drm_fb_helper_blank(int blank, struct fb_info *info)
 }
 EXPORT_SYMBOL(drm_fb_helper_blank);
 
-static void drm_fb_helper_modeset_release(struct drm_fb_helper *helper,
-					  struct drm_mode_set *modeset)
-{
-	int i;
-
-	for (i = 0; i < modeset->num_connectors; i++) {
-		drm_connector_put(modeset->connectors[i]);
-		modeset->connectors[i] = NULL;
-	}
-	modeset->num_connectors = 0;
-
-	drm_mode_destroy(helper->dev, modeset->mode);
-	modeset->mode = NULL;
-
-	/* FIXME should hold a ref? */
-	modeset->fb = NULL;
-}
-
-static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper)
-{
-	int i;
-
-	for (i = 0; i < helper->connector_count; i++) {
-		drm_connector_put(helper->connector_info[i]->connector);
-		kfree(helper->connector_info[i]);
-	}
-	kfree(helper->connector_info);
-
-	for (i = 0; i < helper->crtc_count; i++) {
-		struct drm_mode_set *modeset = &helper->crtc_info[i].mode_set;
-
-		drm_fb_helper_modeset_release(helper, modeset);
-		kfree(modeset->connectors);
-	}
-	kfree(helper->crtc_info);
-}
-
 static void drm_fb_helper_resume_worker(struct work_struct *work)
 {
 	struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
@@ -767,7 +382,7 @@ static void drm_fb_helper_dirty_blit_real(struct drm_fb_helper *fb_helper,
 					  struct drm_clip_rect *clip)
 {
 	struct drm_framebuffer *fb = fb_helper->fb;
-	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
+	unsigned int cpp = fb->format->cpp[0];
 	size_t offset = clip->y1 * fb->pitches[0] + clip->x1 * cpp;
 	void *src = fb_helper->fbdev->screen_buffer + offset;
 	void *dst = fb_helper->buffer->vaddr + offset;
@@ -831,7 +446,7 @@ EXPORT_SYMBOL(drm_fb_helper_prepare);
  * drm_fb_helper_init - initialize a &struct drm_fb_helper
  * @dev: drm device
  * @fb_helper: driver-allocated fbdev helper structure to initialize
- * @max_conn_count: max connector count
+ * @max_conn_count: max connector count (not used)
  *
  * This allocates the structures for the fbdev helper with the given limits.
  * Note that this won't yet touch the hardware (through the driver interfaces)
@@ -847,55 +462,26 @@ int drm_fb_helper_init(struct drm_device *dev,
 		       struct drm_fb_helper *fb_helper,
 		       int max_conn_count)
 {
-	struct drm_crtc *crtc;
-	struct drm_mode_config *config = &dev->mode_config;
-	int i;
+	int ret;
 
 	if (!drm_fbdev_emulation) {
 		dev->fb_helper = fb_helper;
 		return 0;
 	}
 
-	if (!max_conn_count)
-		return -EINVAL;
-
-	fb_helper->crtc_info = kcalloc(config->num_crtc, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL);
-	if (!fb_helper->crtc_info)
-		return -ENOMEM;
-
-	fb_helper->crtc_count = config->num_crtc;
-	fb_helper->connector_info = kcalloc(dev->mode_config.num_connector, sizeof(struct drm_fb_helper_connector *), GFP_KERNEL);
-	if (!fb_helper->connector_info) {
-		kfree(fb_helper->crtc_info);
-		return -ENOMEM;
-	}
-	fb_helper->connector_info_alloc_count = dev->mode_config.num_connector;
-	fb_helper->connector_count = 0;
-
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		fb_helper->crtc_info[i].mode_set.connectors =
-			kcalloc(max_conn_count,
-				sizeof(struct drm_connector *),
-				GFP_KERNEL);
-
-		if (!fb_helper->crtc_info[i].mode_set.connectors)
-			goto out_free;
-		fb_helper->crtc_info[i].mode_set.num_connectors = 0;
-		fb_helper->crtc_info[i].rotation = DRM_MODE_ROTATE_0;
-	}
-
-	i = 0;
-	drm_for_each_crtc(crtc, dev) {
-		fb_helper->crtc_info[i].mode_set.crtc = crtc;
-		i++;
+	/*
+	 * If this is not the generic fbdev client, initialize a drm_client
+	 * without callbacks so we can use the modesets.
+	 */
+	if (!fb_helper->client.funcs) {
+		ret = drm_client_init(dev, &fb_helper->client, "drm_fb_helper", NULL);
+		if (ret)
+			return ret;
 	}
 
 	dev->fb_helper = fb_helper;
 
 	return 0;
-out_free:
-	drm_fb_helper_crtc_free(fb_helper);
-	return -ENOMEM;
 }
 EXPORT_SYMBOL(drm_fb_helper_init);
 
@@ -999,8 +585,9 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 	mutex_unlock(&kernel_fb_helper_lock);
 
 	mutex_destroy(&fb_helper->lock);
-	drm_fb_helper_crtc_free(fb_helper);
 
+	if (!fb_helper->client.funcs)
+		drm_client_release(&fb_helper->client);
 }
 EXPORT_SYMBOL(drm_fb_helper_fini);
 
@@ -1345,13 +932,14 @@ static int setcmap_pseudo_palette(struct fb_cmap *cmap, struct fb_info *info)
 static int setcmap_legacy(struct fb_cmap *cmap, struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_mode_set *modeset;
 	struct drm_crtc *crtc;
 	u16 *r, *g, *b;
-	int i, ret = 0;
+	int ret = 0;
 
 	drm_modeset_lock_all(fb_helper->dev);
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		crtc = fb_helper->crtc_info[i].mode_set.crtc;
+	drm_client_for_each_modeset(modeset, &fb_helper->client) {
+		crtc = modeset->crtc;
 		if (!crtc->funcs->gamma_set || !crtc->gamma_size)
 			return -EINVAL;
 
@@ -1427,10 +1015,11 @@ static int setcmap_atomic(struct fb_cmap *cmap, struct fb_info *info)
 	struct drm_modeset_acquire_ctx ctx;
 	struct drm_crtc_state *crtc_state;
 	struct drm_atomic_state *state;
+	struct drm_mode_set *modeset;
 	struct drm_crtc *crtc;
 	u16 *r, *g, *b;
-	int i, ret = 0;
 	bool replaced;
+	int ret = 0;
 
 	drm_modeset_acquire_init(&ctx, 0);
 
@@ -1442,8 +1031,8 @@ static int setcmap_atomic(struct fb_cmap *cmap, struct fb_info *info)
 
 	state->acquire_ctx = &ctx;
 retry:
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		crtc = fb_helper->crtc_info[i].mode_set.crtc;
+	drm_client_for_each_modeset(modeset, &fb_helper->client) {
+		crtc = modeset->crtc;
 
 		if (!gamma_lut)
 			gamma_lut = setcmap_new_gamma_lut(crtc, cmap);
@@ -1471,8 +1060,8 @@ retry:
 	if (ret)
 		goto out_state;
 
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		crtc = fb_helper->crtc_info[i].mode_set.crtc;
+	drm_client_for_each_modeset(modeset, &fb_helper->client) {
+		crtc = modeset->crtc;
 
 		r = crtc->gamma_store;
 		g = r + crtc->gamma_size;
@@ -1509,6 +1098,7 @@ backoff:
 int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_device *dev = fb_helper->dev;
 	int ret;
 
 	if (oops_in_progress)
@@ -1516,19 +1106,22 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
 
 	mutex_lock(&fb_helper->lock);
 
-	if (!drm_fb_helper_is_bound(fb_helper)) {
+	if (!drm_master_internal_acquire(dev)) {
 		ret = -EBUSY;
-		goto out;
+		goto unlock;
 	}
 
+	mutex_lock(&fb_helper->client.modeset_mutex);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR)
 		ret = setcmap_pseudo_palette(cmap, info);
 	else if (drm_drv_uses_atomic_modeset(fb_helper->dev))
 		ret = setcmap_atomic(cmap, info);
 	else
 		ret = setcmap_legacy(cmap, info);
+	mutex_unlock(&fb_helper->client.modeset_mutex);
 
-out:
+	drm_master_internal_release(dev);
+unlock:
 	mutex_unlock(&fb_helper->lock);
 
 	return ret;
@@ -1548,12 +1141,12 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd,
 			unsigned long arg)
 {
 	struct drm_fb_helper *fb_helper = info->par;
-	struct drm_mode_set *mode_set;
+	struct drm_device *dev = fb_helper->dev;
 	struct drm_crtc *crtc;
 	int ret = 0;
 
 	mutex_lock(&fb_helper->lock);
-	if (!drm_fb_helper_is_bound(fb_helper)) {
+	if (!drm_master_internal_acquire(dev)) {
 		ret = -EBUSY;
 		goto unlock;
 	}
@@ -1576,8 +1169,7 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd,
 		 * make. If we're not smart enough here, one should
 		 * just consider switch the userspace to KMS.
 		 */
-		mode_set = &fb_helper->crtc_info[0].mode_set;
-		crtc = mode_set->crtc;
+		crtc = fb_helper->client.modesets[0].crtc;
 
 		/*
 		 * Only wait for a vblank event if the CRTC is
@@ -1591,11 +1183,12 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd,
 		}
 
 		ret = 0;
-		goto unlock;
+		break;
 	default:
 		ret = -ENOTTY;
 	}
 
+	drm_master_internal_release(dev);
 unlock:
 	mutex_unlock(&fb_helper->lock);
 	return ret;
@@ -1773,16 +1366,14 @@ EXPORT_SYMBOL(drm_fb_helper_set_par);
 
 static void pan_set(struct drm_fb_helper *fb_helper, int x, int y)
 {
-	int i;
-
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set;
-
-		mode_set = &fb_helper->crtc_info[i].mode_set;
+	struct drm_mode_set *mode_set;
 
+	mutex_lock(&fb_helper->client.modeset_mutex);
+	drm_client_for_each_modeset(mode_set, &fb_helper->client) {
 		mode_set->x = x;
 		mode_set->y = y;
 	}
+	mutex_unlock(&fb_helper->client.modeset_mutex);
 }
 
 static int pan_display_atomic(struct fb_var_screeninfo *var,
@@ -1793,7 +1384,7 @@ static int pan_display_atomic(struct fb_var_screeninfo *var,
 
 	pan_set(fb_helper, var->xoffset, var->yoffset);
 
-	ret = restore_fbdev_mode_atomic(fb_helper, true);
+	ret = drm_client_modeset_commit_force(&fb_helper->client);
 	if (!ret) {
 		info->var.xoffset = var->xoffset;
 		info->var.yoffset = var->yoffset;
@@ -1807,14 +1398,13 @@ static int pan_display_legacy(struct fb_var_screeninfo *var,
 			      struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_client_dev *client = &fb_helper->client;
 	struct drm_mode_set *modeset;
 	int ret = 0;
-	int i;
 
+	mutex_lock(&client->modeset_mutex);
 	drm_modeset_lock_all(fb_helper->dev);
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		modeset = &fb_helper->crtc_info[i].mode_set;
-
+	drm_client_for_each_modeset(modeset, client) {
 		modeset->x = var->xoffset;
 		modeset->y = var->yoffset;
 
@@ -1827,6 +1417,7 @@ static int pan_display_legacy(struct fb_var_screeninfo *var,
 		}
 	}
 	drm_modeset_unlock_all(fb_helper->dev);
+	mutex_unlock(&client->modeset_mutex);
 
 	return ret;
 }
@@ -1847,15 +1438,18 @@ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var,
 		return -EBUSY;
 
 	mutex_lock(&fb_helper->lock);
-	if (!drm_fb_helper_is_bound(fb_helper)) {
-		mutex_unlock(&fb_helper->lock);
-		return -EBUSY;
+	if (!drm_master_internal_acquire(dev)) {
+		ret = -EBUSY;
+		goto unlock;
 	}
 
 	if (drm_drv_uses_atomic_modeset(dev))
 		ret = pan_display_atomic(var, info);
 	else
 		ret = pan_display_legacy(var, info);
+
+	drm_master_internal_release(dev);
+unlock:
 	mutex_unlock(&fb_helper->lock);
 
 	return ret;
@@ -1869,10 +1463,13 @@ EXPORT_SYMBOL(drm_fb_helper_pan_display);
 static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 					 int preferred_bpp)
 {
+	struct drm_client_dev *client = &fb_helper->client;
 	int ret = 0;
 	int crtc_count = 0;
-	int i;
+	struct drm_connector_list_iter conn_iter;
 	struct drm_fb_helper_surface_size sizes;
+	struct drm_connector *connector;
+	struct drm_mode_set *mode_set;
 	int best_depth = 0;
 
 	memset(&sizes, 0, sizeof(struct drm_fb_helper_surface_size));
@@ -1888,11 +1485,11 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	if (preferred_bpp != sizes.surface_bpp)
 		sizes.surface_depth = sizes.surface_bpp = preferred_bpp;
 
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
+	drm_connector_list_iter_begin(fb_helper->dev, &conn_iter);
+	drm_client_for_each_connector_iter(connector, &conn_iter) {
 		struct drm_cmdline_mode *cmdline_mode;
 
-		cmdline_mode = &fb_helper_conn->connector->cmdline_mode;
+		cmdline_mode = &connector->cmdline_mode;
 
 		if (cmdline_mode->bpp_specified) {
 			switch (cmdline_mode->bpp) {
@@ -1917,19 +1514,20 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 			break;
 		}
 	}
+	drm_connector_list_iter_end(&conn_iter);
 
 	/*
 	 * If we run into a situation where, for example, the primary plane
 	 * supports RGBA5551 (16 bpp, depth 15) but not RGB565 (16 bpp, depth
 	 * 16) we need to scale down the depth of the sizes we request.
 	 */
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
+	mutex_lock(&client->modeset_mutex);
+	drm_client_for_each_modeset(mode_set, client) {
 		struct drm_crtc *crtc = mode_set->crtc;
 		struct drm_plane *plane = crtc->primary;
 		int j;
 
-		DRM_DEBUG("test CRTC %d primary plane\n", i);
+		DRM_DEBUG("test CRTC %u primary plane\n", drm_crtc_index(crtc));
 
 		for (j = 0; j < plane->format_count; j++) {
 			const struct drm_format_info *fmt;
@@ -1969,9 +1567,8 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 
 	/* first up get a count of crtcs now in use and new min/maxes width/heights */
 	crtc_count = 0;
-	for (i = 0; i < fb_helper->crtc_count; i++) {
+	drm_client_for_each_modeset(mode_set, client) {
 		struct drm_display_mode *desired_mode;
-		struct drm_mode_set *mode_set;
 		int x, y, j;
 		/* in case of tile group, are we the last tile vert or horiz?
 		 * If no tile group you are always the last one both vertically
@@ -1979,16 +1576,15 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		 */
 		bool lastv = true, lasth = true;
 
-		desired_mode = fb_helper->crtc_info[i].desired_mode;
-		mode_set = &fb_helper->crtc_info[i].mode_set;
+		desired_mode = mode_set->mode;
 
 		if (!desired_mode)
 			continue;
 
 		crtc_count++;
 
-		x = fb_helper->crtc_info[i].x;
-		y = fb_helper->crtc_info[i].y;
+		x = mode_set->x;
+		y = mode_set->y;
 
 		sizes.surface_width  = max_t(u32, desired_mode->hdisplay + x, sizes.surface_width);
 		sizes.surface_height = max_t(u32, desired_mode->vdisplay + y, sizes.surface_height);
@@ -2009,13 +1605,14 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		if (lastv)
 			sizes.fb_height = min_t(u32, desired_mode->vdisplay + y, sizes.fb_height);
 	}
+	mutex_unlock(&client->modeset_mutex);
 
 	if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) {
 		DRM_INFO("Cannot find any crtc or sizes\n");
 
 		/* First time: disable all crtc's.. */
-		if (!fb_helper->deferred_setup && !READ_ONCE(fb_helper->dev->master))
-			restore_fbdev_mode(fb_helper);
+		if (!fb_helper->deferred_setup)
+			drm_client_modeset_commit(client);
 		return -EAGAIN;
 	}
 
@@ -2102,742 +1699,6 @@ void drm_fb_helper_fill_info(struct fb_info *info,
 }
 EXPORT_SYMBOL(drm_fb_helper_fill_info);
 
-static int drm_fb_helper_probe_connector_modes(struct drm_fb_helper *fb_helper,
-						uint32_t maxX,
-						uint32_t maxY)
-{
-	struct drm_connector *connector;
-	int i, count = 0;
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		connector = fb_helper->connector_info[i]->connector;
-		count += connector->funcs->fill_modes(connector, maxX, maxY);
-	}
-
-	return count;
-}
-
-struct drm_display_mode *drm_has_preferred_mode(struct drm_fb_helper_connector *fb_connector, int width, int height)
-{
-	struct drm_display_mode *mode;
-
-	list_for_each_entry(mode, &fb_connector->connector->modes, head) {
-		if (mode->hdisplay > width ||
-		    mode->vdisplay > height)
-			continue;
-		if (mode->type & DRM_MODE_TYPE_PREFERRED)
-			return mode;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(drm_has_preferred_mode);
-
-static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector)
-{
-	return fb_connector->connector->cmdline_mode.specified;
-}
-
-struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn)
-{
-	struct drm_cmdline_mode *cmdline_mode;
-	struct drm_display_mode *mode;
-	bool prefer_non_interlace;
-
-	cmdline_mode = &fb_helper_conn->connector->cmdline_mode;
-	if (cmdline_mode->specified == false)
-		return NULL;
-
-	/* attempt to find a matching mode in the list of modes
-	 *  we have gotten so far, if not add a CVT mode that conforms
-	 */
-	if (cmdline_mode->rb || cmdline_mode->margins)
-		goto create_mode;
-
-	prefer_non_interlace = !cmdline_mode->interlace;
-again:
-	list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) {
-		/* check width/height */
-		if (mode->hdisplay != cmdline_mode->xres ||
-		    mode->vdisplay != cmdline_mode->yres)
-			continue;
-
-		if (cmdline_mode->refresh_specified) {
-			if (mode->vrefresh != cmdline_mode->refresh)
-				continue;
-		}
-
-		if (cmdline_mode->interlace) {
-			if (!(mode->flags & DRM_MODE_FLAG_INTERLACE))
-				continue;
-		} else if (prefer_non_interlace) {
-			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-				continue;
-		}
-		return mode;
-	}
-
-	if (prefer_non_interlace) {
-		prefer_non_interlace = false;
-		goto again;
-	}
-
-create_mode:
-	mode = drm_mode_create_from_cmdline_mode(fb_helper_conn->connector->dev,
-						 cmdline_mode);
-	list_add(&mode->head, &fb_helper_conn->connector->modes);
-	return mode;
-}
-EXPORT_SYMBOL(drm_pick_cmdline_mode);
-
-static bool drm_connector_enabled(struct drm_connector *connector, bool strict)
-{
-	bool enable;
-
-	if (connector->display_info.non_desktop)
-		return false;
-
-	if (strict)
-		enable = connector->status == connector_status_connected;
-	else
-		enable = connector->status != connector_status_disconnected;
-
-	return enable;
-}
-
-static void drm_enable_connectors(struct drm_fb_helper *fb_helper,
-				  bool *enabled)
-{
-	bool any_enabled = false;
-	struct drm_connector *connector;
-	int i = 0;
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		connector = fb_helper->connector_info[i]->connector;
-		enabled[i] = drm_connector_enabled(connector, true);
-		DRM_DEBUG_KMS("connector %d enabled? %s\n", connector->base.id,
-			      connector->display_info.non_desktop ? "non desktop" : enabled[i] ? "yes" : "no");
-
-		any_enabled |= enabled[i];
-	}
-
-	if (any_enabled)
-		return;
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		connector = fb_helper->connector_info[i]->connector;
-		enabled[i] = drm_connector_enabled(connector, false);
-	}
-}
-
-static bool drm_target_cloned(struct drm_fb_helper *fb_helper,
-			      struct drm_display_mode **modes,
-			      struct drm_fb_offset *offsets,
-			      bool *enabled, int width, int height)
-{
-	int count, i, j;
-	bool can_clone = false;
-	struct drm_fb_helper_connector *fb_helper_conn;
-	struct drm_display_mode *dmt_mode, *mode;
-
-	/* only contemplate cloning in the single crtc case */
-	if (fb_helper->crtc_count > 1)
-		return false;
-
-	count = 0;
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		if (enabled[i])
-			count++;
-	}
-
-	/* only contemplate cloning if more than one connector is enabled */
-	if (count <= 1)
-		return false;
-
-	/* check the command line or if nothing common pick 1024x768 */
-	can_clone = true;
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		if (!enabled[i])
-			continue;
-		fb_helper_conn = fb_helper->connector_info[i];
-		modes[i] = drm_pick_cmdline_mode(fb_helper_conn);
-		if (!modes[i]) {
-			can_clone = false;
-			break;
-		}
-		for (j = 0; j < i; j++) {
-			if (!enabled[j])
-				continue;
-			if (!drm_mode_match(modes[j], modes[i],
-					    DRM_MODE_MATCH_TIMINGS |
-					    DRM_MODE_MATCH_CLOCK |
-					    DRM_MODE_MATCH_FLAGS |
-					    DRM_MODE_MATCH_3D_FLAGS))
-				can_clone = false;
-		}
-	}
-
-	if (can_clone) {
-		DRM_DEBUG_KMS("can clone using command line\n");
-		return true;
-	}
-
-	/* try and find a 1024x768 mode on each connector */
-	can_clone = true;
-	dmt_mode = drm_mode_find_dmt(fb_helper->dev, 1024, 768, 60, false);
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		if (!enabled[i])
-			continue;
-
-		fb_helper_conn = fb_helper->connector_info[i];
-		list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) {
-			if (drm_mode_match(mode, dmt_mode,
-					   DRM_MODE_MATCH_TIMINGS |
-					   DRM_MODE_MATCH_CLOCK |
-					   DRM_MODE_MATCH_FLAGS |
-					   DRM_MODE_MATCH_3D_FLAGS))
-				modes[i] = mode;
-		}
-		if (!modes[i])
-			can_clone = false;
-	}
-
-	if (can_clone) {
-		DRM_DEBUG_KMS("can clone using 1024x768\n");
-		return true;
-	}
-	DRM_INFO("kms: can't enable cloning when we probably wanted to.\n");
-	return false;
-}
-
-static int drm_get_tile_offsets(struct drm_fb_helper *fb_helper,
-				struct drm_display_mode **modes,
-				struct drm_fb_offset *offsets,
-				int idx,
-				int h_idx, int v_idx)
-{
-	struct drm_fb_helper_connector *fb_helper_conn;
-	int i;
-	int hoffset = 0, voffset = 0;
-
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		fb_helper_conn = fb_helper->connector_info[i];
-		if (!fb_helper_conn->connector->has_tile)
-			continue;
-
-		if (!modes[i] && (h_idx || v_idx)) {
-			DRM_DEBUG_KMS("no modes for connector tiled %d %d\n", i,
-				      fb_helper_conn->connector->base.id);
-			continue;
-		}
-		if (fb_helper_conn->connector->tile_h_loc < h_idx)
-			hoffset += modes[i]->hdisplay;
-
-		if (fb_helper_conn->connector->tile_v_loc < v_idx)
-			voffset += modes[i]->vdisplay;
-	}
-	offsets[idx].x = hoffset;
-	offsets[idx].y = voffset;
-	DRM_DEBUG_KMS("returned %d %d for %d %d\n", hoffset, voffset, h_idx, v_idx);
-	return 0;
-}
-
-static bool drm_target_preferred(struct drm_fb_helper *fb_helper,
-				 struct drm_display_mode **modes,
-				 struct drm_fb_offset *offsets,
-				 bool *enabled, int width, int height)
-{
-	struct drm_fb_helper_connector *fb_helper_conn;
-	const u64 mask = BIT_ULL(fb_helper->connector_count) - 1;
-	u64 conn_configured = 0;
-	int tile_pass = 0;
-	int i;
-
-retry:
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		fb_helper_conn = fb_helper->connector_info[i];
-
-		if (conn_configured & BIT_ULL(i))
-			continue;
-
-		if (enabled[i] == false) {
-			conn_configured |= BIT_ULL(i);
-			continue;
-		}
-
-		/* first pass over all the untiled connectors */
-		if (tile_pass == 0 && fb_helper_conn->connector->has_tile)
-			continue;
-
-		if (tile_pass == 1) {
-			if (fb_helper_conn->connector->tile_h_loc != 0 ||
-			    fb_helper_conn->connector->tile_v_loc != 0)
-				continue;
-
-		} else {
-			if (fb_helper_conn->connector->tile_h_loc != tile_pass - 1 &&
-			    fb_helper_conn->connector->tile_v_loc != tile_pass - 1)
-			/* if this tile_pass doesn't cover any of the tiles - keep going */
-				continue;
-
-			/*
-			 * find the tile offsets for this pass - need to find
-			 * all tiles left and above
-			 */
-			drm_get_tile_offsets(fb_helper, modes, offsets,
-					     i, fb_helper_conn->connector->tile_h_loc, fb_helper_conn->connector->tile_v_loc);
-		}
-		DRM_DEBUG_KMS("looking for cmdline mode on connector %d\n",
-			      fb_helper_conn->connector->base.id);
-
-		/* got for command line mode first */
-		modes[i] = drm_pick_cmdline_mode(fb_helper_conn);
-		if (!modes[i]) {
-			DRM_DEBUG_KMS("looking for preferred mode on connector %d %d\n",
-				      fb_helper_conn->connector->base.id, fb_helper_conn->connector->tile_group ? fb_helper_conn->connector->tile_group->id : 0);
-			modes[i] = drm_has_preferred_mode(fb_helper_conn, width, height);
-		}
-		/* No preferred modes, pick one off the list */
-		if (!modes[i] && !list_empty(&fb_helper_conn->connector->modes)) {
-			list_for_each_entry(modes[i], &fb_helper_conn->connector->modes, head)
-				break;
-		}
-		DRM_DEBUG_KMS("found mode %s\n", modes[i] ? modes[i]->name :
-			  "none");
-		conn_configured |= BIT_ULL(i);
-	}
-
-	if ((conn_configured & mask) != mask) {
-		tile_pass++;
-		goto retry;
-	}
-	return true;
-}
-
-static bool connector_has_possible_crtc(struct drm_connector *connector,
-					struct drm_crtc *crtc)
-{
-	struct drm_encoder *encoder;
-	int i;
-
-	drm_connector_for_each_possible_encoder(connector, encoder, i) {
-		if (encoder->possible_crtcs & drm_crtc_mask(crtc))
-			return true;
-	}
-
-	return false;
-}
-
-static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
-			  struct drm_fb_helper_crtc **best_crtcs,
-			  struct drm_display_mode **modes,
-			  int n, int width, int height)
-{
-	int c, o;
-	struct drm_connector *connector;
-	int my_score, best_score, score;
-	struct drm_fb_helper_crtc **crtcs, *crtc;
-	struct drm_fb_helper_connector *fb_helper_conn;
-
-	if (n == fb_helper->connector_count)
-		return 0;
-
-	fb_helper_conn = fb_helper->connector_info[n];
-	connector = fb_helper_conn->connector;
-
-	best_crtcs[n] = NULL;
-	best_score = drm_pick_crtcs(fb_helper, best_crtcs, modes, n+1, width, height);
-	if (modes[n] == NULL)
-		return best_score;
-
-	crtcs = kcalloc(fb_helper->connector_count,
-			sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL);
-	if (!crtcs)
-		return best_score;
-
-	my_score = 1;
-	if (connector->status == connector_status_connected)
-		my_score++;
-	if (drm_has_cmdline_mode(fb_helper_conn))
-		my_score++;
-	if (drm_has_preferred_mode(fb_helper_conn, width, height))
-		my_score++;
-
-	/*
-	 * select a crtc for this connector and then attempt to configure
-	 * remaining connectors
-	 */
-	for (c = 0; c < fb_helper->crtc_count; c++) {
-		crtc = &fb_helper->crtc_info[c];
-
-		if (!connector_has_possible_crtc(connector,
-						 crtc->mode_set.crtc))
-			continue;
-
-		for (o = 0; o < n; o++)
-			if (best_crtcs[o] == crtc)
-				break;
-
-		if (o < n) {
-			/* ignore cloning unless only a single crtc */
-			if (fb_helper->crtc_count > 1)
-				continue;
-
-			if (!drm_mode_equal(modes[o], modes[n]))
-				continue;
-		}
-
-		crtcs[n] = crtc;
-		memcpy(crtcs, best_crtcs, n * sizeof(struct drm_fb_helper_crtc *));
-		score = my_score + drm_pick_crtcs(fb_helper, crtcs, modes, n + 1,
-						  width, height);
-		if (score > best_score) {
-			best_score = score;
-			memcpy(best_crtcs, crtcs,
-			       fb_helper->connector_count *
-			       sizeof(struct drm_fb_helper_crtc *));
-		}
-	}
-
-	kfree(crtcs);
-	return best_score;
-}
-
-/*
- * This function checks if rotation is necessary because of panel orientation
- * and if it is, if it is supported.
- * If rotation is necessary and supported, it gets set in fb_crtc.rotation.
- * If rotation is necessary but not supported, a DRM_MODE_ROTATE_* flag gets
- * or-ed into fb_helper->sw_rotations. In drm_setup_crtcs_fb() we check if only
- * one bit is set and then we set fb_info.fbcon_rotate_hint to make fbcon do
- * the unsupported rotation.
- */
-static void drm_setup_crtc_rotation(struct drm_fb_helper *fb_helper,
-				    struct drm_fb_helper_crtc *fb_crtc,
-				    struct drm_connector *connector)
-{
-	struct drm_plane *plane = fb_crtc->mode_set.crtc->primary;
-	uint64_t valid_mask = 0;
-	int i, rotation;
-
-	fb_crtc->rotation = DRM_MODE_ROTATE_0;
-
-	switch (connector->display_info.panel_orientation) {
-	case DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP:
-		rotation = DRM_MODE_ROTATE_180;
-		break;
-	case DRM_MODE_PANEL_ORIENTATION_LEFT_UP:
-		rotation = DRM_MODE_ROTATE_90;
-		break;
-	case DRM_MODE_PANEL_ORIENTATION_RIGHT_UP:
-		rotation = DRM_MODE_ROTATE_270;
-		break;
-	default:
-		rotation = DRM_MODE_ROTATE_0;
-	}
-
-	/*
-	 * TODO: support 90 / 270 degree hardware rotation,
-	 * depending on the hardware this may require the framebuffer
-	 * to be in a specific tiling format.
-	 */
-	if (rotation != DRM_MODE_ROTATE_180 || !plane->rotation_property) {
-		fb_helper->sw_rotations |= rotation;
-		return;
-	}
-
-	for (i = 0; i < plane->rotation_property->num_values; i++)
-		valid_mask |= (1ULL << plane->rotation_property->values[i]);
-
-	if (!(rotation & valid_mask)) {
-		fb_helper->sw_rotations |= rotation;
-		return;
-	}
-
-	fb_crtc->rotation = rotation;
-	/* Rotating in hardware, fbcon should not rotate */
-	fb_helper->sw_rotations |= DRM_MODE_ROTATE_0;
-}
-
-static struct drm_fb_helper_crtc *
-drm_fb_helper_crtc(struct drm_fb_helper *fb_helper, struct drm_crtc *crtc)
-{
-	int i;
-
-	for (i = 0; i < fb_helper->crtc_count; i++)
-		if (fb_helper->crtc_info[i].mode_set.crtc == crtc)
-			return &fb_helper->crtc_info[i];
-
-	return NULL;
-}
-
-/* Try to read the BIOS display configuration and use it for the initial config */
-static bool drm_fb_helper_firmware_config(struct drm_fb_helper *fb_helper,
-					  struct drm_fb_helper_crtc **crtcs,
-					  struct drm_display_mode **modes,
-					  struct drm_fb_offset *offsets,
-					  bool *enabled, int width, int height)
-{
-	struct drm_device *dev = fb_helper->dev;
-	unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG);
-	unsigned long conn_configured, conn_seq, mask;
-	int i, j;
-	bool *save_enabled;
-	bool fallback = true, ret = true;
-	int num_connectors_enabled = 0;
-	int num_connectors_detected = 0;
-	struct drm_modeset_acquire_ctx ctx;
-
-	if (!drm_drv_uses_atomic_modeset(dev))
-		return false;
-
-	save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL);
-	if (!save_enabled)
-		return false;
-
-	drm_modeset_acquire_init(&ctx, 0);
-
-	while (drm_modeset_lock_all_ctx(dev, &ctx) != 0)
-		drm_modeset_backoff(&ctx);
-
-	memcpy(save_enabled, enabled, count);
-	mask = GENMASK(count - 1, 0);
-	conn_configured = 0;
-retry:
-	conn_seq = conn_configured;
-	for (i = 0; i < count; i++) {
-		struct drm_fb_helper_connector *fb_conn;
-		struct drm_connector *connector;
-		struct drm_encoder *encoder;
-		struct drm_fb_helper_crtc *new_crtc;
-
-		fb_conn = fb_helper->connector_info[i];
-		connector = fb_conn->connector;
-
-		if (conn_configured & BIT(i))
-			continue;
-
-		if (conn_seq == 0 && !connector->has_tile)
-			continue;
-
-		if (connector->status == connector_status_connected)
-			num_connectors_detected++;
-
-		if (!enabled[i]) {
-			DRM_DEBUG_KMS("connector %s not enabled, skipping\n",
-				      connector->name);
-			conn_configured |= BIT(i);
-			continue;
-		}
-
-		if (connector->force == DRM_FORCE_OFF) {
-			DRM_DEBUG_KMS("connector %s is disabled by user, skipping\n",
-				      connector->name);
-			enabled[i] = false;
-			continue;
-		}
-
-		encoder = connector->state->best_encoder;
-		if (!encoder || WARN_ON(!connector->state->crtc)) {
-			if (connector->force > DRM_FORCE_OFF)
-				goto bail;
-
-			DRM_DEBUG_KMS("connector %s has no encoder or crtc, skipping\n",
-				      connector->name);
-			enabled[i] = false;
-			conn_configured |= BIT(i);
-			continue;
-		}
-
-		num_connectors_enabled++;
-
-		new_crtc = drm_fb_helper_crtc(fb_helper, connector->state->crtc);
-
-		/*
-		 * Make sure we're not trying to drive multiple connectors
-		 * with a single CRTC, since our cloning support may not
-		 * match the BIOS.
-		 */
-		for (j = 0; j < count; j++) {
-			if (crtcs[j] == new_crtc) {
-				DRM_DEBUG_KMS("fallback: cloned configuration\n");
-				goto bail;
-			}
-		}
-
-		DRM_DEBUG_KMS("looking for cmdline mode on connector %s\n",
-			      connector->name);
-
-		/* go for command line mode first */
-		modes[i] = drm_pick_cmdline_mode(fb_conn);
-
-		/* try for preferred next */
-		if (!modes[i]) {
-			DRM_DEBUG_KMS("looking for preferred mode on connector %s %d\n",
-				      connector->name, connector->has_tile);
-			modes[i] = drm_has_preferred_mode(fb_conn, width,
-							  height);
-		}
-
-		/* No preferred mode marked by the EDID? Are there any modes? */
-		if (!modes[i] && !list_empty(&connector->modes)) {
-			DRM_DEBUG_KMS("using first mode listed on connector %s\n",
-				      connector->name);
-			modes[i] = list_first_entry(&connector->modes,
-						    struct drm_display_mode,
-						    head);
-		}
-
-		/* last resort: use current mode */
-		if (!modes[i]) {
-			/*
-			 * IMPORTANT: We want to use the adjusted mode (i.e.
-			 * after the panel fitter upscaling) as the initial
-			 * config, not the input mode, which is what crtc->mode
-			 * usually contains. But since our current
-			 * code puts a mode derived from the post-pfit timings
-			 * into crtc->mode this works out correctly.
-			 *
-			 * This is crtc->mode and not crtc->state->mode for the
-			 * fastboot check to work correctly.
-			 */
-			DRM_DEBUG_KMS("looking for current mode on connector %s\n",
-				      connector->name);
-			modes[i] = &connector->state->crtc->mode;
-		}
-		crtcs[i] = new_crtc;
-
-		DRM_DEBUG_KMS("connector %s on [CRTC:%d:%s]: %dx%d%s\n",
-			      connector->name,
-			      connector->state->crtc->base.id,
-			      connector->state->crtc->name,
-			      modes[i]->hdisplay, modes[i]->vdisplay,
-			      modes[i]->flags & DRM_MODE_FLAG_INTERLACE ? "i" : "");
-
-		fallback = false;
-		conn_configured |= BIT(i);
-	}
-
-	if ((conn_configured & mask) != mask && conn_configured != conn_seq)
-		goto retry;
-
-	/*
-	 * If the BIOS didn't enable everything it could, fall back to have the
-	 * same user experiencing of lighting up as much as possible like the
-	 * fbdev helper library.
-	 */
-	if (num_connectors_enabled != num_connectors_detected &&
-	    num_connectors_enabled < dev->mode_config.num_crtc) {
-		DRM_DEBUG_KMS("fallback: Not all outputs enabled\n");
-		DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled,
-			      num_connectors_detected);
-		fallback = true;
-	}
-
-	if (fallback) {
-bail:
-		DRM_DEBUG_KMS("Not using firmware configuration\n");
-		memcpy(enabled, save_enabled, count);
-		ret = false;
-	}
-
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
-
-	kfree(save_enabled);
-	return ret;
-}
-
-static void drm_setup_crtcs(struct drm_fb_helper *fb_helper,
-			    u32 width, u32 height)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_fb_helper_crtc **crtcs;
-	struct drm_display_mode **modes;
-	struct drm_fb_offset *offsets;
-	bool *enabled;
-	int i;
-
-	DRM_DEBUG_KMS("\n");
-	/* prevent concurrent modification of connector_count by hotplug */
-	lockdep_assert_held(&fb_helper->lock);
-
-	crtcs = kcalloc(fb_helper->connector_count,
-			sizeof(struct drm_fb_helper_crtc *), GFP_KERNEL);
-	modes = kcalloc(fb_helper->connector_count,
-			sizeof(struct drm_display_mode *), GFP_KERNEL);
-	offsets = kcalloc(fb_helper->connector_count,
-			  sizeof(struct drm_fb_offset), GFP_KERNEL);
-	enabled = kcalloc(fb_helper->connector_count,
-			  sizeof(bool), GFP_KERNEL);
-	if (!crtcs || !modes || !enabled || !offsets) {
-		DRM_ERROR("Memory allocation failed\n");
-		goto out;
-	}
-
-	mutex_lock(&fb_helper->dev->mode_config.mutex);
-	if (drm_fb_helper_probe_connector_modes(fb_helper, width, height) == 0)
-		DRM_DEBUG_KMS("No connectors reported connected with modes\n");
-	drm_enable_connectors(fb_helper, enabled);
-
-	if (!drm_fb_helper_firmware_config(fb_helper, crtcs, modes, offsets,
-					   enabled, width, height)) {
-		memset(modes, 0, fb_helper->connector_count*sizeof(modes[0]));
-		memset(crtcs, 0, fb_helper->connector_count*sizeof(crtcs[0]));
-		memset(offsets, 0, fb_helper->connector_count*sizeof(offsets[0]));
-
-		if (!drm_target_cloned(fb_helper, modes, offsets,
-				       enabled, width, height) &&
-		    !drm_target_preferred(fb_helper, modes, offsets,
-					  enabled, width, height))
-			DRM_ERROR("Unable to find initial modes\n");
-
-		DRM_DEBUG_KMS("picking CRTCs for %dx%d config\n",
-			      width, height);
-
-		drm_pick_crtcs(fb_helper, crtcs, modes, 0, width, height);
-	}
-	mutex_unlock(&fb_helper->dev->mode_config.mutex);
-
-	/* need to set the modesets up here for use later */
-	/* fill out the connector<->crtc mappings into the modesets */
-	for (i = 0; i < fb_helper->crtc_count; i++)
-		drm_fb_helper_modeset_release(fb_helper,
-					      &fb_helper->crtc_info[i].mode_set);
-
-	fb_helper->sw_rotations = 0;
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		struct drm_display_mode *mode = modes[i];
-		struct drm_fb_helper_crtc *fb_crtc = crtcs[i];
-		struct drm_fb_offset *offset = &offsets[i];
-
-		if (mode && fb_crtc) {
-			struct drm_mode_set *modeset = &fb_crtc->mode_set;
-			struct drm_connector *connector =
-				fb_helper->connector_info[i]->connector;
-
-			DRM_DEBUG_KMS("desired mode %s set on crtc %d (%d,%d)\n",
-				      mode->name, fb_crtc->mode_set.crtc->base.id, offset->x, offset->y);
-
-			fb_crtc->desired_mode = mode;
-			fb_crtc->x = offset->x;
-			fb_crtc->y = offset->y;
-			modeset->mode = drm_mode_duplicate(dev,
-							   fb_crtc->desired_mode);
-			drm_connector_get(connector);
-			drm_setup_crtc_rotation(fb_helper, fb_crtc, connector);
-			modeset->connectors[modeset->num_connectors++] = connector;
-			modeset->x = offset->x;
-			modeset->y = offset->y;
-		}
-	}
-out:
-	kfree(crtcs);
-	kfree(modes);
-	kfree(offsets);
-	kfree(enabled);
-}
-
 /*
  * This is a continuation of drm_setup_crtcs() that sets up anything related
  * to the framebuffer. During initialization, drm_setup_crtcs() is called before
@@ -2847,17 +1708,30 @@ out:
  */
 static void drm_setup_crtcs_fb(struct drm_fb_helper *fb_helper)
 {
+	struct drm_client_dev *client = &fb_helper->client;
+	struct drm_connector_list_iter conn_iter;
 	struct fb_info *info = fb_helper->fbdev;
-	int i;
+	unsigned int rotation, sw_rotations = 0;
+	struct drm_connector *connector;
+	struct drm_mode_set *modeset;
+
+	mutex_lock(&client->modeset_mutex);
+	drm_client_for_each_modeset(modeset, client) {
+		if (!modeset->num_connectors)
+			continue;
 
-	for (i = 0; i < fb_helper->crtc_count; i++)
-		if (fb_helper->crtc_info[i].mode_set.num_connectors)
-			fb_helper->crtc_info[i].mode_set.fb = fb_helper->fb;
+		modeset->fb = fb_helper->fb;
+
+		if (drm_client_rotation(modeset, &rotation))
+			/* Rotating in hardware, fbcon should not rotate */
+			sw_rotations |= DRM_MODE_ROTATE_0;
+		else
+			sw_rotations |= rotation;
+	}
+	mutex_unlock(&client->modeset_mutex);
 
-	mutex_lock(&fb_helper->dev->mode_config.mutex);
-	drm_fb_helper_for_each_connector(fb_helper, i) {
-		struct drm_connector *connector =
-					fb_helper->connector_info[i]->connector;
+	drm_connector_list_iter_begin(fb_helper->dev, &conn_iter);
+	drm_client_for_each_connector_iter(connector, &conn_iter) {
 
 		/* use first connected connector for the physical dimensions */
 		if (connector->status == connector_status_connected) {
@@ -2866,9 +1740,9 @@ static void drm_setup_crtcs_fb(struct drm_fb_helper *fb_helper)
 			break;
 		}
 	}
-	mutex_unlock(&fb_helper->dev->mode_config.mutex);
+	drm_connector_list_iter_end(&conn_iter);
 
-	switch (fb_helper->sw_rotations) {
+	switch (sw_rotations) {
 	case DRM_MODE_ROTATE_0:
 		info->fbcon_rotate_hint = FB_ROTATE_UR;
 		break;
@@ -2904,7 +1778,7 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper,
 	width = dev->mode_config.max_width;
 	height = dev->mode_config.max_height;
 
-	drm_setup_crtcs(fb_helper, width, height);
+	drm_client_modeset_probe(&fb_helper->client, width, height);
 	ret = drm_fb_helper_single_fb_probe(fb_helper, bpp_sel);
 	if (ret < 0) {
 		if (ret == -EAGAIN) {
@@ -3041,15 +1915,17 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 		return err;
 	}
 
-	if (!fb_helper->fb || !drm_fb_helper_is_bound(fb_helper)) {
+	if (!fb_helper->fb || !drm_master_internal_acquire(fb_helper->dev)) {
 		fb_helper->delayed_hotplug = true;
 		mutex_unlock(&fb_helper->lock);
 		return err;
 	}
 
+	drm_master_internal_release(fb_helper->dev);
+
 	DRM_DEBUG_KMS("\n");
 
-	drm_setup_crtcs(fb_helper, fb_helper->fb->width, fb_helper->fb->height);
+	drm_client_modeset_probe(&fb_helper->client, fb_helper->fb->width, fb_helper->fb->height);
 	drm_setup_crtcs_fb(fb_helper);
 	mutex_unlock(&fb_helper->lock);
 
@@ -3066,8 +1942,7 @@ EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
  * @funcs: fbdev helper functions
  * @preferred_bpp: Preferred bits per pixel for the device.
  *                 @dev->mode_config.preferred_depth is used if this is zero.
- * @max_conn_count: Maximum number of connectors.
- *                  @dev->mode_config.num_connector is used if this is zero.
+ * @max_conn_count: Maximum number of connectors (not used)
  *
  * This function sets up fbdev emulation and registers fbdev for access by
  * userspace. If all connectors are disconnected, setup is deferred to the next
@@ -3095,27 +1970,14 @@ int drm_fb_helper_fbdev_setup(struct drm_device *dev,
 	if (!preferred_bpp)
 		preferred_bpp = 32;
 
-	if (!max_conn_count)
-		max_conn_count = dev->mode_config.num_connector;
-	if (!max_conn_count) {
-		DRM_DEV_ERROR(dev->dev, "fbdev: No connectors\n");
-		return -EINVAL;
-	}
-
 	drm_fb_helper_prepare(dev, fb_helper, funcs);
 
-	ret = drm_fb_helper_init(dev, fb_helper, max_conn_count);
+	ret = drm_fb_helper_init(dev, fb_helper, 0);
 	if (ret < 0) {
 		DRM_DEV_ERROR(dev->dev, "fbdev: Failed to initialize (ret=%d)\n", ret);
 		return ret;
 	}
 
-	ret = drm_fb_helper_single_add_all_connectors(fb_helper);
-	if (ret < 0) {
-		DRM_DEV_ERROR(dev->dev, "fbdev: Failed to add connectors (ret=%d)\n", ret);
-		goto err_drm_fb_helper_fini;
-	}
-
 	if (!drm_drv_uses_atomic_modeset(dev))
 		drm_helper_disable_unused_functions(dev);
 
@@ -3417,14 +2279,10 @@ static int drm_fbdev_client_hotplug(struct drm_client_dev *client)
 
 	drm_fb_helper_prepare(dev, fb_helper, &drm_fb_helper_generic_funcs);
 
-	ret = drm_fb_helper_init(dev, fb_helper, dev->mode_config.num_connector);
+	ret = drm_fb_helper_init(dev, fb_helper, 0);
 	if (ret)
 		goto err;
 
-	ret = drm_fb_helper_single_add_all_connectors(fb_helper);
-	if (ret)
-		goto err_cleanup;
-
 	if (!drm_drv_uses_atomic_modeset(dev))
 		drm_helper_disable_unused_functions(dev);
 
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 233f114d2186..754af25fe255 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -31,17 +31,20 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/dma-fence.h>
+#include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
-#include <linux/module.h>
 
 #include <drm/drm_client.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_file.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
 
-#include "drm_legacy.h"
-#include "drm_internal.h"
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
+#include "drm_legacy.h"
 
 /* from BKL pushdown */
 DEFINE_MUTEX(drm_global_mutex);
@@ -100,8 +103,6 @@ DEFINE_MUTEX(drm_global_mutex);
  * :ref:`IOCTL support in the userland interfaces chapter<drm_driver_ioctl>`.
  */
 
-static int drm_open_helper(struct file *filp, struct drm_minor *minor);
-
 /**
  * drm_file_alloc - allocate file context
  * @minor: minor to allocate on
@@ -273,76 +274,6 @@ static void drm_close_helper(struct file *filp)
 	drm_file_free(file_priv);
 }
 
-static int drm_setup(struct drm_device * dev)
-{
-	int ret;
-
-	if (dev->driver->firstopen &&
-	    drm_core_check_feature(dev, DRIVER_LEGACY)) {
-		ret = dev->driver->firstopen(dev);
-		if (ret != 0)
-			return ret;
-	}
-
-	ret = drm_legacy_dma_setup(dev);
-	if (ret < 0)
-		return ret;
-
-
-	DRM_DEBUG("\n");
-	return 0;
-}
-
-/**
- * drm_open - open method for DRM file
- * @inode: device inode
- * @filp: file pointer.
- *
- * This function must be used by drivers as their &file_operations.open method.
- * It looks up the correct DRM device and instantiates all the per-file
- * resources for it. It also calls the &drm_driver.open driver callback.
- *
- * RETURNS:
- *
- * 0 on success or negative errno value on falure.
- */
-int drm_open(struct inode *inode, struct file *filp)
-{
-	struct drm_device *dev;
-	struct drm_minor *minor;
-	int retcode;
-	int need_setup = 0;
-
-	minor = drm_minor_acquire(iminor(inode));
-	if (IS_ERR(minor))
-		return PTR_ERR(minor);
-
-	dev = minor->dev;
-	if (!dev->open_count++)
-		need_setup = 1;
-
-	/* share address_space across all char-devs of a single device */
-	filp->f_mapping = dev->anon_inode->i_mapping;
-
-	retcode = drm_open_helper(filp, minor);
-	if (retcode)
-		goto err_undo;
-	if (need_setup) {
-		retcode = drm_setup(dev);
-		if (retcode) {
-			drm_close_helper(filp);
-			goto err_undo;
-		}
-	}
-	return 0;
-
-err_undo:
-	dev->open_count--;
-	drm_minor_release(minor);
-	return retcode;
-}
-EXPORT_SYMBOL(drm_open);
-
 /*
  * Check whether DRI will run on this CPU.
  *
@@ -424,6 +355,56 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor)
 	return 0;
 }
 
+/**
+ * drm_open - open method for DRM file
+ * @inode: device inode
+ * @filp: file pointer.
+ *
+ * This function must be used by drivers as their &file_operations.open method.
+ * It looks up the correct DRM device and instantiates all the per-file
+ * resources for it. It also calls the &drm_driver.open driver callback.
+ *
+ * RETURNS:
+ *
+ * 0 on success or negative errno value on falure.
+ */
+int drm_open(struct inode *inode, struct file *filp)
+{
+	struct drm_device *dev;
+	struct drm_minor *minor;
+	int retcode;
+	int need_setup = 0;
+
+	minor = drm_minor_acquire(iminor(inode));
+	if (IS_ERR(minor))
+		return PTR_ERR(minor);
+
+	dev = minor->dev;
+	if (!dev->open_count++)
+		need_setup = 1;
+
+	/* share address_space across all char-devs of a single device */
+	filp->f_mapping = dev->anon_inode->i_mapping;
+
+	retcode = drm_open_helper(filp, minor);
+	if (retcode)
+		goto err_undo;
+	if (need_setup) {
+		retcode = drm_legacy_setup(dev);
+		if (retcode) {
+			drm_close_helper(filp);
+			goto err_undo;
+		}
+	}
+	return 0;
+
+err_undo:
+	dev->open_count--;
+	drm_minor_release(minor);
+	return retcode;
+}
+EXPORT_SYMBOL(drm_open);
+
 void drm_lastclose(struct drm_device * dev)
 {
 	DRM_DEBUG("\n");
diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
index 3da3bf5af405..060b753881a2 100644
--- a/drivers/gpu/drm/drm_flip_work.c
+++ b/drivers/gpu/drm/drm_flip_work.c
@@ -21,9 +21,11 @@
  * SOFTWARE.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_util.h>
+#include <linux/slab.h>
+
 #include <drm/drm_flip_work.h>
+#include <drm/drm_print.h>
+#include <drm/drm_util.h>
 
 /**
  * drm_flip_work_allocate_task - allocate a flip-work task
diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index a18da35145b7..0897cb9aeaff 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -36,7 +36,7 @@ static unsigned int clip_offset(struct drm_rect *clip,
 void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		   struct drm_rect *clip)
 {
-	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
+	unsigned int cpp = fb->format->cpp[0];
 	size_t len = (clip->x2 - clip->x1) * cpp;
 	unsigned int y, lines = clip->y2 - clip->y1;
 
@@ -63,7 +63,7 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
 			   struct drm_framebuffer *fb,
 			   struct drm_rect *clip)
 {
-	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
+	unsigned int cpp = fb->format->cpp[0];
 	unsigned int offset = clip_offset(clip, fb->pitches[0], cpp);
 	size_t len = (clip->x2 - clip->x1) * cpp;
 	unsigned int y, lines = clip->y2 - clip->y1;
diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index 6ea55fb4526d..c630064ccf41 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -27,7 +27,7 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_device.h>
 #include <drm/drm_fourcc.h>
 
 static char printable_char(int c)
@@ -333,124 +333,6 @@ drm_get_format_info(struct drm_device *dev,
 EXPORT_SYMBOL(drm_get_format_info);
 
 /**
- * drm_format_num_planes - get the number of planes for format
- * @format: pixel format (DRM_FORMAT_*)
- *
- * Returns:
- * The number of planes used by the specified pixel format.
- */
-int drm_format_num_planes(uint32_t format)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	return info ? info->num_planes : 1;
-}
-EXPORT_SYMBOL(drm_format_num_planes);
-
-/**
- * drm_format_plane_cpp - determine the bytes per pixel value
- * @format: pixel format (DRM_FORMAT_*)
- * @plane: plane index
- *
- * Returns:
- * The bytes per pixel value for the specified plane.
- */
-int drm_format_plane_cpp(uint32_t format, int plane)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	if (!info || plane >= info->num_planes)
-		return 0;
-
-	return info->cpp[plane];
-}
-EXPORT_SYMBOL(drm_format_plane_cpp);
-
-/**
- * drm_format_horz_chroma_subsampling - get the horizontal chroma subsampling factor
- * @format: pixel format (DRM_FORMAT_*)
- *
- * Returns:
- * The horizontal chroma subsampling factor for the
- * specified pixel format.
- */
-int drm_format_horz_chroma_subsampling(uint32_t format)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	return info ? info->hsub : 1;
-}
-EXPORT_SYMBOL(drm_format_horz_chroma_subsampling);
-
-/**
- * drm_format_vert_chroma_subsampling - get the vertical chroma subsampling factor
- * @format: pixel format (DRM_FORMAT_*)
- *
- * Returns:
- * The vertical chroma subsampling factor for the
- * specified pixel format.
- */
-int drm_format_vert_chroma_subsampling(uint32_t format)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	return info ? info->vsub : 1;
-}
-EXPORT_SYMBOL(drm_format_vert_chroma_subsampling);
-
-/**
- * drm_format_plane_width - width of the plane given the first plane
- * @width: width of the first plane
- * @format: pixel format
- * @plane: plane index
- *
- * Returns:
- * The width of @plane, given that the width of the first plane is @width.
- */
-int drm_format_plane_width(int width, uint32_t format, int plane)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	if (!info || plane >= info->num_planes)
-		return 0;
-
-	if (plane == 0)
-		return width;
-
-	return width / info->hsub;
-}
-EXPORT_SYMBOL(drm_format_plane_width);
-
-/**
- * drm_format_plane_height - height of the plane given the first plane
- * @height: height of the first plane
- * @format: pixel format
- * @plane: plane index
- *
- * Returns:
- * The height of @plane, given that the height of the first plane is @height.
- */
-int drm_format_plane_height(int height, uint32_t format, int plane)
-{
-	const struct drm_format_info *info;
-
-	info = drm_format_info(format);
-	if (!info || plane >= info->num_planes)
-		return 0;
-
-	if (plane == 0)
-		return height;
-
-	return height / info->vsub;
-}
-EXPORT_SYMBOL(drm_format_plane_height);
-
-/**
  * drm_format_info_block_width - width in pixels of block.
  * @info: pixel format info
  * @plane: plane index
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index d8d75e25f6fb..0b72468e8131 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -21,16 +21,21 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
-#include <drm/drm_auth.h>
-#include <drm/drm_framebuffer.h>
+#include <linux/uaccess.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_uapi.h>
+#include <drm/drm_auth.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_print.h>
 #include <drm/drm_util.h>
 
-#include "drm_internal.h"
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
 
 /**
  * DOC: overview
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 50de138c89e0..a8c4468f03d9 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -38,10 +38,14 @@
 #include <linux/dma-buf.h>
 #include <linux/mem_encrypt.h>
 #include <linux/pagevec.h>
-#include <drm/drmP.h>
-#include <drm/drm_vma_manager.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_print.h>
+#include <drm/drm_vma_manager.h>
+
 #include "drm_internal.h"
 
 /** @file drm_gem.c
@@ -1212,15 +1216,6 @@ void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
 		obj->dev->driver->gem_print_info(p, indent, obj);
 }
 
-/**
- * drm_gem_pin - Pin backing buffer in memory
- * @obj: GEM object
- *
- * Make sure the backing buffer is pinned in memory.
- *
- * Returns:
- * 0 on success or a negative error code on failure.
- */
 int drm_gem_pin(struct drm_gem_object *obj)
 {
 	if (obj->funcs && obj->funcs->pin)
@@ -1230,14 +1225,7 @@ int drm_gem_pin(struct drm_gem_object *obj)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(drm_gem_pin);
 
-/**
- * drm_gem_unpin - Unpin backing buffer from memory
- * @obj: GEM object
- *
- * Relax the requirement that the backing buffer is pinned in memory.
- */
 void drm_gem_unpin(struct drm_gem_object *obj)
 {
 	if (obj->funcs && obj->funcs->unpin)
@@ -1245,16 +1233,7 @@ void drm_gem_unpin(struct drm_gem_object *obj)
 	else if (obj->dev->driver->gem_prime_unpin)
 		obj->dev->driver->gem_prime_unpin(obj);
 }
-EXPORT_SYMBOL(drm_gem_unpin);
 
-/**
- * drm_gem_vmap - Map buffer into kernel virtual address space
- * @obj: GEM object
- *
- * Returns:
- * A virtual pointer to a newly created GEM object or an ERR_PTR-encoded negative
- * error code on failure.
- */
 void *drm_gem_vmap(struct drm_gem_object *obj)
 {
 	void *vaddr;
@@ -1271,13 +1250,7 @@ void *drm_gem_vmap(struct drm_gem_object *obj)
 
 	return vaddr;
 }
-EXPORT_SYMBOL(drm_gem_vmap);
 
-/**
- * drm_gem_vunmap - Remove buffer mapping from kernel virtual address space
- * @obj: GEM object
- * @vaddr: Virtual address (can be NULL)
- */
 void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr)
 {
 	if (!vaddr)
@@ -1288,7 +1261,6 @@ void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr)
 	else if (obj->dev->driver->gem_prime_vunmap)
 		obj->dev->driver->gem_prime_vunmap(obj, vaddr);
 }
-EXPORT_SYMBOL(drm_gem_vunmap);
 
 /**
  * drm_gem_lock_reservations - Sets up the ww context and acquires
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index a5c8850079f1..12e98fb28229 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -9,15 +9,16 @@
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  */
 
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/export.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
 
-#include <drm/drmP.h>
 #include <drm/drm.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_vma_manager.h>
 
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index ed985d9b6010..8fcbabf02dfd 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -10,7 +10,6 @@
 #include <linux/reservation.h>
 #include <linux/slab.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_damage_helper.h>
@@ -281,6 +280,9 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_dirty);
  * There is no need for &drm_plane_helper_funcs.cleanup_fb hook for simple
  * gem based framebuffer drivers which have their buffers always pinned in
  * memory.
+ *
+ * See drm_atomic_set_fence_for_plane() for a discussion of implicit and
+ * explicit fencing in atomic modeset updates.
  */
 int drm_gem_fb_prepare_fb(struct drm_plane *plane,
 			  struct drm_plane_state *state)
@@ -311,6 +313,9 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb);
  * &dma_buf attached, extracts the exclusive fence and attaches it to plane
  * state for the atomic helper to wait on. Drivers can use this as their
  * &drm_simple_display_pipe_funcs.prepare_fb callback.
+ *
+ * See drm_atomic_set_fence_for_plane() for a discussion of implicit and
+ * explicit fencing in atomic modeset updates.
  */
 int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
 					      struct drm_plane_state *plane_state)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 1ee208c2c85e..472ea5d81f82 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -255,7 +255,8 @@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem)
 	if (obj->import_attach)
 		shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf);
 	else
-		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, VM_MAP, PAGE_KERNEL);
+		shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT,
+				    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
 
 	if (!shmem->vaddr) {
 		DRM_DEBUG_KMS("Failed to vmap pages\n");
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
new file mode 100644
index 000000000000..4de782ca26b2
--- /dev/null
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <drm/drm_gem_vram_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mode.h>
+#include <drm/drm_prime.h>
+#include <drm/drm_vram_mm_helper.h>
+#include <drm/ttm/ttm_page_alloc.h>
+
+/**
+ * DOC: overview
+ *
+ * This library provides a GEM buffer object that is backed by video RAM
+ * (VRAM). It can be used for framebuffer devices with dedicated memory.
+ */
+
+/*
+ * Buffer-objects helpers
+ */
+
+static void drm_gem_vram_cleanup(struct drm_gem_vram_object *gbo)
+{
+	/* We got here via ttm_bo_put(), which means that the
+	 * TTM buffer object in 'bo' has already been cleaned
+	 * up; only release the GEM object.
+	 */
+	drm_gem_object_release(&gbo->gem);
+}
+
+static void drm_gem_vram_destroy(struct drm_gem_vram_object *gbo)
+{
+	drm_gem_vram_cleanup(gbo);
+	kfree(gbo);
+}
+
+static void ttm_buffer_object_destroy(struct ttm_buffer_object *bo)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_bo(bo);
+
+	drm_gem_vram_destroy(gbo);
+}
+
+static void drm_gem_vram_placement(struct drm_gem_vram_object *gbo,
+				   unsigned long pl_flag)
+{
+	unsigned int i;
+	unsigned int c = 0;
+
+	gbo->placement.placement = gbo->placements;
+	gbo->placement.busy_placement = gbo->placements;
+
+	if (pl_flag & TTM_PL_FLAG_VRAM)
+		gbo->placements[c++].flags = TTM_PL_FLAG_WC |
+					     TTM_PL_FLAG_UNCACHED |
+					     TTM_PL_FLAG_VRAM;
+
+	if (pl_flag & TTM_PL_FLAG_SYSTEM)
+		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
+					     TTM_PL_FLAG_SYSTEM;
+
+	if (!c)
+		gbo->placements[c++].flags = TTM_PL_MASK_CACHING |
+					     TTM_PL_FLAG_SYSTEM;
+
+	gbo->placement.num_placement = c;
+	gbo->placement.num_busy_placement = c;
+
+	for (i = 0; i < c; ++i) {
+		gbo->placements[i].fpfn = 0;
+		gbo->placements[i].lpfn = 0;
+	}
+}
+
+static int drm_gem_vram_init(struct drm_device *dev,
+			     struct ttm_bo_device *bdev,
+			     struct drm_gem_vram_object *gbo,
+			     size_t size, unsigned long pg_align,
+			     bool interruptible)
+{
+	int ret;
+	size_t acc_size;
+
+	ret = drm_gem_object_init(dev, &gbo->gem, size);
+	if (ret)
+		return ret;
+
+	acc_size = ttm_bo_dma_acc_size(bdev, size, sizeof(*gbo));
+
+	gbo->bo.bdev = bdev;
+	drm_gem_vram_placement(gbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
+
+	ret = ttm_bo_init(bdev, &gbo->bo, size, ttm_bo_type_device,
+			  &gbo->placement, pg_align, interruptible, acc_size,
+			  NULL, NULL, ttm_buffer_object_destroy);
+	if (ret)
+		goto err_drm_gem_object_release;
+
+	return 0;
+
+err_drm_gem_object_release:
+	drm_gem_object_release(&gbo->gem);
+	return ret;
+}
+
+/**
+ * drm_gem_vram_create() - Creates a VRAM-backed GEM object
+ * @dev:		the DRM device
+ * @bdev:		the TTM BO device backing the object
+ * @size:		the buffer size in bytes
+ * @pg_align:		the buffer's alignment in multiples of the page size
+ * @interruptible:	sleep interruptible if waiting for memory
+ *
+ * Returns:
+ * A new instance of &struct drm_gem_vram_object on success, or
+ * an ERR_PTR()-encoded error code otherwise.
+ */
+struct drm_gem_vram_object *drm_gem_vram_create(struct drm_device *dev,
+						struct ttm_bo_device *bdev,
+						size_t size,
+						unsigned long pg_align,
+						bool interruptible)
+{
+	struct drm_gem_vram_object *gbo;
+	int ret;
+
+	gbo = kzalloc(sizeof(*gbo), GFP_KERNEL);
+	if (!gbo)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_gem_vram_init(dev, bdev, gbo, size, pg_align, interruptible);
+	if (ret < 0)
+		goto err_kfree;
+
+	return gbo;
+
+err_kfree:
+	kfree(gbo);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(drm_gem_vram_create);
+
+/**
+ * drm_gem_vram_put() - Releases a reference to a VRAM-backed GEM object
+ * @gbo:	the GEM VRAM object
+ *
+ * See ttm_bo_put() for more information.
+ */
+void drm_gem_vram_put(struct drm_gem_vram_object *gbo)
+{
+	ttm_bo_put(&gbo->bo);
+}
+EXPORT_SYMBOL(drm_gem_vram_put);
+
+/**
+ * drm_gem_vram_mmap_offset() - Returns a GEM VRAM object's mmap offset
+ * @gbo:	the GEM VRAM object
+ *
+ * See drm_vma_node_offset_addr() for more information.
+ *
+ * Returns:
+ * The buffer object's offset for userspace mappings on success, or
+ * 0 if no offset is allocated.
+ */
+u64 drm_gem_vram_mmap_offset(struct drm_gem_vram_object *gbo)
+{
+	return drm_vma_node_offset_addr(&gbo->bo.vma_node);
+}
+EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
+
+/**
+ * drm_gem_vram_offset() - \
+	Returns a GEM VRAM object's offset in video memory
+ * @gbo:	the GEM VRAM object
+ *
+ * This function returns the buffer object's offset in the device's video
+ * memory. The buffer object has to be pinned to %TTM_PL_VRAM.
+ *
+ * Returns:
+ * The buffer object's offset in video memory on success, or
+ * a negative errno code otherwise.
+ */
+s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo)
+{
+	if (WARN_ON_ONCE(!gbo->pin_count))
+		return (s64)-ENODEV;
+	return gbo->bo.offset;
+}
+EXPORT_SYMBOL(drm_gem_vram_offset);
+
+/**
+ * drm_gem_vram_pin() - Pins a GEM VRAM object in a region.
+ * @gbo:	the GEM VRAM object
+ * @pl_flag:	a bitmask of possible memory regions
+ *
+ * Pinning a buffer object ensures that it is not evicted from
+ * a memory region. A pinned buffer object has to be unpinned before
+ * it can be pinned to another region. If the pl_flag argument is 0,
+ * the buffer is pinned at its current location (video RAM or system
+ * memory).
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag)
+{
+	int i, ret;
+	struct ttm_operation_ctx ctx = { false, false };
+
+	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (gbo->pin_count)
+		goto out;
+
+	if (pl_flag)
+		drm_gem_vram_placement(gbo, pl_flag);
+
+	for (i = 0; i < gbo->placement.num_placement; ++i)
+		gbo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
+
+	ret = ttm_bo_validate(&gbo->bo, &gbo->placement, &ctx);
+	if (ret < 0)
+		goto err_ttm_bo_unreserve;
+
+out:
+	++gbo->pin_count;
+	ttm_bo_unreserve(&gbo->bo);
+
+	return 0;
+
+err_ttm_bo_unreserve:
+	ttm_bo_unreserve(&gbo->bo);
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_vram_pin);
+
+/**
+ * drm_gem_vram_unpin() - Unpins a GEM VRAM object
+ * @gbo:	the GEM VRAM object
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo)
+{
+	int i, ret;
+	struct ttm_operation_ctx ctx = { false, false };
+
+	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
+	if (ret < 0)
+		return ret;
+
+	if (WARN_ON_ONCE(!gbo->pin_count))
+		goto out;
+
+	--gbo->pin_count;
+	if (gbo->pin_count)
+		goto out;
+
+	for (i = 0; i < gbo->placement.num_placement ; ++i)
+		gbo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+
+	ret = ttm_bo_validate(&gbo->bo, &gbo->placement, &ctx);
+	if (ret < 0)
+		goto err_ttm_bo_unreserve;
+
+out:
+	ttm_bo_unreserve(&gbo->bo);
+
+	return 0;
+
+err_ttm_bo_unreserve:
+	ttm_bo_unreserve(&gbo->bo);
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_vram_unpin);
+
+/**
+ * drm_gem_vram_kmap() - Maps a GEM VRAM object into kernel address space
+ * @gbo:	the GEM VRAM object
+ * @map:	establish a mapping if necessary
+ * @is_iomem:	returns true if the mapped memory is I/O memory, or false \
+	otherwise; can be NULL
+ *
+ * This function maps the buffer object into the kernel's address space
+ * or returns the current mapping. If the parameter map is false, the
+ * function only queries the current mapping, but does not establish a
+ * new one.
+ *
+ * Returns:
+ * The buffers virtual address if mapped, or
+ * NULL if not mapped, or
+ * an ERR_PTR()-encoded error code otherwise.
+ */
+void *drm_gem_vram_kmap(struct drm_gem_vram_object *gbo, bool map,
+			bool *is_iomem)
+{
+	int ret;
+	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+
+	if (kmap->virtual || !map)
+		goto out;
+
+	ret = ttm_bo_kmap(&gbo->bo, 0, gbo->bo.num_pages, kmap);
+	if (ret)
+		return ERR_PTR(ret);
+
+out:
+	if (!is_iomem)
+		return kmap->virtual;
+	if (!kmap->virtual) {
+		*is_iomem = false;
+		return NULL;
+	}
+	return ttm_kmap_obj_virtual(kmap, is_iomem);
+}
+EXPORT_SYMBOL(drm_gem_vram_kmap);
+
+/**
+ * drm_gem_vram_kunmap() - Unmaps a GEM VRAM object
+ * @gbo:	the GEM VRAM object
+ */
+void drm_gem_vram_kunmap(struct drm_gem_vram_object *gbo)
+{
+	struct ttm_bo_kmap_obj *kmap = &gbo->kmap;
+
+	if (!kmap->virtual)
+		return;
+
+	ttm_bo_kunmap(kmap);
+	kmap->virtual = NULL;
+}
+EXPORT_SYMBOL(drm_gem_vram_kunmap);
+
+/**
+ * drm_gem_vram_fill_create_dumb() - \
+	Helper for implementing &struct drm_driver.dumb_create
+ * @file:		the DRM file
+ * @dev:		the DRM device
+ * @bdev:		the TTM BO device managing the buffer object
+ * @pg_align:		the buffer's alignment in multiples of the page size
+ * @interruptible:	sleep interruptible if waiting for memory
+ * @args:		the arguments as provided to \
+				&struct drm_driver.dumb_create
+ *
+ * This helper function fills &struct drm_mode_create_dumb, which is used
+ * by &struct drm_driver.dumb_create. Implementations of this interface
+ * should forwards their arguments to this helper, plus the driver-specific
+ * parameters.
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_gem_vram_fill_create_dumb(struct drm_file *file,
+				  struct drm_device *dev,
+				  struct ttm_bo_device *bdev,
+				  unsigned long pg_align,
+				  bool interruptible,
+				  struct drm_mode_create_dumb *args)
+{
+	size_t pitch, size;
+	struct drm_gem_vram_object *gbo;
+	int ret;
+	u32 handle;
+
+	pitch = args->width * ((args->bpp + 7) / 8);
+	size = pitch * args->height;
+
+	size = roundup(size, PAGE_SIZE);
+	if (!size)
+		return -EINVAL;
+
+	gbo = drm_gem_vram_create(dev, bdev, size, pg_align, interruptible);
+	if (IS_ERR(gbo))
+		return PTR_ERR(gbo);
+
+	ret = drm_gem_handle_create(file, &gbo->gem, &handle);
+	if (ret)
+		goto err_drm_gem_object_put_unlocked;
+
+	drm_gem_object_put_unlocked(&gbo->gem);
+
+	args->pitch = pitch;
+	args->size = size;
+	args->handle = handle;
+
+	return 0;
+
+err_drm_gem_object_put_unlocked:
+	drm_gem_object_put_unlocked(&gbo->gem);
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_vram_fill_create_dumb);
+
+/*
+ * Helpers for struct ttm_bo_driver
+ */
+
+static bool drm_is_gem_vram(struct ttm_buffer_object *bo)
+{
+	return (bo->destroy == ttm_buffer_object_destroy);
+}
+
+/**
+ * drm_gem_vram_bo_driver_evict_flags() - \
+	Implements &struct ttm_bo_driver.evict_flags
+ * @bo:	TTM buffer object. Refers to &struct drm_gem_vram_object.bo
+ * @pl:	TTM placement information.
+ */
+void drm_gem_vram_bo_driver_evict_flags(struct ttm_buffer_object *bo,
+					struct ttm_placement *pl)
+{
+	struct drm_gem_vram_object *gbo;
+
+	/* TTM may pass BOs that are not GEM VRAM BOs. */
+	if (!drm_is_gem_vram(bo))
+		return;
+
+	gbo = drm_gem_vram_of_bo(bo);
+	drm_gem_vram_placement(gbo, TTM_PL_FLAG_SYSTEM);
+	*pl = gbo->placement;
+}
+EXPORT_SYMBOL(drm_gem_vram_bo_driver_evict_flags);
+
+/**
+ * drm_gem_vram_bo_driver_verify_access() - \
+	Implements &struct ttm_bo_driver.verify_access
+ * @bo:		TTM buffer object. Refers to &struct drm_gem_vram_object.bo
+ * @filp:	File pointer.
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative errno code otherwise.
+ */
+int drm_gem_vram_bo_driver_verify_access(struct ttm_buffer_object *bo,
+					 struct file *filp)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_bo(bo);
+
+	return drm_vma_node_verify_access(&gbo->gem.vma_node,
+					  filp->private_data);
+}
+EXPORT_SYMBOL(drm_gem_vram_bo_driver_verify_access);
+
+/*
+ * drm_gem_vram_mm_funcs - Functions for &struct drm_vram_mm
+ *
+ * Most users of @struct drm_gem_vram_object will also use
+ * @struct drm_vram_mm. This instance of &struct drm_vram_mm_funcs
+ * can be used to connect both.
+ */
+const struct drm_vram_mm_funcs drm_gem_vram_mm_funcs = {
+	.evict_flags = drm_gem_vram_bo_driver_evict_flags,
+	.verify_access = drm_gem_vram_bo_driver_verify_access
+};
+EXPORT_SYMBOL(drm_gem_vram_mm_funcs);
+
+/*
+ * Helpers for struct drm_driver
+ */
+
+/**
+ * drm_gem_vram_driver_gem_free_object_unlocked() - \
+	Implements &struct drm_driver.gem_free_object_unlocked
+ * @gem:	GEM object. Refers to &struct drm_gem_vram_object.gem
+ */
+void drm_gem_vram_driver_gem_free_object_unlocked(struct drm_gem_object *gem)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+
+	drm_gem_vram_put(gbo);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_free_object_unlocked);
+
+/**
+ * drm_gem_vram_driver_create_dumb() - \
+	Implements &struct drm_driver.dumb_create
+ * @file:		the DRM file
+ * @dev:		the DRM device
+ * @args:		the arguments as provided to \
+				&struct drm_driver.dumb_create
+ *
+ * This function requires the driver to use @drm_device.vram_mm for its
+ * instance of VRAM MM.
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_gem_vram_driver_dumb_create(struct drm_file *file,
+				    struct drm_device *dev,
+				    struct drm_mode_create_dumb *args)
+{
+	if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
+		return -EINVAL;
+
+	return drm_gem_vram_fill_create_dumb(file, dev, &dev->vram_mm->bdev, 0,
+					     false, args);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_dumb_create);
+
+/**
+ * drm_gem_vram_driver_dumb_mmap_offset() - \
+	Implements &struct drm_driver.dumb_mmap_offset
+ * @file:	DRM file pointer.
+ * @dev:	DRM device.
+ * @handle:	GEM handle
+ * @offset:	Returns the mapping's memory offset on success
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative errno code otherwise.
+ */
+int drm_gem_vram_driver_dumb_mmap_offset(struct drm_file *file,
+					 struct drm_device *dev,
+					 uint32_t handle, uint64_t *offset)
+{
+	struct drm_gem_object *gem;
+	struct drm_gem_vram_object *gbo;
+
+	gem = drm_gem_object_lookup(file, handle);
+	if (!gem)
+		return -ENOENT;
+
+	gbo = drm_gem_vram_of_gem(gem);
+	*offset = drm_gem_vram_mmap_offset(gbo);
+
+	drm_gem_object_put_unlocked(gem);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_dumb_mmap_offset);
+
+/*
+ * PRIME helpers for struct drm_driver
+ */
+
+/**
+ * drm_gem_vram_driver_gem_prime_pin() - \
+	Implements &struct drm_driver.gem_prime_pin
+ * @gem:	The GEM object to pin
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative errno code otherwise.
+ */
+int drm_gem_vram_driver_gem_prime_pin(struct drm_gem_object *gem)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+
+	/* Fbdev console emulation is the use case of these PRIME
+	 * helpers. This may involve updating a hardware buffer from
+	 * a shadow FB. We pin the buffer to it's current location
+	 * (either video RAM or system memory) to prevent it from
+	 * being relocated during the update operation. If you require
+	 * the buffer to be pinned to VRAM, implement a callback that
+	 * sets the flags accordingly.
+	 */
+	return drm_gem_vram_pin(gbo, 0);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_prime_pin);
+
+/**
+ * drm_gem_vram_driver_gem_prime_unpin() - \
+	Implements &struct drm_driver.gem_prime_unpin
+ * @gem:	The GEM object to unpin
+ */
+void drm_gem_vram_driver_gem_prime_unpin(struct drm_gem_object *gem)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+
+	drm_gem_vram_unpin(gbo);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_prime_unpin);
+
+/**
+ * drm_gem_vram_driver_gem_prime_vmap() - \
+	Implements &struct drm_driver.gem_prime_vmap
+ * @gem:	The GEM object to map
+ *
+ * Returns:
+ * The buffers virtual address on success, or
+ * NULL otherwise.
+ */
+void *drm_gem_vram_driver_gem_prime_vmap(struct drm_gem_object *gem)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+	int ret;
+	void *base;
+
+	ret = drm_gem_vram_pin(gbo, 0);
+	if (ret)
+		return NULL;
+	base = drm_gem_vram_kmap(gbo, true, NULL);
+	if (IS_ERR(base)) {
+		drm_gem_vram_unpin(gbo);
+		return NULL;
+	}
+	return base;
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_prime_vmap);
+
+/**
+ * drm_gem_vram_driver_gem_prime_vunmap() - \
+	Implements &struct drm_driver.gem_prime_vunmap
+ * @gem:	The GEM object to unmap
+ * @vaddr:	The mapping's base address
+ */
+void drm_gem_vram_driver_gem_prime_vunmap(struct drm_gem_object *gem,
+					  void *vaddr)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+
+	drm_gem_vram_kunmap(gbo);
+	drm_gem_vram_unpin(gbo);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_prime_vunmap);
+
+/**
+ * drm_gem_vram_driver_gem_prime_mmap() - \
+	Implements &struct drm_driver.gem_prime_mmap
+ * @gem:	The GEM object to map
+ * @vma:	The VMA describing the mapping
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative errno code otherwise.
+ */
+int drm_gem_vram_driver_gem_prime_mmap(struct drm_gem_object *gem,
+				       struct vm_area_struct *vma)
+{
+	struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(gem);
+
+	gbo->gem.vma_node.vm_node.start = gbo->bo.vma_node.vm_node.start;
+	return drm_gem_prime_mmap(gem, vma);
+}
+EXPORT_SYMBOL(drm_gem_vram_driver_gem_prime_mmap);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index c92b00d42ece..c50fa6f0709f 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -32,11 +32,15 @@
  * Thomas Hellström <thomas-at-tungstengraphics-dot-com>
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_hashtab.h>
+#include <linux/export.h>
 #include <linux/hash.h>
+#include <linux/mm.h>
+#include <linux/rculist.h>
 #include <linux/slab.h>
-#include <linux/export.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_hashtab.h>
+#include <drm/drm_print.h>
 
 int drm_ht_create(struct drm_open_hash *ht, unsigned int order)
 {
diff --git a/drivers/gpu/drm/drm_hdcp.c b/drivers/gpu/drm/drm_hdcp.c
new file mode 100644
index 000000000000..cd837bd409f7
--- /dev/null
+++ b/drivers/gpu/drm/drm_hdcp.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Intel Corporation.
+ *
+ * Authors:
+ * Ramalingam C <ramalingam.c@intel.com>
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_hdcp.h>
+#include <drm/drm_sysfs.h>
+#include <drm/drm_print.h>
+#include <drm/drm_device.h>
+#include <drm/drm_property.h>
+#include <drm/drm_mode_object.h>
+#include <drm/drm_connector.h>
+
+#include "drm_internal.h"
+
+static struct hdcp_srm {
+	u32 revoked_ksv_cnt;
+	u8 *revoked_ksv_list;
+
+	/* Mutex to protect above struct member */
+	struct mutex mutex;
+} *srm_data;
+
+static inline void drm_hdcp_print_ksv(const u8 *ksv)
+{
+	DRM_DEBUG("\t%#02x, %#02x, %#02x, %#02x, %#02x\n",
+		  ksv[0], ksv[1], ksv[2], ksv[3], ksv[4]);
+}
+
+static u32 drm_hdcp_get_revoked_ksv_count(const u8 *buf, u32 vrls_length)
+{
+	u32 parsed_bytes = 0, ksv_count = 0, vrl_ksv_cnt, vrl_sz;
+
+	while (parsed_bytes < vrls_length) {
+		vrl_ksv_cnt = *buf;
+		ksv_count += vrl_ksv_cnt;
+
+		vrl_sz = (vrl_ksv_cnt * DRM_HDCP_KSV_LEN) + 1;
+		buf += vrl_sz;
+		parsed_bytes += vrl_sz;
+	}
+
+	/*
+	 * When vrls are not valid, ksvs are not considered.
+	 * Hence SRM will be discarded.
+	 */
+	if (parsed_bytes != vrls_length)
+		ksv_count = 0;
+
+	return ksv_count;
+}
+
+static u32 drm_hdcp_get_revoked_ksvs(const u8 *buf, u8 *revoked_ksv_list,
+				     u32 vrls_length)
+{
+	u32 parsed_bytes = 0, ksv_count = 0;
+	u32 vrl_ksv_cnt, vrl_ksv_sz, vrl_idx = 0;
+
+	do {
+		vrl_ksv_cnt = *buf;
+		vrl_ksv_sz = vrl_ksv_cnt * DRM_HDCP_KSV_LEN;
+
+		buf++;
+
+		DRM_DEBUG("vrl: %d, Revoked KSVs: %d\n", vrl_idx++,
+			  vrl_ksv_cnt);
+		memcpy(revoked_ksv_list, buf, vrl_ksv_sz);
+
+		ksv_count += vrl_ksv_cnt;
+		revoked_ksv_list += vrl_ksv_sz;
+		buf += vrl_ksv_sz;
+
+		parsed_bytes += (vrl_ksv_sz + 1);
+	} while (parsed_bytes < vrls_length);
+
+	return ksv_count;
+}
+
+static inline u32 get_vrl_length(const u8 *buf)
+{
+	return drm_hdcp_be24_to_cpu(buf);
+}
+
+static int drm_hdcp_parse_hdcp1_srm(const u8 *buf, size_t count)
+{
+	struct hdcp_srm_header *header;
+	u32 vrl_length, ksv_count;
+
+	if (count < (sizeof(struct hdcp_srm_header) +
+	    DRM_HDCP_1_4_VRL_LENGTH_SIZE + DRM_HDCP_1_4_DCP_SIG_SIZE)) {
+		DRM_ERROR("Invalid blob length\n");
+		return -EINVAL;
+	}
+
+	header = (struct hdcp_srm_header *)buf;
+	DRM_DEBUG("SRM ID: 0x%x, SRM Ver: 0x%x, SRM Gen No: 0x%x\n",
+		  header->srm_id,
+		  be16_to_cpu(header->srm_version), header->srm_gen_no);
+
+	WARN_ON(header->reserved);
+
+	buf = buf + sizeof(*header);
+	vrl_length = get_vrl_length(buf);
+	if (count < (sizeof(struct hdcp_srm_header) + vrl_length) ||
+	    vrl_length < (DRM_HDCP_1_4_VRL_LENGTH_SIZE +
+			  DRM_HDCP_1_4_DCP_SIG_SIZE)) {
+		DRM_ERROR("Invalid blob length or vrl length\n");
+		return -EINVAL;
+	}
+
+	/* Length of the all vrls combined */
+	vrl_length -= (DRM_HDCP_1_4_VRL_LENGTH_SIZE +
+		       DRM_HDCP_1_4_DCP_SIG_SIZE);
+
+	if (!vrl_length) {
+		DRM_ERROR("No vrl found\n");
+		return -EINVAL;
+	}
+
+	buf += DRM_HDCP_1_4_VRL_LENGTH_SIZE;
+	ksv_count = drm_hdcp_get_revoked_ksv_count(buf, vrl_length);
+	if (!ksv_count) {
+		DRM_DEBUG("Revoked KSV count is 0\n");
+		return count;
+	}
+
+	kfree(srm_data->revoked_ksv_list);
+	srm_data->revoked_ksv_list = kcalloc(ksv_count, DRM_HDCP_KSV_LEN,
+					     GFP_KERNEL);
+	if (!srm_data->revoked_ksv_list) {
+		DRM_ERROR("Out of Memory\n");
+		return -ENOMEM;
+	}
+
+	if (drm_hdcp_get_revoked_ksvs(buf, srm_data->revoked_ksv_list,
+				      vrl_length) != ksv_count) {
+		srm_data->revoked_ksv_cnt = 0;
+		kfree(srm_data->revoked_ksv_list);
+		return -EINVAL;
+	}
+
+	srm_data->revoked_ksv_cnt = ksv_count;
+	return count;
+}
+
+static int drm_hdcp_parse_hdcp2_srm(const u8 *buf, size_t count)
+{
+	struct hdcp_srm_header *header;
+	u32 vrl_length, ksv_count, ksv_sz;
+
+	if (count < (sizeof(struct hdcp_srm_header) +
+	    DRM_HDCP_2_VRL_LENGTH_SIZE + DRM_HDCP_2_DCP_SIG_SIZE)) {
+		DRM_ERROR("Invalid blob length\n");
+		return -EINVAL;
+	}
+
+	header = (struct hdcp_srm_header *)buf;
+	DRM_DEBUG("SRM ID: 0x%x, SRM Ver: 0x%x, SRM Gen No: 0x%x\n",
+		  header->srm_id & DRM_HDCP_SRM_ID_MASK,
+		  be16_to_cpu(header->srm_version), header->srm_gen_no);
+
+	if (header->reserved)
+		return -EINVAL;
+
+	buf = buf + sizeof(*header);
+	vrl_length = get_vrl_length(buf);
+
+	if (count < (sizeof(struct hdcp_srm_header) + vrl_length) ||
+	    vrl_length < (DRM_HDCP_2_VRL_LENGTH_SIZE +
+	    DRM_HDCP_2_DCP_SIG_SIZE)) {
+		DRM_ERROR("Invalid blob length or vrl length\n");
+		return -EINVAL;
+	}
+
+	/* Length of the all vrls combined */
+	vrl_length -= (DRM_HDCP_2_VRL_LENGTH_SIZE +
+		       DRM_HDCP_2_DCP_SIG_SIZE);
+
+	if (!vrl_length) {
+		DRM_ERROR("No vrl found\n");
+		return -EINVAL;
+	}
+
+	buf += DRM_HDCP_2_VRL_LENGTH_SIZE;
+	ksv_count = (*buf << 2) | DRM_HDCP_2_KSV_COUNT_2_LSBITS(*(buf + 1));
+	if (!ksv_count) {
+		DRM_DEBUG("Revoked KSV count is 0\n");
+		return count;
+	}
+
+	kfree(srm_data->revoked_ksv_list);
+	srm_data->revoked_ksv_list = kcalloc(ksv_count, DRM_HDCP_KSV_LEN,
+					     GFP_KERNEL);
+	if (!srm_data->revoked_ksv_list) {
+		DRM_ERROR("Out of Memory\n");
+		return -ENOMEM;
+	}
+
+	ksv_sz = ksv_count * DRM_HDCP_KSV_LEN;
+	buf += DRM_HDCP_2_NO_OF_DEV_PLUS_RESERVED_SZ;
+
+	DRM_DEBUG("Revoked KSVs: %d\n", ksv_count);
+	memcpy(srm_data->revoked_ksv_list, buf, ksv_sz);
+
+	srm_data->revoked_ksv_cnt = ksv_count;
+	return count;
+}
+
+static inline bool is_srm_version_hdcp1(const u8 *buf)
+{
+	return *buf == (u8)(DRM_HDCP_1_4_SRM_ID << 4);
+}
+
+static inline bool is_srm_version_hdcp2(const u8 *buf)
+{
+	return *buf == (u8)(DRM_HDCP_2_SRM_ID << 4 | DRM_HDCP_2_INDICATOR);
+}
+
+static void drm_hdcp_srm_update(const u8 *buf, size_t count)
+{
+	if (count < sizeof(struct hdcp_srm_header))
+		return;
+
+	if (is_srm_version_hdcp1(buf))
+		drm_hdcp_parse_hdcp1_srm(buf, count);
+	else if (is_srm_version_hdcp2(buf))
+		drm_hdcp_parse_hdcp2_srm(buf, count);
+}
+
+static void drm_hdcp_request_srm(struct drm_device *drm_dev)
+{
+	char fw_name[36] = "display_hdcp_srm.bin";
+	const struct firmware *fw;
+
+	int ret;
+
+	ret = request_firmware_direct(&fw, (const char *)fw_name,
+				      drm_dev->dev);
+	if (ret < 0)
+		goto exit;
+
+	if (fw->size && fw->data)
+		drm_hdcp_srm_update(fw->data, fw->size);
+
+exit:
+	release_firmware(fw);
+}
+
+/**
+ * drm_hdcp_check_ksvs_revoked - Check the revoked status of the IDs
+ *
+ * @drm_dev: drm_device for which HDCP revocation check is requested
+ * @ksvs: List of KSVs (HDCP receiver IDs)
+ * @ksv_count: KSV count passed in through @ksvs
+ *
+ * This function reads the HDCP System renewability Message(SRM Table)
+ * from userspace as a firmware and parses it for the revoked HDCP
+ * KSVs(Receiver IDs) detected by DCP LLC. Once the revoked KSVs are known,
+ * revoked state of the KSVs in the list passed in by display drivers are
+ * decided and response is sent.
+ *
+ * SRM should be presented in the name of "display_hdcp_srm.bin".
+ *
+ * Returns:
+ * TRUE on any of the KSV is revoked, else FALSE.
+ */
+bool drm_hdcp_check_ksvs_revoked(struct drm_device *drm_dev, u8 *ksvs,
+				 u32 ksv_count)
+{
+	u32 rev_ksv_cnt, cnt, i, j;
+	u8 *rev_ksv_list;
+
+	if (!srm_data)
+		return false;
+
+	mutex_lock(&srm_data->mutex);
+	drm_hdcp_request_srm(drm_dev);
+
+	rev_ksv_cnt = srm_data->revoked_ksv_cnt;
+	rev_ksv_list = srm_data->revoked_ksv_list;
+
+	/* If the Revoked ksv list is empty */
+	if (!rev_ksv_cnt || !rev_ksv_list) {
+		mutex_unlock(&srm_data->mutex);
+		return false;
+	}
+
+	for  (cnt = 0; cnt < ksv_count; cnt++) {
+		rev_ksv_list = srm_data->revoked_ksv_list;
+		for (i = 0; i < rev_ksv_cnt; i++) {
+			for (j = 0; j < DRM_HDCP_KSV_LEN; j++)
+				if (ksvs[j] != rev_ksv_list[j]) {
+					break;
+				} else if (j == (DRM_HDCP_KSV_LEN - 1)) {
+					DRM_DEBUG("Revoked KSV is ");
+					drm_hdcp_print_ksv(ksvs);
+					mutex_unlock(&srm_data->mutex);
+					return true;
+				}
+			/* Move the offset to next KSV in the revoked list */
+			rev_ksv_list += DRM_HDCP_KSV_LEN;
+		}
+
+		/* Iterate to next ksv_offset */
+		ksvs += DRM_HDCP_KSV_LEN;
+	}
+	mutex_unlock(&srm_data->mutex);
+	return false;
+}
+EXPORT_SYMBOL_GPL(drm_hdcp_check_ksvs_revoked);
+
+int drm_setup_hdcp_srm(struct class *drm_class)
+{
+	srm_data = kzalloc(sizeof(*srm_data), GFP_KERNEL);
+	if (!srm_data)
+		return -ENOMEM;
+	mutex_init(&srm_data->mutex);
+
+	return 0;
+}
+
+void drm_teardown_hdcp_srm(struct class *drm_class)
+{
+	if (srm_data) {
+		kfree(srm_data->revoked_ksv_list);
+		kfree(srm_data);
+	}
+}
+
+static struct drm_prop_enum_list drm_cp_enum_list[] = {
+	{ DRM_MODE_CONTENT_PROTECTION_UNDESIRED, "Undesired" },
+	{ DRM_MODE_CONTENT_PROTECTION_DESIRED, "Desired" },
+	{ DRM_MODE_CONTENT_PROTECTION_ENABLED, "Enabled" },
+};
+DRM_ENUM_NAME_FN(drm_get_content_protection_name, drm_cp_enum_list)
+
+/**
+ * drm_connector_attach_content_protection_property - attach content protection
+ * property
+ *
+ * @connector: connector to attach CP property on.
+ *
+ * This is used to add support for content protection on select connectors.
+ * Content Protection is intentionally vague to allow for different underlying
+ * technologies, however it is most implemented by HDCP.
+ *
+ * The content protection will be set to &drm_connector_state.content_protection
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int drm_connector_attach_content_protection_property(
+		struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct drm_property *prop =
+			dev->mode_config.content_protection_property;
+
+	if (!prop)
+		prop = drm_property_create_enum(dev, 0, "Content Protection",
+						drm_cp_enum_list,
+						ARRAY_SIZE(drm_cp_enum_list));
+	if (!prop)
+		return -ENOMEM;
+
+	drm_object_attach_property(&connector->base, prop,
+				   DRM_MODE_CONTENT_PROTECTION_UNDESIRED);
+	dev->mode_config.content_protection_property = prop;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_connector_attach_content_protection_property);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index e19ac7ca602d..51a2055c8f18 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -28,8 +28,16 @@
 
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
-struct drm_prime_file_private;
+struct dentry;
 struct dma_buf;
+struct drm_connector;
+struct drm_crtc;
+struct drm_framebuffer;
+struct drm_gem_object;
+struct drm_master;
+struct drm_minor;
+struct drm_prime_file_private;
+struct drm_printer;
 
 /* drm_file.c */
 extern struct mutex drm_global_mutex;
@@ -93,6 +101,8 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int drm_master_open(struct drm_file *file_priv);
 void drm_master_release(struct drm_file *file_priv);
+bool drm_master_internal_acquire(struct drm_device *dev);
+void drm_master_internal_release(struct drm_device *dev);
 
 /* drm_sysfs.c */
 extern struct class *drm_class;
@@ -106,6 +116,7 @@ void drm_sysfs_connector_remove(struct drm_connector *connector);
 void drm_sysfs_lease_event(struct drm_device *dev);
 
 /* drm_gem.c */
+struct drm_gem_object;
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
 int drm_gem_handle_create_tail(struct drm_file *file_priv,
@@ -122,16 +133,21 @@ void drm_gem_release(struct drm_device *dev, struct drm_file *file_private);
 void drm_gem_print_info(struct drm_printer *p, unsigned int indent,
 			const struct drm_gem_object *obj);
 
+int drm_gem_pin(struct drm_gem_object *obj);
+void drm_gem_unpin(struct drm_gem_object *obj);
+void *drm_gem_vmap(struct drm_gem_object *obj);
+void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr);
+
 /* drm_debugfs.c drm_debugfs_crc.c */
 #if defined(CONFIG_DEBUG_FS)
 int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 		     struct dentry *root);
-int drm_debugfs_cleanup(struct drm_minor *minor);
-int drm_debugfs_connector_add(struct drm_connector *connector);
+void drm_debugfs_cleanup(struct drm_minor *minor);
+void drm_debugfs_connector_add(struct drm_connector *connector);
 void drm_debugfs_connector_remove(struct drm_connector *connector);
-int drm_debugfs_crtc_add(struct drm_crtc *crtc);
+void drm_debugfs_crtc_add(struct drm_crtc *crtc);
 void drm_debugfs_crtc_remove(struct drm_crtc *crtc);
-int drm_debugfs_crtc_crc_add(struct drm_crtc *crtc);
+void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc);
 #else
 static inline int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 				   struct dentry *root)
@@ -139,30 +155,26 @@ static inline int drm_debugfs_init(struct drm_minor *minor, int minor_id,
 	return 0;
 }
 
-static inline int drm_debugfs_cleanup(struct drm_minor *minor)
+static inline void drm_debugfs_cleanup(struct drm_minor *minor)
 {
-	return 0;
 }
 
-static inline int drm_debugfs_connector_add(struct drm_connector *connector)
+static inline void drm_debugfs_connector_add(struct drm_connector *connector)
 {
-	return 0;
 }
 static inline void drm_debugfs_connector_remove(struct drm_connector *connector)
 {
 }
 
-static inline int drm_debugfs_crtc_add(struct drm_crtc *crtc)
+static inline void drm_debugfs_crtc_add(struct drm_crtc *crtc)
 {
-	return 0;
 }
 static inline void drm_debugfs_crtc_remove(struct drm_crtc *crtc)
 {
 }
 
-static inline int drm_debugfs_crtc_crc_add(struct drm_crtc *crtc)
+static inline void drm_debugfs_crtc_crc_add(struct drm_crtc *crtc)
 {
-	return 0;
 }
 
 #endif
@@ -201,3 +213,7 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
 				const struct drm_framebuffer *fb);
 int drm_framebuffer_debugfs_init(struct drm_minor *minor);
+
+/* drm_hdcp.c */
+int drm_setup_hdcp_srm(struct class *drm_class);
+void drm_teardown_hdcp_srm(struct class *drm_class);
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 374b372da58a..586aa28024c5 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -31,10 +31,13 @@
 #include <linux/ratelimit.h>
 #include <linux/export.h>
 
-#include <drm/drmP.h>
-#include "drm_legacy.h"
-#include "drm_internal.h"
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
+
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
+#include "drm_legacy.h"
 
 #define DRM_IOCTL_VERSION32		DRM_IOWR(0x00, drm_version32_t)
 #define DRM_IOCTL_GET_UNIQUE32		DRM_IOWR(0x01, drm_unique32_t)
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 2263e3ddd822..9441a36a2469 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -28,16 +28,22 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drm_ioctl.h>
-#include <drm/drmP.h>
-#include <drm/drm_auth.h>
-#include "drm_legacy.h"
-#include "drm_internal.h"
-#include "drm_crtc_internal.h"
-
-#include <linux/pci.h>
 #include <linux/export.h>
 #include <linux/nospec.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_auth.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_print.h>
+
+#include "drm_crtc_internal.h"
+#include "drm_internal.h"
+#include "drm_legacy.h"
 
 /**
  * DOC: getunique and setversion story
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 02f38cc9f468..03bce566a8c3 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -51,13 +51,18 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drm_irq.h>
-#include <drm/drmP.h>
 
+#include <linux/export.h>
 #include <linux/interrupt.h>	/* For task queue support */
-
+#include <linux/pci.h>
 #include <linux/vgaarb.h>
-#include <linux/export.h>
+
+#include <drm/drm.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "drm_internal.h"
 
diff --git a/drivers/gpu/drm/drm_kms_helper_common.c b/drivers/gpu/drm/drm_kms_helper_common.c
index 9c5ae825c507..d9a5ac81949e 100644
--- a/drivers/gpu/drm/drm_kms_helper_common.c
+++ b/drivers/gpu/drm/drm_kms_helper_common.c
@@ -26,7 +26,8 @@
  */
 
 #include <linux/module.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_print.h>
 
 #include "drm_crtc_helper_internal.h"
 
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index e8a5e3b13b2a..b481cafdde28 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -2,14 +2,19 @@
 /*
  * Copyright © 2017 Keith Packard <keithp@keithp.com>
  */
+#include <linux/file.h>
+#include <linux/uaccess.h>
 
-#include <drm/drmP.h>
-#include "drm_internal.h"
-#include "drm_legacy.h"
-#include "drm_crtc_internal.h"
-#include <drm/drm_lease.h>
 #include <drm/drm_auth.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_lease.h>
+#include <drm/drm_print.h>
+
+#include "drm_crtc_internal.h"
+#include "drm_internal.h"
+#include "drm_legacy.h"
 
 #define drm_for_each_lessee(lessee, lessor) \
 	list_for_each_entry((lessee), &(lessor)->lessees, lessee_list)
diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h
index 51f1fabfa145..1be3ea320474 100644
--- a/drivers/gpu/drm/drm_legacy.h
+++ b/drivers/gpu/drm/drm_legacy.h
@@ -29,11 +29,15 @@
  * drivers use them, and removing them are API breaks.
  */
 #include <linux/list.h>
+
+#include <drm/drm.h>
+#include <drm/drm_device.h>
 #include <drm/drm_legacy.h>
 
 struct agp_memory;
 struct drm_device;
 struct drm_file;
+struct drm_buf_desc;
 
 /*
  * Generic DRM Contexts
@@ -187,10 +191,12 @@ int drm_legacy_sg_free(struct drm_device *dev, void *data,
 void drm_legacy_init_members(struct drm_device *dev);
 void drm_legacy_destroy_members(struct drm_device *dev);
 void drm_legacy_dev_reinit(struct drm_device *dev);
+int drm_legacy_setup(struct drm_device * dev);
 #else
 static inline void drm_legacy_init_members(struct drm_device *dev) {}
 static inline void drm_legacy_destroy_members(struct drm_device *dev) {}
 static inline void drm_legacy_dev_reinit(struct drm_device *dev) {}
+static inline int drm_legacy_setup(struct drm_device * dev) { return 0; }
 #endif
 
 #if IS_ENABLED(CONFIG_DRM_LEGACY)
diff --git a/drivers/gpu/drm/drm_legacy_misc.c b/drivers/gpu/drm/drm_legacy_misc.c
index 2fe786839ca8..4d3a11cfd979 100644
--- a/drivers/gpu/drm/drm_legacy_misc.c
+++ b/drivers/gpu/drm/drm_legacy_misc.c
@@ -33,7 +33,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
 #include "drm_legacy.h"
 
@@ -51,6 +56,26 @@ void drm_legacy_destroy_members(struct drm_device *dev)
 	mutex_destroy(&dev->ctxlist_mutex);
 }
 
+int drm_legacy_setup(struct drm_device * dev)
+{
+	int ret;
+
+	if (dev->driver->firstopen &&
+	    drm_core_check_feature(dev, DRIVER_LEGACY)) {
+		ret = dev->driver->firstopen(dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	ret = drm_legacy_dma_setup(dev);
+	if (ret < 0)
+		return ret;
+
+
+	DRM_DEBUG("\n");
+	return 0;
+}
+
 void drm_legacy_dev_reinit(struct drm_device *dev)
 {
 	if (dev->irq_enabled)
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index b70058e77a28..68b18b0e290c 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -36,9 +36,13 @@
 #include <linux/export.h>
 #include <linux/sched/signal.h>
 
-#include <drm/drmP.h>
-#include "drm_legacy.h"
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
+#include "drm_legacy.h"
 
 static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
 
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 132fef8ff1b6..b634e1670190 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -33,10 +33,15 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/highmem.h>
 #include <linux/export.h>
+#include <linux/highmem.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
 #include <xen/xen.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+
 #include "drm_legacy.h"
 
 #if IS_ENABLED(CONFIG_AGP)
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 8b4cd31ce7bd..9a59865ce574 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -42,12 +42,13 @@
  * Thomas Hellström <thomas-at-tungstengraphics-dot-com>
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_mm.h>
-#include <linux/slab.h>
-#include <linux/seq_file.h>
 #include <linux/export.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+
+#include <drm/drm_mm.h>
 
 /**
  * DOC: Overview
diff --git a/drivers/gpu/drm/drm_mode_config.c b/drivers/gpu/drm/drm_mode_config.c
index 1a346ae1599d..7bc03c3c154f 100644
--- a/drivers/gpu/drm/drm_mode_config.c
+++ b/drivers/gpu/drm/drm_mode_config.c
@@ -20,9 +20,13 @@
  * OF THIS SOFTWARE.
  */
 
+#include <linux/uaccess.h>
+
+#include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
+#include <drm/drm_file.h>
 #include <drm/drm_mode_config.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index f32507e65b79..1c6e51135962 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -21,9 +21,14 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
-#include <drm/drm_mode_object.h>
+#include <linux/uaccess.h>
+
 #include <drm/drm_atomic.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_mode_object.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
 
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 56f92a0bba62..57e6408288c8 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -30,14 +30,18 @@
  * authorization from the copyright holder(s) and author(s).
  */
 
+#include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/list_sort.h>
 #include <linux/export.h>
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
+
 #include <video/of_videomode.h>
 #include <video/videomode.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_modes.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_internal.h"
 
@@ -1405,6 +1409,260 @@ void drm_connector_list_update(struct drm_connector *connector)
 }
 EXPORT_SYMBOL(drm_connector_list_update);
 
+static int drm_mode_parse_cmdline_bpp(const char *str, char **end_ptr,
+				      struct drm_cmdline_mode *mode)
+{
+	unsigned int bpp;
+
+	if (str[0] != '-')
+		return -EINVAL;
+
+	str++;
+	bpp = simple_strtol(str, end_ptr, 10);
+	if (*end_ptr == str)
+		return -EINVAL;
+
+	mode->bpp = bpp;
+	mode->bpp_specified = true;
+
+	return 0;
+}
+
+static int drm_mode_parse_cmdline_refresh(const char *str, char **end_ptr,
+					  struct drm_cmdline_mode *mode)
+{
+	unsigned int refresh;
+
+	if (str[0] != '@')
+		return -EINVAL;
+
+	str++;
+	refresh = simple_strtol(str, end_ptr, 10);
+	if (*end_ptr == str)
+		return -EINVAL;
+
+	mode->refresh = refresh;
+	mode->refresh_specified = true;
+
+	return 0;
+}
+
+static int drm_mode_parse_cmdline_extra(const char *str, int length,
+					struct drm_connector *connector,
+					struct drm_cmdline_mode *mode)
+{
+	int i;
+
+	for (i = 0; i < length; i++) {
+		switch (str[i]) {
+		case 'i':
+			mode->interlace = true;
+			break;
+		case 'm':
+			mode->margins = true;
+			break;
+		case 'D':
+			if (mode->force != DRM_FORCE_UNSPECIFIED)
+				return -EINVAL;
+
+			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) &&
+			    (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB))
+				mode->force = DRM_FORCE_ON;
+			else
+				mode->force = DRM_FORCE_ON_DIGITAL;
+			break;
+		case 'd':
+			if (mode->force != DRM_FORCE_UNSPECIFIED)
+				return -EINVAL;
+
+			mode->force = DRM_FORCE_OFF;
+			break;
+		case 'e':
+			if (mode->force != DRM_FORCE_UNSPECIFIED)
+				return -EINVAL;
+
+			mode->force = DRM_FORCE_ON;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int drm_mode_parse_cmdline_res_mode(const char *str, unsigned int length,
+					   bool extras,
+					   struct drm_connector *connector,
+					   struct drm_cmdline_mode *mode)
+{
+	const char *str_start = str;
+	bool rb = false, cvt = false;
+	int xres = 0, yres = 0;
+	int remaining, i;
+	char *end_ptr;
+
+	xres = simple_strtol(str, &end_ptr, 10);
+	if (end_ptr == str)
+		return -EINVAL;
+
+	if (end_ptr[0] != 'x')
+		return -EINVAL;
+	end_ptr++;
+
+	str = end_ptr;
+	yres = simple_strtol(str, &end_ptr, 10);
+	if (end_ptr == str)
+		return -EINVAL;
+
+	remaining = length - (end_ptr - str_start);
+	if (remaining < 0)
+		return -EINVAL;
+
+	for (i = 0; i < remaining; i++) {
+		switch (end_ptr[i]) {
+		case 'M':
+			cvt = true;
+			break;
+		case 'R':
+			rb = true;
+			break;
+		default:
+			/*
+			 * Try to pass that to our extras parsing
+			 * function to handle the case where the
+			 * extras are directly after the resolution
+			 */
+			if (extras) {
+				int ret = drm_mode_parse_cmdline_extra(end_ptr + i,
+								       1,
+								       connector,
+								       mode);
+				if (ret)
+					return ret;
+			} else {
+				return -EINVAL;
+			}
+		}
+	}
+
+	mode->xres = xres;
+	mode->yres = yres;
+	mode->cvt = cvt;
+	mode->rb = rb;
+
+	return 0;
+}
+
+static int drm_mode_parse_cmdline_options(char *str, size_t len,
+					  struct drm_connector *connector,
+					  struct drm_cmdline_mode *mode)
+{
+	unsigned int rotation = 0;
+	char *sep = str;
+
+	while ((sep = strchr(sep, ','))) {
+		char *delim, *option;
+
+		option = sep + 1;
+		delim = strchr(option, '=');
+		if (!delim) {
+			delim = strchr(option, ',');
+
+			if (!delim)
+				delim = str + len;
+		}
+
+		if (!strncmp(option, "rotate", delim - option)) {
+			const char *value = delim + 1;
+			unsigned int deg;
+
+			deg = simple_strtol(value, &sep, 10);
+
+			/* Make sure we have parsed something */
+			if (sep == value)
+				return -EINVAL;
+
+			switch (deg) {
+			case 0:
+				rotation |= DRM_MODE_ROTATE_0;
+				break;
+
+			case 90:
+				rotation |= DRM_MODE_ROTATE_90;
+				break;
+
+			case 180:
+				rotation |= DRM_MODE_ROTATE_180;
+				break;
+
+			case 270:
+				rotation |= DRM_MODE_ROTATE_270;
+				break;
+
+			default:
+				return -EINVAL;
+			}
+		} else if (!strncmp(option, "reflect_x", delim - option)) {
+			rotation |= DRM_MODE_REFLECT_X;
+			sep = delim;
+		} else if (!strncmp(option, "reflect_y", delim - option)) {
+			rotation |= DRM_MODE_REFLECT_Y;
+			sep = delim;
+		} else if (!strncmp(option, "margin_right", delim - option)) {
+			const char *value = delim + 1;
+			unsigned int margin;
+
+			margin = simple_strtol(value, &sep, 10);
+
+			/* Make sure we have parsed something */
+			if (sep == value)
+				return -EINVAL;
+
+			mode->tv_margins.right = margin;
+		} else if (!strncmp(option, "margin_left", delim - option)) {
+			const char *value = delim + 1;
+			unsigned int margin;
+
+			margin = simple_strtol(value, &sep, 10);
+
+			/* Make sure we have parsed something */
+			if (sep == value)
+				return -EINVAL;
+
+			mode->tv_margins.left = margin;
+		} else if (!strncmp(option, "margin_top", delim - option)) {
+			const char *value = delim + 1;
+			unsigned int margin;
+
+			margin = simple_strtol(value, &sep, 10);
+
+			/* Make sure we have parsed something */
+			if (sep == value)
+				return -EINVAL;
+
+			mode->tv_margins.top = margin;
+		} else if (!strncmp(option, "margin_bottom", delim - option)) {
+			const char *value = delim + 1;
+			unsigned int margin;
+
+			margin = simple_strtol(value, &sep, 10);
+
+			/* Make sure we have parsed something */
+			if (sep == value)
+				return -EINVAL;
+
+			mode->tv_margins.bottom = margin;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	mode->rotation_reflection = rotation;
+
+	return 0;
+}
+
 /**
  * drm_mode_parse_command_line_for_connector - parse command line modeline for connector
  * @mode_option: optional per connector mode option
@@ -1420,6 +1678,10 @@ EXPORT_SYMBOL(drm_connector_list_update);
  *
  *	<xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
  *
+ * Additionals options can be provided following the mode, using a comma to
+ * separate each option. Valid options can be found in
+ * Documentation/fb/modedb.txt.
+ *
  * The intermediate drm_cmdline_mode structure is required to store additional
  * options from the command line modline like the force-enable/disable flag.
  *
@@ -1431,13 +1693,13 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 					       struct drm_cmdline_mode *mode)
 {
 	const char *name;
-	unsigned int namelen;
-	bool res_specified = false, bpp_specified = false, refresh_specified = false;
-	unsigned int xres = 0, yres = 0, bpp = 32, refresh = 0;
-	bool yres_specified = false, cvt = false, rb = false;
-	bool interlace = false, margins = false, was_digit = false;
-	int i;
-	enum drm_connector_force force = DRM_FORCE_UNSPECIFIED;
+	bool named_mode = false, parse_extras = false;
+	unsigned int bpp_off = 0, refresh_off = 0, options_off = 0;
+	unsigned int mode_end = 0;
+	char *bpp_ptr = NULL, *refresh_ptr = NULL, *extra_ptr = NULL;
+	char *options_ptr = NULL;
+	char *bpp_end_ptr = NULL, *refresh_end_ptr = NULL;
+	int ret;
 
 #ifdef CONFIG_FB
 	if (!mode_option)
@@ -1450,127 +1712,111 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 	}
 
 	name = mode_option;
-	namelen = strlen(name);
-	for (i = namelen-1; i >= 0; i--) {
-		switch (name[i]) {
-		case '@':
-			if (!refresh_specified && !bpp_specified &&
-			    !yres_specified && !cvt && !rb && was_digit) {
-				refresh = simple_strtol(&name[i+1], NULL, 10);
-				refresh_specified = true;
-				was_digit = false;
-			} else
-				goto done;
-			break;
-		case '-':
-			if (!bpp_specified && !yres_specified && !cvt &&
-			    !rb && was_digit) {
-				bpp = simple_strtol(&name[i+1], NULL, 10);
-				bpp_specified = true;
-				was_digit = false;
-			} else
-				goto done;
-			break;
-		case 'x':
-			if (!yres_specified && was_digit) {
-				yres = simple_strtol(&name[i+1], NULL, 10);
-				yres_specified = true;
-				was_digit = false;
-			} else
-				goto done;
-			break;
-		case '0' ... '9':
-			was_digit = true;
-			break;
-		case 'M':
-			if (yres_specified || cvt || was_digit)
-				goto done;
-			cvt = true;
-			break;
-		case 'R':
-			if (yres_specified || cvt || rb || was_digit)
-				goto done;
-			rb = true;
-			break;
-		case 'm':
-			if (cvt || yres_specified || was_digit)
-				goto done;
-			margins = true;
-			break;
-		case 'i':
-			if (cvt || yres_specified || was_digit)
-				goto done;
-			interlace = true;
-			break;
-		case 'e':
-			if (yres_specified || bpp_specified || refresh_specified ||
-			    was_digit || (force != DRM_FORCE_UNSPECIFIED))
-				goto done;
 
-			force = DRM_FORCE_ON;
-			break;
-		case 'D':
-			if (yres_specified || bpp_specified || refresh_specified ||
-			    was_digit || (force != DRM_FORCE_UNSPECIFIED))
-				goto done;
+	/*
+	 * This is a bit convoluted. To differentiate between the
+	 * named modes and poorly formatted resolutions, we need a
+	 * bunch of things:
+	 *   - We need to make sure that the first character (which
+	 *     would be our resolution in X) is a digit.
+	 *   - However, if the X resolution is missing, then we end up
+	 *     with something like x<yres>, with our first character
+	 *     being an alpha-numerical character, which would be
+	 *     considered a named mode.
+	 *
+	 * If this isn't enough, we should add more heuristics here,
+	 * and matching unit-tests.
+	 */
+	if (!isdigit(name[0]) && name[0] != 'x')
+		named_mode = true;
 
-			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) &&
-			    (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB))
-				force = DRM_FORCE_ON;
-			else
-				force = DRM_FORCE_ON_DIGITAL;
-			break;
-		case 'd':
-			if (yres_specified || bpp_specified || refresh_specified ||
-			    was_digit || (force != DRM_FORCE_UNSPECIFIED))
-				goto done;
+	/* Try to locate the bpp and refresh specifiers, if any */
+	bpp_ptr = strchr(name, '-');
+	if (bpp_ptr) {
+		bpp_off = bpp_ptr - name;
+		mode->bpp_specified = true;
+	}
 
-			force = DRM_FORCE_OFF;
-			break;
-		default:
-			goto done;
-		}
+	refresh_ptr = strchr(name, '@');
+	if (refresh_ptr) {
+		if (named_mode)
+			return false;
+
+		refresh_off = refresh_ptr - name;
+		mode->refresh_specified = true;
 	}
 
-	if (i < 0 && yres_specified) {
-		char *ch;
-		xres = simple_strtol(name, &ch, 10);
-		if ((ch != NULL) && (*ch == 'x'))
-			res_specified = true;
-		else
-			i = ch - name;
-	} else if (!yres_specified && was_digit) {
-		/* catch mode that begins with digits but has no 'x' */
-		i = 0;
+	/* Locate the start of named options */
+	options_ptr = strchr(name, ',');
+	if (options_ptr)
+		options_off = options_ptr - name;
+
+	/* Locate the end of the name / resolution, and parse it */
+	if (bpp_ptr) {
+		mode_end = bpp_off;
+	} else if (refresh_ptr) {
+		mode_end = refresh_off;
+	} else if (options_ptr) {
+		mode_end = options_off;
+	} else {
+		mode_end = strlen(name);
+		parse_extras = true;
 	}
-done:
-	if (i >= 0) {
-		pr_warn("[drm] parse error at position %i in video mode '%s'\n",
-			i, name);
-		mode->specified = false;
-		return false;
+
+	if (named_mode) {
+		strncpy(mode->name, name, mode_end);
+	} else {
+		ret = drm_mode_parse_cmdline_res_mode(name, mode_end,
+						      parse_extras,
+						      connector,
+						      mode);
+		if (ret)
+			return false;
 	}
+	mode->specified = true;
 
-	if (res_specified) {
-		mode->specified = true;
-		mode->xres = xres;
-		mode->yres = yres;
+	if (bpp_ptr) {
+		ret = drm_mode_parse_cmdline_bpp(bpp_ptr, &bpp_end_ptr, mode);
+		if (ret)
+			return false;
 	}
 
-	if (refresh_specified) {
-		mode->refresh_specified = true;
-		mode->refresh = refresh;
+	if (refresh_ptr) {
+		ret = drm_mode_parse_cmdline_refresh(refresh_ptr,
+						     &refresh_end_ptr, mode);
+		if (ret)
+			return false;
 	}
 
-	if (bpp_specified) {
-		mode->bpp_specified = true;
-		mode->bpp = bpp;
+	/*
+	 * Locate the end of the bpp / refresh, and parse the extras
+	 * if relevant
+	 */
+	if (bpp_ptr && refresh_ptr)
+		extra_ptr = max(bpp_end_ptr, refresh_end_ptr);
+	else if (bpp_ptr)
+		extra_ptr = bpp_end_ptr;
+	else if (refresh_ptr)
+		extra_ptr = refresh_end_ptr;
+
+	if (extra_ptr &&
+	    extra_ptr != options_ptr) {
+		int len = strlen(name) - (extra_ptr - name);
+
+		ret = drm_mode_parse_cmdline_extra(extra_ptr, len,
+						   connector, mode);
+		if (ret)
+			return false;
+	}
+
+	if (options_ptr) {
+		int len = strlen(name) - (options_ptr - name);
+
+		ret = drm_mode_parse_cmdline_options(options_ptr, len,
+						     connector, mode);
+		if (ret)
+			return false;
 	}
-	mode->rb = rb;
-	mode->cvt = cvt;
-	mode->interlace = interlace;
-	mode->margins = margins;
-	mode->force = force;
 
 	return true;
 }
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 81dd11901ffd..53187821df01 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -21,9 +21,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_modeset_lock.h>
 
 /**
diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index 6becf63f9166..43d89dd59c6b 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -3,12 +3,13 @@
 #include <linux/export.h>
 #include <linux/list.h>
 #include <linux/of_graph.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_panel.h>
 #include <drm/drm_of.h>
+#include <drm/drm_panel.h>
 
 /**
  * DOC: overview
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 521aff99b08a..d8a0bcd02f34 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -42,6 +42,14 @@ static const struct drm_dmi_panel_orientation_data asus_t100ha = {
 	.orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP,
 };
 
+static const struct drm_dmi_panel_orientation_data gpd_micropc = {
+	.width = 720,
+	.height = 1280,
+	.bios_dates = (const char * const []){ "04/26/2019",
+		NULL },
+	.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
+};
+
 static const struct drm_dmi_panel_orientation_data gpd_pocket = {
 	.width = 1200,
 	.height = 1920,
@@ -50,6 +58,14 @@ static const struct drm_dmi_panel_orientation_data gpd_pocket = {
 	.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
 };
 
+static const struct drm_dmi_panel_orientation_data gpd_pocket2 = {
+	.width = 1200,
+	.height = 1920,
+	.bios_dates = (const char * const []){ "06/28/2018", "08/28/2018",
+		"12/07/2018", NULL },
+	.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
+};
+
 static const struct drm_dmi_panel_orientation_data gpd_win = {
 	.width = 720,
 	.height = 1280,
@@ -99,6 +115,14 @@ static const struct dmi_system_id orientation_data[] = {
 		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100HAN"),
 		},
 		.driver_data = (void *)&asus_t100ha,
+	}, {	/* GPD MicroPC (generic strings, also match on bios date) */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"),
+		  DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"),
+		  DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"),
+		},
+		.driver_data = (void *)&gpd_micropc,
 	}, {	/*
 		 * GPD Pocket, note that the the DMI data is less generic then
 		 * it seems, devices with a board-vendor of "AMI Corporation"
@@ -112,6 +136,14 @@ static const struct dmi_system_id orientation_data[] = {
 		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"),
 		},
 		.driver_data = (void *)&gpd_pocket,
+	}, {	/* GPD Pocket 2 (generic strings, also match on bios date) */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"),
+		  DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"),
+		  DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"),
+		},
+		.driver_data = (void *)&gpd_pocket2,
 	}, {	/* GPD Win (same note on DMI match as GPD Pocket) */
 		.matches = {
 		  DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 693748ad8b88..a86a3ab2771c 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -22,12 +22,17 @@
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/pci.h>
-#include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include <drm/drm.h>
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_pci.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
 #include "drm_legacy.h"
 
diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c
index 0fff72dcd06d..3aae7ea522f2 100644
--- a/drivers/gpu/drm/drm_plane_helper.c
+++ b/drivers/gpu/drm/drm_plane_helper.c
@@ -24,14 +24,15 @@
  */
 
 #include <linux/list.h>
-#include <drm/drmP.h>
-#include <drm/drm_plane_helper.h>
-#include <drm/drm_rect.h>
+
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_rect.h>
 
 #define SUBPIXEL_MASK 0xffff
 
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index dc079efb3b0f..d0c01318076b 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -29,9 +29,12 @@
 #include <linux/export.h>
 #include <linux/dma-buf.h>
 #include <linux/rbtree.h>
-#include <drm/drm_prime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_gem.h>
-#include <drm/drmP.h>
+#include <drm/drm_prime.h>
 
 #include "drm_internal.h"
 
@@ -86,11 +89,6 @@ struct drm_prime_member {
 	struct rb_node handle_rb;
 };
 
-struct drm_prime_attachment {
-	struct sg_table *sgt;
-	enum dma_data_direction dir;
-};
-
 static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
 				    struct dma_buf *dma_buf, uint32_t handle)
 {
@@ -188,25 +186,16 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
  * @dma_buf: buffer to attach device to
  * @attach: buffer attachment data
  *
- * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
- * device specific attachment. This can be used as the &dma_buf_ops.attach
- * callback.
+ * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
+ * used as the &dma_buf_ops.attach callback.
  *
  * Returns 0 on success, negative error code on failure.
  */
 int drm_gem_map_attach(struct dma_buf *dma_buf,
 		       struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
-	if (!prime_attach)
-		return -ENOMEM;
-
-	prime_attach->dir = DMA_NONE;
-	attach->priv = prime_attach;
-
 	return drm_gem_pin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_attach);
@@ -222,26 +211,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
 void drm_gem_map_detach(struct dma_buf *dma_buf,
 			struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	if (prime_attach) {
-		struct sg_table *sgt = prime_attach->sgt;
-
-		if (sgt) {
-			if (prime_attach->dir != DMA_NONE)
-				dma_unmap_sg_attrs(attach->dev, sgt->sgl,
-						   sgt->nents,
-						   prime_attach->dir,
-						   DMA_ATTR_SKIP_CPU_SYNC);
-			sg_free_table(sgt);
-		}
-
-		kfree(sgt);
-		kfree(prime_attach);
-		attach->priv = NULL;
-	}
-
 	drm_gem_unpin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_detach);
@@ -286,39 +257,22 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
 struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 				     enum dma_data_direction dir)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct sg_table *sgt;
 
-	if (WARN_ON(dir == DMA_NONE || !prime_attach))
+	if (WARN_ON(dir == DMA_NONE))
 		return ERR_PTR(-EINVAL);
 
-	/* return the cached mapping when possible */
-	if (prime_attach->dir == dir)
-		return prime_attach->sgt;
-
-	/*
-	 * two mappings with different directions for the same attachment are
-	 * not allowed
-	 */
-	if (WARN_ON(prime_attach->dir != DMA_NONE))
-		return ERR_PTR(-EBUSY);
-
 	if (obj->funcs)
 		sgt = obj->funcs->get_sg_table(obj);
 	else
 		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
-	if (!IS_ERR(sgt)) {
-		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
-				      DMA_ATTR_SKIP_CPU_SYNC)) {
-			sg_free_table(sgt);
-			kfree(sgt);
-			sgt = ERR_PTR(-ENOMEM);
-		} else {
-			prime_attach->sgt = sgt;
-			prime_attach->dir = dir;
-		}
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC)) {
+		sg_free_table(sgt);
+		kfree(sgt);
+		sgt = ERR_PTR(-ENOMEM);
 	}
 
 	return sgt;
@@ -331,14 +285,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
  * @sgt: scatterlist info of the buffer to unmap
  * @dir: direction of DMA transfer
  *
- * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
- * used as the &dma_buf_ops.unmap_dma_buf callback.
+ * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
  */
 void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
 			   struct sg_table *sgt,
 			   enum dma_data_direction dir)
 {
-	/* nothing to be done here */
+	if (!sgt)
+		return;
+
+	dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
 
@@ -452,6 +411,7 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
 
 static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
+	.cache_sgt_mapping = true,
 	.attach = drm_gem_map_attach,
 	.detach = drm_gem_map_detach,
 	.map_dma_buf = drm_gem_map_dma_buf,
diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
index f5cb0aabfe35..a17c8a14dba4 100644
--- a/drivers/gpu/drm/drm_print.c
+++ b/drivers/gpu/drm/drm_print.c
@@ -26,8 +26,13 @@
 #define DEBUG /* for pr_debug() */
 
 #include <stdarg.h>
+
+#include <linux/io.h>
 #include <linux/seq_file.h>
-#include <drm/drmP.h>
+#include <linux/slab.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_print.h>
 
 void __drm_puts_coredump(struct drm_printer *p, const char *str)
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 6fd08e04b323..ef2c468205a2 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -32,14 +32,15 @@
 #include <linux/export.h>
 #include <linux/moduleparam.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_client.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_fourcc.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_sysfs.h>
 
 #include "drm_crtc_helper_internal.h"
 
@@ -479,6 +480,13 @@ retry:
 
 	count = (*connector_funcs->get_modes)(connector);
 
+	/*
+	 * Fallback for when DDC probe failed in drm_get_edid() and thus skipped
+	 * override/firmware EDID.
+	 */
+	if (count == 0 && connector->status == connector_status_connected)
+		count = drm_add_override_edid_modes(connector);
+
 	if (count == 0 && connector->status == connector_status_connected)
 		count = drm_add_modes_noedid(connector, 1024, 768);
 	count += drm_helper_probe_add_cmdline_mode(connector);
@@ -574,6 +582,9 @@ static void output_poll_execute(struct work_struct *work)
 	enum drm_connector_status old_status;
 	bool repoll = false, changed;
 
+	if (!dev->mode_config.poll_enabled)
+		return;
+
 	/* Pick up any changes detected by the probe functions. */
 	changed = dev->mode_config.delayed_event;
 	dev->mode_config.delayed_event = false;
@@ -728,7 +739,11 @@ EXPORT_SYMBOL(drm_kms_helper_poll_init);
  */
 void drm_kms_helper_poll_fini(struct drm_device *dev)
 {
-	drm_kms_helper_poll_disable(dev);
+	if (!dev->mode_config.poll_enabled)
+		return;
+
+	dev->mode_config.poll_enabled = false;
+	cancel_delayed_work_sync(&dev->mode_config.output_poll_work);
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_fini);
 
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index f8ec8f9c3e7a..892ce636ef72 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -21,7 +21,12 @@
  */
 
 #include <linux/export.h>
-#include <drm/drmP.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_property.h>
 
 #include "drm_crtc_internal.h"
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index 66c41b12719c..b8363aaa9032 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -24,7 +24,9 @@
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_mode.h>
+#include <drm/drm_print.h>
 #include <drm/drm_rect.h>
 
 /**
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index bb829a115fc6..2d7790f14b0c 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -31,9 +31,14 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_print.h>
+
 #include "drm_legacy.h"
 
 #define DEBUG_SCATTER 0
diff --git a/drivers/gpu/drm/drm_scdc_helper.c b/drivers/gpu/drm/drm_scdc_helper.c
index 870e25f1f788..311e71bbba5b 100644
--- a/drivers/gpu/drm/drm_scdc_helper.c
+++ b/drivers/gpu/drm/drm_scdc_helper.c
@@ -24,8 +24,8 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 
+#include <drm/drm_print.h>
 #include <drm/drm_scdc_helper.h>
-#include <drm/drmP.h>
 
 /**
  * DOC: scdc helpers
diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
new file mode 100644
index 000000000000..4b9424a8f1f1
--- /dev/null
+++ b/drivers/gpu/drm/drm_self_refresh_helper.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Authors:
+ * Sean Paul <seanpaul@chromium.org>
+ */
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mode_config.h>
+#include <drm/drm_modeset_lock.h>
+#include <drm/drm_print.h>
+#include <drm/drm_self_refresh_helper.h>
+
+/**
+ * DOC: overview
+ *
+ * This helper library provides an easy way for drivers to leverage the atomic
+ * framework to implement panel self refresh (SR) support. Drivers are
+ * responsible for initializing and cleaning up the SR helpers on load/unload
+ * (see &drm_self_refresh_helper_init/&drm_self_refresh_helper_cleanup).
+ * The connector is responsible for setting
+ * &drm_connector_state.self_refresh_aware to true at runtime if it is SR-aware
+ * (meaning it knows how to initiate self refresh on the panel).
+ *
+ * Once a crtc has enabled SR using &drm_self_refresh_helper_init, the
+ * helpers will monitor activity and call back into the driver to enable/disable
+ * SR as appropriate. The best way to think about this is that it's a DPMS
+ * on/off request with &drm_crtc_state.self_refresh_active set in crtc state
+ * that tells you to disable/enable SR on the panel instead of power-cycling it.
+ *
+ * During SR, drivers may choose to fully disable their crtc/encoder/bridge
+ * hardware (in which case no driver changes are necessary), or they can inspect
+ * &drm_crtc_state.self_refresh_active if they want to enter low power mode
+ * without full disable (in case full disable/enable is too slow).
+ *
+ * SR will be deactivated if there are any atomic updates affecting the
+ * pipe that is in SR mode. If a crtc is driving multiple connectors, all
+ * connectors must be SR aware and all will enter/exit SR mode at the same time.
+ *
+ * If the crtc and connector are SR aware, but the panel connected does not
+ * support it (or is otherwise unable to enter SR), the driver should fail
+ * atomic_check when &drm_crtc_state.self_refresh_active is true.
+ */
+
+struct drm_self_refresh_data {
+	struct drm_crtc *crtc;
+	struct delayed_work entry_work;
+	struct drm_atomic_state *save_state;
+	unsigned int entry_delay_ms;
+};
+
+static void drm_self_refresh_helper_entry_work(struct work_struct *work)
+{
+	struct drm_self_refresh_data *sr_data = container_of(
+				to_delayed_work(work),
+				struct drm_self_refresh_data, entry_work);
+	struct drm_crtc *crtc = sr_data->crtc;
+	struct drm_device *dev = crtc->dev;
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_atomic_state *state;
+	struct drm_connector *conn;
+	struct drm_connector_state *conn_state;
+	struct drm_crtc_state *crtc_state;
+	int i, ret = 0;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+	state = drm_atomic_state_alloc(dev);
+	if (!state) {
+		ret = -ENOMEM;
+		goto out_drop_locks;
+	}
+
+retry:
+	state->acquire_ctx = &ctx;
+
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state)) {
+		ret = PTR_ERR(crtc_state);
+		goto out;
+	}
+
+	if (!crtc_state->enable)
+		goto out;
+
+	ret = drm_atomic_add_affected_connectors(state, crtc);
+	if (ret)
+		goto out;
+
+	for_each_new_connector_in_state(state, conn, conn_state, i) {
+		if (!conn_state->self_refresh_aware)
+			goto out;
+	}
+
+	crtc_state->active = false;
+	crtc_state->self_refresh_active = true;
+
+	ret = drm_atomic_commit(state);
+	if (ret)
+		goto out;
+
+out:
+	if (ret == -EDEADLK) {
+		drm_atomic_state_clear(state);
+		ret = drm_modeset_backoff(&ctx);
+		if (!ret)
+			goto retry;
+	}
+
+	drm_atomic_state_put(state);
+
+out_drop_locks:
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+}
+
+/**
+ * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit
+ * @state: the state currently being checked
+ *
+ * Called at the end of atomic check. This function checks the state for flags
+ * incompatible with self refresh exit and changes them. This is a bit
+ * disingenuous since userspace is expecting one thing and we're giving it
+ * another. However in order to keep self refresh entirely hidden from
+ * userspace, this is required.
+ *
+ * At the end, we queue up the self refresh entry work so we can enter PSR after
+ * the desired delay.
+ */
+void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	int i;
+
+	if (state->async_update || !state->allow_modeset) {
+		for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
+			if (crtc_state->self_refresh_active) {
+				state->async_update = false;
+				state->allow_modeset = true;
+				break;
+			}
+		}
+	}
+
+	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
+		struct drm_self_refresh_data *sr_data;
+
+		/* Don't trigger the entry timer when we're already in SR */
+		if (crtc_state->self_refresh_active)
+			continue;
+
+		sr_data = crtc->self_refresh_data;
+		if (!sr_data)
+			continue;
+
+		mod_delayed_work(system_wq, &sr_data->entry_work,
+				 msecs_to_jiffies(sr_data->entry_delay_ms));
+	}
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
+
+/**
+ * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc
+ * @crtc: the crtc which supports self refresh supported displays
+ * @entry_delay_ms: amount of inactivity to wait before entering self refresh
+ *
+ * Returns zero if successful or -errno on failure
+ */
+int drm_self_refresh_helper_init(struct drm_crtc *crtc,
+				 unsigned int entry_delay_ms)
+{
+	struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
+
+	/* Helper is already initialized */
+	if (WARN_ON(sr_data))
+		return -EINVAL;
+
+	sr_data = kzalloc(sizeof(*sr_data), GFP_KERNEL);
+	if (!sr_data)
+		return -ENOMEM;
+
+	INIT_DELAYED_WORK(&sr_data->entry_work,
+			  drm_self_refresh_helper_entry_work);
+	sr_data->entry_delay_ms = entry_delay_ms;
+	sr_data->crtc = crtc;
+
+	crtc->self_refresh_data = sr_data;
+	return 0;
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_init);
+
+/**
+ * drm_self_refresh_helper_cleanup - Cleans up self refresh helpers for a crtc
+ * @crtc: the crtc to cleanup
+ */
+void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc)
+{
+	struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
+
+	/* Helper is already uninitialized */
+	if (!sr_data)
+		return;
+
+	crtc->self_refresh_data = NULL;
+
+	cancel_delayed_work_sync(&sr_data->entry_work);
+	kfree(sr_data);
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_cleanup);
diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c
index 5d5e9091ad6d..b11910f14c46 100644
--- a/drivers/gpu/drm/drm_simple_kms_helper.c
+++ b/drivers/gpu/drm/drm_simple_kms_helper.c
@@ -3,13 +3,14 @@
  * Copyright (C) 2016 Noralf Trønnes
  */
 
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
-#include <linux/slab.h>
 
 /**
  * DOC: overview
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 3d400905100b..a199c8d56b95 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -46,16 +46,21 @@
  * The file takes a reference on the kref.
  */
 
-#include <drm/drmP.h>
+#include <linux/anon_inodes.h>
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/anon_inodes.h>
-#include <linux/sync_file.h>
 #include <linux/sched/signal.h>
+#include <linux/sync_file.h>
+#include <linux/uaccess.h>
 
-#include "drm_internal.h"
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_print.h>
 #include <drm/drm_syncobj.h>
 
+#include "drm_internal.h"
+
 struct syncobj_wait_entry {
 	struct list_head node;
 	struct task_struct *task;
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 74857fafb0b8..ad10810bc972 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -11,13 +11,20 @@
  */
 
 #include <linux/device.h>
-#include <linux/kdev_t.h>
-#include <linux/gfp.h>
 #include <linux/err.h>
 #include <linux/export.h>
-
+#include <linux/gfp.h>
+#include <linux/kdev_t.h>
+#include <linux/slab.h>
+
+#include <drm/drm_connector.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_print.h>
+#include <drm/drm_property.h>
 #include <drm/drm_sysfs.h>
-#include <drm/drmP.h>
+
 #include "drm_internal.h"
 
 #define to_drm_minor(d) dev_get_drvdata(d)
@@ -76,6 +83,7 @@ int drm_sysfs_init(void)
 	}
 
 	drm_class->devnode = drm_devnode;
+	drm_setup_hdcp_srm(drm_class);
 	return 0;
 }
 
@@ -88,6 +96,7 @@ void drm_sysfs_destroy(void)
 {
 	if (IS_ERR_OR_NULL(drm_class))
 		return;
+	drm_teardown_hdcp_srm(drm_class);
 	class_remove_file(drm_class, &class_attr_version.attr);
 	class_destroy(drm_class);
 	drm_class = NULL;
diff --git a/drivers/gpu/drm/drm_trace.h b/drivers/gpu/drm/drm_trace.h
index baccc63db106..471eb927474b 100644
--- a/drivers/gpu/drm/drm_trace.h
+++ b/drivers/gpu/drm/drm_trace.h
@@ -6,6 +6,8 @@
 #include <linux/types.h>
 #include <linux/tracepoint.h>
 
+struct drm_file;
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM drm
 #define TRACE_INCLUDE_FILE drm_trace
diff --git a/drivers/gpu/drm/drm_trace_points.c b/drivers/gpu/drm/drm_trace_points.c
index 3bbc4deb4dbc..1e2065b403c9 100644
--- a/drivers/gpu/drm/drm_trace_points.c
+++ b/drivers/gpu/drm/drm_trace_points.c
@@ -1,4 +1,5 @@
-#include <drm/drmP.h>
+
+#include <drm/drm_file.h>
 
 #define CREATE_TRACE_POINTS
 #include "drm_trace.h"
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index a1b65d26d761..603ab105125d 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -24,12 +24,18 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drm_vblank.h>
-#include <drm/drmP.h>
 #include <linux/export.h>
+#include <linux/moduleparam.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
+#include <drm/drm_os_linux.h>
+#include <drm/drm_vblank.h>
 
-#include "drm_trace.h"
 #include "drm_internal.h"
+#include "drm_trace.h"
 
 /**
  * DOC: vblank handling
@@ -235,12 +241,16 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe,
 		 * on the difference in the timestamps and the
 		 * frame/field duration.
 		 */
+
+		DRM_DEBUG_VBL("crtc %u: Calculating number of vblanks."
+			      " diff_ns = %lld, framedur_ns = %d)\n",
+			      pipe, (long long) diff_ns, framedur_ns);
+
 		diff = DIV_ROUND_CLOSEST_ULL(diff_ns, framedur_ns);
 
 		if (diff == 0 && in_vblank_irq)
-			DRM_DEBUG_VBL("crtc %u: Redundant vblirq ignored."
-				      " diff_ns = %lld, framedur_ns = %d)\n",
-				      pipe, (long long) diff_ns, framedur_ns);
+			DRM_DEBUG_VBL("crtc %u: Redundant vblirq ignored\n",
+				      pipe);
 	} else {
 		/* some kind of default for drivers w/o accurate vbl timestamping */
 		diff = in_vblank_irq ? 1 : 0;
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 10cf83d569e1..05f7c5833946 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -33,15 +33,27 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
 #include <linux/export.h>
+#include <linux/pci.h>
 #include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+
 #if defined(__ia64__)
 #include <linux/efi.h>
 #include <linux/slab.h>
 #endif
 #include <linux/mem_encrypt.h>
+
 #include <asm/pgtable.h>
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_print.h>
+
 #include "drm_internal.h"
 #include "drm_legacy.h"
 
@@ -62,7 +74,8 @@ static pgprot_t drm_io_prot(struct drm_local_map *map,
 	/* We don't want graphics memory to be mapped encrypted */
 	tmp = pgprot_decrypted(tmp);
 
-#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || \
+    defined(__mips__)
 	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
 		tmp = pgprot_noncached(tmp);
 	else
@@ -73,7 +86,7 @@ static pgprot_t drm_io_prot(struct drm_local_map *map,
 		tmp = pgprot_writecombine(tmp);
 	else
 		tmp = pgprot_noncached(tmp);
-#elif defined(__sparc__) || defined(__arm__) || defined(__mips__)
+#elif defined(__sparc__) || defined(__arm__)
 	tmp = pgprot_noncached(tmp);
 #endif
 	return tmp;
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index c5d0d2358301..4565319fa6b3 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -23,9 +23,6 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_mm.h>
-#include <drm/drm_vma_manager.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/rbtree.h>
@@ -33,6 +30,9 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
+#include <drm/drm_mm.h>
+#include <drm/drm_vma_manager.h>
+
 /**
  * DOC: vma offset manager
  *
diff --git a/drivers/gpu/drm/drm_vram_helper_common.c b/drivers/gpu/drm/drm_vram_helper_common.c
new file mode 100644
index 000000000000..e9c9f9a80ba3
--- /dev/null
+++ b/drivers/gpu/drm/drm_vram_helper_common.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/module.h>
+
+/**
+ * DOC: overview
+ *
+ * This library provides &struct drm_gem_vram_object (GEM VRAM), a GEM
+ * buffer object that is backed by video RAM. It can be used for
+ * framebuffer devices with dedicated memory. The video RAM can be
+ * managed with &struct drm_vram_mm (VRAM MM). Both data structures are
+ * supposed to be used together, but can also be used individually.
+ *
+ * With the GEM interface userspace applications create, manage and destroy
+ * graphics buffers, such as an on-screen framebuffer. GEM does not provide
+ * an implementation of these interfaces. It's up to the DRM driver to
+ * provide an implementation that suits the hardware. If the hardware device
+ * contains dedicated video memory, the DRM driver can use the VRAM helper
+ * library. Each active buffer object is stored in video RAM. Active
+ * buffer are used for drawing the current frame, typically something like
+ * the frame's scanout buffer or the cursor image. If there's no more space
+ * left in VRAM, inactive GEM objects can be moved to system memory.
+ *
+ * The easiest way to use the VRAM helper library is to call
+ * drm_vram_helper_alloc_mm(). The function allocates and initializes an
+ * instance of &struct drm_vram_mm in &struct drm_device.vram_mm . Use
+ * &DRM_GEM_VRAM_DRIVER to initialize &struct drm_driver and
+ * &DRM_VRAM_MM_FILE_OPERATIONS to initialize &struct file_operations;
+ * as illustrated below.
+ *
+ * .. code-block:: c
+ *
+ *	struct file_operations fops ={
+ *		.owner = THIS_MODULE,
+ *		DRM_VRAM_MM_FILE_OPERATION
+ *	};
+ *	struct drm_driver drv = {
+ *		.driver_feature = DRM_ ... ,
+ *		.fops = &fops,
+ *		DRM_GEM_VRAM_DRIVER
+ *	};
+ *
+ *	int init_drm_driver()
+ *	{
+ *		struct drm_device *dev;
+ *		uint64_t vram_base;
+ *		unsigned long vram_size;
+ *		int ret;
+ *
+ *		// setup device, vram base and size
+ *		// ...
+ *
+ *		ret = drm_vram_helper_alloc_mm(dev, vram_base, vram_size,
+ *					       &drm_gem_vram_mm_funcs);
+ *		if (ret)
+ *			return ret;
+ *		return 0;
+ *	}
+ *
+ * This creates an instance of &struct drm_vram_mm, exports DRM userspace
+ * interfaces for GEM buffer management and initializes file operations to
+ * allow for accessing created GEM buffers. With this setup, the DRM driver
+ * manages an area of video RAM with VRAM MM and provides GEM VRAM objects
+ * to userspace.
+ *
+ * To clean up the VRAM memory management, call drm_vram_helper_release_mm()
+ * in the driver's clean-up code.
+ *
+ * .. code-block:: c
+ *
+ *	void fini_drm_driver()
+ *	{
+ *		struct drm_device *dev = ...;
+ *
+ *		drm_vram_helper_release_mm(dev);
+ *	}
+ *
+ * For drawing or scanout operations, buffer object have to be pinned in video
+ * RAM. Call drm_gem_vram_pin() with &DRM_GEM_VRAM_PL_FLAG_VRAM or
+ * &DRM_GEM_VRAM_PL_FLAG_SYSTEM to pin a buffer object in video RAM or system
+ * memory. Call drm_gem_vram_unpin() to release the pinned object afterwards.
+ *
+ * A buffer object that is pinned in video RAM has a fixed address within that
+ * memory region. Call drm_gem_vram_offset() to retrieve this value. Typically
+ * it's used to program the hardware's scanout engine for framebuffers, set
+ * the cursor overlay's image for a mouse cursor, or use it as input to the
+ * hardware's draing engine.
+ *
+ * To access a buffer object's memory from the DRM driver, call
+ * drm_gem_vram_kmap(). It (optionally) maps the buffer into kernel address
+ * space and returns the memory address. Use drm_gem_vram_kunmap() to
+ * release the mapping.
+ */
+
+MODULE_DESCRIPTION("DRM VRAM memory-management helpers");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/drm_vram_mm_helper.c b/drivers/gpu/drm/drm_vram_mm_helper.c
new file mode 100644
index 000000000000..c911781d6728
--- /dev/null
+++ b/drivers/gpu/drm/drm_vram_mm_helper.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_vram_mm_helper.h>
+
+#include <drm/ttm/ttm_page_alloc.h>
+
+/**
+ * DOC: overview
+ *
+ * The data structure &struct drm_vram_mm and its helpers implement a memory
+ * manager for simple framebuffer devices with dedicated video memory. Buffer
+ * objects are either placed in video RAM or evicted to system memory. These
+ * helper functions work well with &struct drm_gem_vram_object.
+ */
+
+/*
+ * TTM TT
+ */
+
+static void backend_func_destroy(struct ttm_tt *tt)
+{
+	ttm_tt_fini(tt);
+	kfree(tt);
+}
+
+static struct ttm_backend_func backend_func = {
+	.destroy = backend_func_destroy
+};
+
+/*
+ * TTM BO device
+ */
+
+static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
+					      uint32_t page_flags)
+{
+	struct ttm_tt *tt;
+	int ret;
+
+	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
+	if (!tt)
+		return NULL;
+
+	tt->func = &backend_func;
+
+	ret = ttm_tt_init(tt, bo, page_flags);
+	if (ret < 0)
+		goto err_ttm_tt_init;
+
+	return tt;
+
+err_ttm_tt_init:
+	kfree(tt);
+	return NULL;
+}
+
+static int bo_driver_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+				   struct ttm_mem_type_manager *man)
+{
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_VRAM:
+		man->func = &ttm_bo_manager_func;
+		man->flags = TTM_MEMTYPE_FLAG_FIXED |
+			     TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED |
+					 TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bo_driver_evict_flags(struct ttm_buffer_object *bo,
+				  struct ttm_placement *placement)
+{
+	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bo->bdev);
+
+	if (vmm->funcs && vmm->funcs->evict_flags)
+		vmm->funcs->evict_flags(bo, placement);
+}
+
+static int bo_driver_verify_access(struct ttm_buffer_object *bo,
+				   struct file *filp)
+{
+	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bo->bdev);
+
+	if (!vmm->funcs || !vmm->funcs->verify_access)
+		return 0;
+	return vmm->funcs->verify_access(bo, filp);
+}
+
+static int bo_driver_io_mem_reserve(struct ttm_bo_device *bdev,
+				    struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = bdev->man + mem->mem_type;
+	struct drm_vram_mm *vmm = drm_vram_mm_of_bdev(bdev);
+
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+
+	mem->bus.addr = NULL;
+	mem->bus.size = mem->num_pages << PAGE_SHIFT;
+
+	switch (mem->mem_type) {
+	case TTM_PL_SYSTEM:	/* nothing to do */
+		mem->bus.offset = 0;
+		mem->bus.base = 0;
+		mem->bus.is_iomem = false;
+		break;
+	case TTM_PL_VRAM:
+		mem->bus.offset = mem->start << PAGE_SHIFT;
+		mem->bus.base = vmm->vram_base;
+		mem->bus.is_iomem = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void bo_driver_io_mem_free(struct ttm_bo_device *bdev,
+				  struct ttm_mem_reg *mem)
+{ }
+
+static struct ttm_bo_driver bo_driver = {
+	.ttm_tt_create = bo_driver_ttm_tt_create,
+	.ttm_tt_populate = ttm_pool_populate,
+	.ttm_tt_unpopulate = ttm_pool_unpopulate,
+	.init_mem_type = bo_driver_init_mem_type,
+	.eviction_valuable = ttm_bo_eviction_valuable,
+	.evict_flags = bo_driver_evict_flags,
+	.verify_access = bo_driver_verify_access,
+	.io_mem_reserve = bo_driver_io_mem_reserve,
+	.io_mem_free = bo_driver_io_mem_free,
+};
+
+/*
+ * struct drm_vram_mm
+ */
+
+/**
+ * drm_vram_mm_init() - Initialize an instance of VRAM MM.
+ * @vmm:	the VRAM MM instance to initialize
+ * @dev:	the DRM device
+ * @vram_base:	the base address of the video memory
+ * @vram_size:	the size of the video memory in bytes
+ * @funcs:	callback functions for buffer objects
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
+		     uint64_t vram_base, size_t vram_size,
+		     const struct drm_vram_mm_funcs *funcs)
+{
+	int ret;
+
+	vmm->vram_base = vram_base;
+	vmm->vram_size = vram_size;
+	vmm->funcs = funcs;
+
+	ret = ttm_bo_device_init(&vmm->bdev, &bo_driver,
+				 dev->anon_inode->i_mapping,
+				 true);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_init_mm(&vmm->bdev, TTM_PL_VRAM, vram_size >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_vram_mm_init);
+
+/**
+ * drm_vram_mm_cleanup() - Cleans up an initialized instance of VRAM MM.
+ * @vmm:	the VRAM MM instance to clean up
+ */
+void drm_vram_mm_cleanup(struct drm_vram_mm *vmm)
+{
+	ttm_bo_device_release(&vmm->bdev);
+}
+EXPORT_SYMBOL(drm_vram_mm_cleanup);
+
+/**
+ * drm_vram_mm_mmap() - Helper for implementing &struct file_operations.mmap()
+ * @filp:	the mapping's file structure
+ * @vma:	the mapping's memory area
+ * @vmm:	the VRAM MM instance
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_vram_mm_mmap(struct file *filp, struct vm_area_struct *vma,
+		     struct drm_vram_mm *vmm)
+{
+	return ttm_bo_mmap(filp, vma, &vmm->bdev);
+}
+EXPORT_SYMBOL(drm_vram_mm_mmap);
+
+/*
+ * Helpers for integration with struct drm_device
+ */
+
+/**
+ * drm_vram_helper_alloc_mm - Allocates a device's instance of \
+	&struct drm_vram_mm
+ * @dev:	the DRM device
+ * @vram_base:	the base address of the video memory
+ * @vram_size:	the size of the video memory in bytes
+ * @funcs:	callback functions for buffer objects
+ *
+ * Returns:
+ * The new instance of &struct drm_vram_mm on success, or
+ * an ERR_PTR()-encoded errno code otherwise.
+ */
+struct drm_vram_mm *drm_vram_helper_alloc_mm(
+	struct drm_device *dev, uint64_t vram_base, size_t vram_size,
+	const struct drm_vram_mm_funcs *funcs)
+{
+	int ret;
+
+	if (WARN_ON(dev->vram_mm))
+		return dev->vram_mm;
+
+	dev->vram_mm = kzalloc(sizeof(*dev->vram_mm), GFP_KERNEL);
+	if (!dev->vram_mm)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_vram_mm_init(dev->vram_mm, dev, vram_base, vram_size, funcs);
+	if (ret)
+		goto err_kfree;
+
+	return dev->vram_mm;
+
+err_kfree:
+	kfree(dev->vram_mm);
+	dev->vram_mm = NULL;
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(drm_vram_helper_alloc_mm);
+
+/**
+ * drm_vram_helper_release_mm - Releases a device's instance of \
+	&struct drm_vram_mm
+ * @dev:	the DRM device
+ */
+void drm_vram_helper_release_mm(struct drm_device *dev)
+{
+	if (!dev->vram_mm)
+		return;
+
+	drm_vram_mm_cleanup(dev->vram_mm);
+	kfree(dev->vram_mm);
+	dev->vram_mm = NULL;
+}
+EXPORT_SYMBOL(drm_vram_helper_release_mm);
+
+/*
+ * Helpers for &struct file_operations
+ */
+
+/**
+ * drm_vram_mm_file_operations_mmap() - \
+	Implements &struct file_operations.mmap()
+ * @filp:	the mapping's file structure
+ * @vma:	the mapping's memory area
+ *
+ * Returns:
+ * 0 on success, or
+ * a negative error code otherwise.
+ */
+int drm_vram_mm_file_operations_mmap(
+	struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct drm_device *dev = file_priv->minor->dev;
+
+	if (WARN_ONCE(!dev->vram_mm, "VRAM MM not initialized"))
+		return -EINVAL;
+
+	return drm_vram_mm_mmap(filp, vma, dev->vram_mm);
+}
+EXPORT_SYMBOL(drm_vram_mm_file_operations_mmap);
diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index 79ac014701c8..ff138b6ec48b 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -9,12 +9,14 @@
  * of such GNU licence.
  */
 
+#include <linux/dma-fence.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <drm/drm_property.h>
 #include <drm/drm_writeback.h>
-#include <drm/drmP.h>
-#include <linux/dma-fence.h>
 
 /**
  * DOC: overview
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 515515ef24f9..9a6f5b65488f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -118,7 +118,6 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
-	unsigned long flags;
 
 	/* Only catch the first event, or when manually re-armed */
 	if (!etnaviv_dump_core)
@@ -137,13 +136,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Add in the active buffer objects */
 	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
@@ -186,14 +183,12 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 			      gpu->buffer.size,
 			      etnaviv_cmdbuf_get_va(&gpu->buffer));
 
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
 				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
 				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 6d24fea1766b..a813c824e154 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -109,7 +109,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 	}
 
 	/* block scheduler */
-	drm_sched_stop(&gpu->sched);
+	drm_sched_stop(&gpu->sched, sched_job);
 
 	if(sched_job)
 		drm_sched_increase_karma(sched_job);
diff --git a/drivers/gpu/drm/gma500/accel_2d.c b/drivers/gpu/drm/gma500/accel_2d.c
index a07eacb3d5e2..45ad5ffedc93 100644
--- a/drivers/gpu/drm/gma500/accel_2d.c
+++ b/drivers/gpu/drm/gma500/accel_2d.c
@@ -8,24 +8,24 @@
  *
  **************************************************************************/
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
-#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/tty.h>
+#include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/tty.h>
 
-#include <drm/drmP.h>
 #include <drm/drm.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_fourcc.h>
 
+#include "framebuffer.h"
 #include "psb_drv.h"
 #include "psb_reg.h"
-#include "framebuffer.h"
 
 /**
  *	psb_spank		-	reset the 2D engine
diff --git a/drivers/gpu/drm/gma500/blitter.h b/drivers/gpu/drm/gma500/blitter.h
index 0d1c4cc01fd2..8d67dabd9ba3 100644
--- a/drivers/gpu/drm/gma500/blitter.h
+++ b/drivers/gpu/drm/gma500/blitter.h
@@ -9,6 +9,8 @@
 #ifndef __BLITTER_H
 #define __BLITTER_H
 
+struct drm_psb_private;
+
 extern int gma_blt_wait_idle(struct drm_psb_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 3df010438bac..4d216a0205f2 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -6,15 +6,16 @@
  **************************************************************************/
 
 #include <linux/backlight.h>
-#include <drm/drmP.h>
+#include <linux/delay.h>
+
 #include <drm/drm.h>
-#include <drm/gma_drm.h>
-#include "psb_drv.h"
-#include "psb_reg.h"
-#include "psb_intel_reg.h"
-#include "intel_bios.h"
+
 #include "cdv_device.h"
 #include "gma_device.h"
+#include "intel_bios.h"
+#include "psb_drv.h"
+#include "psb_intel_reg.h"
+#include "psb_reg.h"
 
 #define VGA_SR_INDEX		0x3c4
 #define VGA_SR_DATA		0x3c5
diff --git a/drivers/gpu/drm/gma500/cdv_device.h b/drivers/gpu/drm/gma500/cdv_device.h
index b375bc206363..37e4bdc84c03 100644
--- a/drivers/gpu/drm/gma500/cdv_device.h
+++ b/drivers/gpu/drm/gma500/cdv_device.h
@@ -3,6 +3,10 @@
  * Copyright © 2011 Intel Corporation
  */
 
+struct drm_crtc;
+struct drm_device;
+struct psb_intel_mode_device;
+
 extern const struct drm_crtc_helper_funcs cdv_intel_helper_funcs;
 extern const struct drm_crtc_funcs cdv_intel_crtc_funcs;
 extern const struct gma_clock_funcs cdv_clock_funcs;
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index cb5a14b7ec7f..29c36d63b20e 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -24,16 +24,16 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
-#include <drm/drmP.h>
+#include <linux/pm_runtime.h>
 
+#include "cdv_device.h"
 #include "intel_bios.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "power.h"
-#include "cdv_device.h"
-#include <linux/pm_runtime.h>
 
 
 static void cdv_intel_crt_dpms(struct drm_encoder *encoder, int mode)
diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c
index 235cfeeec100..f56852a503e8 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_display.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_display.c
@@ -6,16 +6,18 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+
+#include "cdv_device.h"
 #include "framebuffer.h"
+#include "gma_display.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "gma_display.h"
-#include "power.h"
-#include "cdv_device.h"
 
 static bool cdv_intel_find_dp_pll(const struct gma_limit_t *limit,
 				  struct drm_crtc *crtc, int target,
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 90ed20083009..570b59520fd1 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -26,16 +26,17 @@
  */
 
 #include <linux/i2c.h>
-#include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+#include <linux/slab.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+
+#include "gma_display.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "gma_display.h"
-#include <drm/drm_dp_helper.h>
 
 /**
  * struct i2c_algo_dp_aux_data - driver interface structure for i2c over dp
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 4e4e4a66eaee..1711a41acc16 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -27,15 +27,16 @@
  *	We should probably make this generic and share it with Medfield
  */
 
-#include <drm/drmP.h>
+#include <linux/pm_runtime.h>
+
 #include <drm/drm.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
-#include "psb_intel_drv.h"
+
+#include "cdv_device.h"
 #include "psb_drv.h"
+#include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "cdv_device.h"
-#include <linux/pm_runtime.h>
 
 /* hdmi control bits */
 #define HDMI_NULL_PACKETS_DURING_VSYNC	(1 << 9)
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index 4b103b3eb5ad..ea0a5d9a0acc 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -8,17 +8,16 @@
  *	Jesse Barnes <jesse.barnes@intel.com>
  */
 
-#include <linux/i2c.h>
 #include <linux/dmi.h>
-#include <drm/drmP.h>
+#include <linux/i2c.h>
+#include <linux/pm_runtime.h>
 
+#include "cdv_device.h"
 #include "intel_bios.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "power.h"
-#include <linux/pm_runtime.h>
-#include "cdv_device.h"
 
 /**
  * LVDS I2C backlight control macros
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 45c3db50ee1a..218f3bb15276 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -5,29 +5,29 @@
  *
  **************************************************************************/
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/delay.h>
 #include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/pfn_t.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/tty.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/tty.h>
 
-#include <drm/drmP.h>
 #include <drm/drm.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
-#include "psb_drv.h"
-#include "psb_intel_reg.h"
-#include "psb_intel_drv.h"
 #include "framebuffer.h"
 #include "gtt.h"
+#include "psb_drv.h"
+#include "psb_intel_drv.h"
+#include "psb_intel_reg.h"
 
 static const struct drm_framebuffer_funcs psb_fb_funcs = {
 	.destroy = drm_gem_fb_destroy,
@@ -220,7 +220,7 @@ static int psb_framebuffer_init(struct drm_device *dev,
 	 * Reject unknown formats, YUV formats, and formats with more than
 	 * 4 bytes per pixel.
 	 */
-	info = drm_format_info(mode_cmd->pixel_format);
+	info = drm_get_format_info(dev, mode_cmd);
 	if (!info || !info->depth || info->cpp[0] > 4)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/gma500/framebuffer.h b/drivers/gpu/drm/gma500/framebuffer.h
index a8a0b117b661..ae8a02639fd9 100644
--- a/drivers/gpu/drm/gma500/framebuffer.h
+++ b/drivers/gpu/drm/gma500/framebuffer.h
@@ -9,7 +9,6 @@
 #ifndef _FRAMEBUFFER_H_
 #define _FRAMEBUFFER_H_
 
-#include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 
 #include "psb_drv.h"
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index 6cb0ad52de5e..83ee86f70b89 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -11,10 +11,11 @@
  *		accelerated operations on a GEM object)
  */
 
-#include <drm/drmP.h>
+#include <linux/pagemap.h>
+
 #include <drm/drm.h>
-#include <drm/gma_drm.h>
 #include <drm/drm_vma_manager.h>
+
 #include "psb_drv.h"
 
 void psb_gem_free_object(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/gma500/gma_device.c b/drivers/gpu/drm/gma500/gma_device.c
index 938408221142..869f30392566 100644
--- a/drivers/gpu/drm/gma500/gma_device.c
+++ b/drivers/gpu/drm/gma500/gma_device.c
@@ -5,7 +5,6 @@
  *
  **************************************************************************/
 
-#include <drm/drmP.h>
 #include "psb_drv.h"
 
 void gma_get_core_freq(struct drm_device *dev)
diff --git a/drivers/gpu/drm/gma500/gma_device.h b/drivers/gpu/drm/gma500/gma_device.h
index 39eb4de39048..a8cf31d4b814 100644
--- a/drivers/gpu/drm/gma500/gma_device.h
+++ b/drivers/gpu/drm/gma500/gma_device.h
@@ -7,6 +7,7 @@
 
 #ifndef _GMA_DEVICE_H
 #define _GMA_DEVICE_H
+struct drm_device;
 
 extern void gma_get_core_freq(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index f504754020fb..e20ccb5d10fd 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -7,12 +7,18 @@
  *	Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
  */
 
-#include <drm/drmP.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+
+#include "framebuffer.h"
 #include "gma_display.h"
+#include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "psb_drv.h"
-#include "framebuffer.h"
 
 /**
  * Returns whether any output on the specified pipe is of the specified type
diff --git a/drivers/gpu/drm/gma500/gma_display.h b/drivers/gpu/drm/gma500/gma_display.h
index 7f5bc65045a4..fdbd7ecaa59c 100644
--- a/drivers/gpu/drm/gma500/gma_display.h
+++ b/drivers/gpu/drm/gma500/gma_display.h
@@ -12,6 +12,9 @@
 
 #include <linux/pm_runtime.h>
 
+struct drm_encoder;
+struct drm_mode_set;
+
 struct gma_clock_t {
 	/* given values */
 	int n;
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index f851b922413f..afaf4bea21cf 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -7,11 +7,12 @@
  *	    Alan Cox <alan@linux.intel.com>
  */
 
-#include <drm/drmP.h>
 #include <linux/shmem_fs.h>
+
 #include <asm/set_memory.h>
-#include "psb_drv.h"
+
 #include "blitter.h"
+#include "psb_drv.h"
 
 
 /*
diff --git a/drivers/gpu/drm/gma500/gtt.h b/drivers/gpu/drm/gma500/gtt.h
index eb08b4b04731..3cf190295ad3 100644
--- a/drivers/gpu/drm/gma500/gtt.h
+++ b/drivers/gpu/drm/gma500/gtt.h
@@ -8,7 +8,6 @@
 #ifndef _PSB_GTT_H_
 #define _PSB_GTT_H_
 
-#include <drm/drmP.h>
 #include <drm/drm_gem.h>
 
 /* This wants cleaning up with respect to the psb_dev and un-needed stuff */
diff --git a/drivers/gpu/drm/gma500/intel_bios.c b/drivers/gpu/drm/gma500/intel_bios.c
index 6ed7cfda0a4f..8ad6337eeba3 100644
--- a/drivers/gpu/drm/gma500/intel_bios.c
+++ b/drivers/gpu/drm/gma500/intel_bios.c
@@ -5,13 +5,13 @@
  * Authors:
  *    Eric Anholt <eric@anholt.net>
  */
-#include <drm/drmP.h>
 #include <drm/drm.h>
-#include <drm/gma_drm.h>
+#include <drm/drm_dp_helper.h>
+
+#include "intel_bios.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "intel_bios.h"
 
 #define	SLAVE_ADDR1	0x70
 #define	SLAVE_ADDR2	0x72
diff --git a/drivers/gpu/drm/gma500/intel_bios.h b/drivers/gpu/drm/gma500/intel_bios.h
index 4927b68ec561..a1f9ce9465a5 100644
--- a/drivers/gpu/drm/gma500/intel_bios.h
+++ b/drivers/gpu/drm/gma500/intel_bios.h
@@ -9,8 +9,7 @@
 #ifndef _INTEL_BIOS_H_
 #define _INTEL_BIOS_H_
 
-#include <drm/drmP.h>
-#include <drm/drm_dp_helper.h>
+struct drm_device;
 
 struct vbt_header {
 	u8 signature[20];		/**< Always starts with 'VBT$' */
diff --git a/drivers/gpu/drm/gma500/intel_gmbus.c b/drivers/gpu/drm/gma500/intel_gmbus.c
index e7e22187c539..a083fbfe35b8 100644
--- a/drivers/gpu/drm/gma500/intel_gmbus.c
+++ b/drivers/gpu/drm/gma500/intel_gmbus.c
@@ -26,13 +26,14 @@
  *	Eric Anholt <eric@anholt.net>
  *	Chris Wilson <chris@chris-wilson.co.uk>
  */
-#include <linux/module.h>
-#include <linux/i2c.h>
+
+#include <linux/delay.h>
 #include <linux/i2c-algo-bit.h>
-#include <drm/drmP.h>
-#include "psb_intel_drv.h"
-#include <drm/gma_drm.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+
 #include "psb_drv.h"
+#include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
 
 #define _wait_for(COND, MS, W) ({ \
diff --git a/drivers/gpu/drm/gma500/intel_i2c.c b/drivers/gpu/drm/gma500/intel_i2c.c
index 7f1d4a514aef..de8810188190 100644
--- a/drivers/gpu/drm/gma500/intel_i2c.c
+++ b/drivers/gpu/drm/gma500/intel_i2c.c
@@ -5,9 +5,11 @@
  * Authors:
  *	Eric Anholt <eric@anholt.net>
  */
+
+#include <linux/delay.h>
 #include <linux/export.h>
-#include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
+#include <linux/i2c.h>
 
 #include "psb_drv.h"
 #include "psb_intel_reg.h"
diff --git a/drivers/gpu/drm/gma500/mdfld_device.c b/drivers/gpu/drm/gma500/mdfld_device.c
index 0db869dcd7bd..b718efccdcf2 100644
--- a/drivers/gpu/drm/gma500/mdfld_device.c
+++ b/drivers/gpu/drm/gma500/mdfld_device.c
@@ -5,14 +5,16 @@
  *
  **************************************************************************/
 
-#include "psb_drv.h"
-#include "mid_bios.h"
-#include "mdfld_output.h"
-#include "mdfld_dsi_output.h"
-#include "tc35876x-dsi-lvds.h"
+#include <linux/delay.h>
 
 #include <asm/intel_scu_ipc.h>
 
+#include "mdfld_dsi_output.h"
+#include "mdfld_output.h"
+#include "mid_bios.h"
+#include "psb_drv.h"
+#include "tc35876x-dsi-lvds.h"
+
 #ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
 
 #define MRST_BLC_MAX_PWM_REG_FREQ	    0xFFFF
@@ -330,7 +332,7 @@ static int mdfld_restore_display_registers(struct drm_device *dev, int pipenum)
 
 	if (pipenum == 1) {
 		/* restore palette (gamma) */
-		/*DRM_UDELAY(50000); */
+		/* udelay(50000); */
 		for (i = 0; i < 256; i++)
 			PSB_WVDC32(pipe->palette[i], map->palette + (i << 2));
 
@@ -392,7 +394,7 @@ static int mdfld_restore_display_registers(struct drm_device *dev, int pipenum)
 	PSB_WVDC32(pipe->conf, map->conf);
 
 	/* restore palette (gamma) */
-	/*DRM_UDELAY(50000); */
+	/* udelay(50000); */
 	for (i = 0; i < 256; i++)
 		PSB_WVDC32(pipe->palette[i], map->palette + (i << 2));
 
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
index d0bf5a1e94e8..d4c65f268922 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c
@@ -25,9 +25,11 @@
  * Jackie Li<yaodong.li@intel.com>
  */
 
+#include <linux/delay.h>
+
 #include "mdfld_dsi_dpi.h"
-#include "mdfld_output.h"
 #include "mdfld_dsi_pkg_sender.h"
+#include "mdfld_output.h"
 #include "psb_drv.h"
 #include "tc35876x-dsi-lvds.h"
 
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index fe020926ea4f..03023fa0fb6f 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -25,15 +25,17 @@
  * Jackie Li<yaodong.li@intel.com>
  */
 
-#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/moduleparam.h>
+#include <linux/pm_runtime.h>
+
+#include <asm/intel_scu_ipc.h>
 
-#include "mdfld_dsi_output.h"
 #include "mdfld_dsi_dpi.h"
-#include "mdfld_output.h"
+#include "mdfld_dsi_output.h"
 #include "mdfld_dsi_pkg_sender.h"
+#include "mdfld_output.h"
 #include "tc35876x-dsi-lvds.h"
-#include <linux/pm_runtime.h>
-#include <asm/intel_scu_ipc.h>
 
 /* get the LABC from command line. */
 static int LABC_control = 1;
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.h b/drivers/gpu/drm/gma500/mdfld_dsi_output.h
index 5b646c1f0c3e..0cccfe400a98 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.h
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.h
@@ -29,17 +29,17 @@
 #define __MDFLD_DSI_OUTPUT_H__
 
 #include <linux/backlight.h>
-#include <drm/drmP.h>
+
+#include <asm/intel-mid.h>
+
 #include <drm/drm.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 
+#include "mdfld_output.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "mdfld_output.h"
-
-#include <asm/intel-mid.h>
 
 #define FLD_MASK(start, end)	(((1 << ((start) - (end) + 1)) - 1) << (end))
 #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
index c50534c923df..6e0de83e9f7d 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_pkg_sender.c
@@ -24,12 +24,14 @@
  * Jackie Li<yaodong.li@intel.com>
  */
 
+#include <linux/delay.h>
 #include <linux/freezer.h>
+
 #include <video/mipi_display.h>
 
+#include "mdfld_dsi_dpi.h"
 #include "mdfld_dsi_output.h"
 #include "mdfld_dsi_pkg_sender.h"
-#include "mdfld_dsi_dpi.h"
 
 #define MDFLD_DSI_READ_MAX_COUNT		5000
 
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index 6a3c612c83ab..b8bfb96008b8 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -6,15 +6,18 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/pm_runtime.h>
 
-#include <drm/drmP.h>
-#include "psb_intel_reg.h"
-#include "gma_display.h"
+#include <drm/drm_crtc.h>
+#include <drm/drm_fourcc.h>
+
 #include "framebuffer.h"
-#include "mdfld_output.h"
+#include "gma_display.h"
 #include "mdfld_dsi_output.h"
+#include "mdfld_output.h"
+#include "psb_intel_reg.h"
 
 /* Hardcoded currently */
 static int ksel = KSEL_CRYSTAL_19;
diff --git a/drivers/gpu/drm/gma500/mdfld_tmd_vid.c b/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
index dc0c6c3d3d29..49c92debb7b2 100644
--- a/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
+++ b/drivers/gpu/drm/gma500/mdfld_tmd_vid.c
@@ -27,6 +27,8 @@
  * Scott Rowe <scott.m.rowe@intel.com>
  */
 
+#include <linux/delay.h>
+
 #include "mdfld_dsi_dpi.h"
 #include "mdfld_dsi_pkg_sender.h"
 
diff --git a/drivers/gpu/drm/gma500/mid_bios.c b/drivers/gpu/drm/gma500/mid_bios.c
index 3a12fef339bf..8ab44fec4bfa 100644
--- a/drivers/gpu/drm/gma500/mid_bios.c
+++ b/drivers/gpu/drm/gma500/mid_bios.c
@@ -11,11 +11,10 @@
  * - Check ioremap failures
  */
 
-#include <drm/drmP.h>
 #include <drm/drm.h>
-#include <drm/gma_drm.h>
-#include "psb_drv.h"
+
 #include "mid_bios.h"
+#include "psb_drv.h"
 
 static void mid_get_fuse_settings(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/gma500/mid_bios.h b/drivers/gpu/drm/gma500/mid_bios.h
index 7e743f731a92..8707f7c893a7 100644
--- a/drivers/gpu/drm/gma500/mid_bios.h
+++ b/drivers/gpu/drm/gma500/mid_bios.h
@@ -4,6 +4,7 @@
  * All Rights Reserved.
  *
  **************************************************************************/
+struct drm_device;
 
 extern int mid_chip_setup(struct drm_device *dev);
 
diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c
index 5d806a8ff1bd..505044c9a673 100644
--- a/drivers/gpu/drm/gma500/mmu.c
+++ b/drivers/gpu/drm/gma500/mmu.c
@@ -3,10 +3,12 @@
  * Copyright (c) 2007, Intel Corporation.
  *
  **************************************************************************/
-#include <drm/drmP.h>
+
+#include <linux/highmem.h>
+
+#include "mmu.h"
 #include "psb_drv.h"
 #include "psb_reg.h"
-#include "mmu.h"
 
 /*
  * Code for the SGX MMU:
diff --git a/drivers/gpu/drm/gma500/oaktrail.h b/drivers/gpu/drm/gma500/oaktrail.h
index 72da4e0bc8c7..8d20fa2ee286 100644
--- a/drivers/gpu/drm/gma500/oaktrail.h
+++ b/drivers/gpu/drm/gma500/oaktrail.h
@@ -5,6 +5,8 @@
  *
  **************************************************************************/
 
+struct psb_intel_mode_device;
+
 /* MID device specific descriptors */
 
 struct oaktrail_timing_info {
diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c
index cb4dafd113b3..167c10767dd4 100644
--- a/drivers/gpu/drm/gma500/oaktrail_crtc.c
+++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c
@@ -3,16 +3,18 @@
  * Copyright © 2009 Intel Corporation
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/pm_runtime.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_fourcc.h>
+
 #include "framebuffer.h"
+#include "gma_display.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "gma_display.h"
-#include "power.h"
 
 #define MRST_LIMIT_LVDS_100L	0
 #define MRST_LIMIT_LVDS_83	1
diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c
index e35e99508a70..ade7e2416a66 100644
--- a/drivers/gpu/drm/gma500/oaktrail_device.c
+++ b/drivers/gpu/drm/gma500/oaktrail_device.c
@@ -6,18 +6,20 @@
  **************************************************************************/
 
 #include <linux/backlight.h>
-#include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/dmi.h>
-#include <drm/drmP.h>
-#include <drm/drm.h>
-#include <drm/gma_drm.h>
-#include "psb_drv.h"
-#include "psb_reg.h"
-#include "psb_intel_reg.h"
+#include <linux/module.h>
+
 #include <asm/intel-mid.h>
 #include <asm/intel_scu_ipc.h>
-#include "mid_bios.h"
+
+#include <drm/drm.h>
+
 #include "intel_bios.h"
+#include "mid_bios.h"
+#include "psb_drv.h"
+#include "psb_intel_reg.h"
+#include "psb_reg.h"
 
 static int oaktrail_output_init(struct drm_device *dev)
 {
@@ -315,7 +317,7 @@ static int oaktrail_restore_display_registers(struct drm_device *dev)
 
 	/* Actually enable it */
 	PSB_WVDC32(p->dpll, MRST_DPLL_A);
-	DRM_UDELAY(150);
+	udelay(150);
 
 	/* Restore mode */
 	PSB_WVDC32(p->htotal, HTOTAL_A);
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index c6d72de1c054..f4c520893ceb 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -24,11 +24,13 @@
  *	Li Peng <peng.li@intel.com>
  */
 
-#include <drm/drmP.h>
+#include <linux/delay.h>
+
 #include <drm/drm.h>
+
+#include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "psb_drv.h"
 
 #define HDMI_READ(reg)		readl(hdmi_dev->regs + (reg))
 #define HDMI_WRITE(reg, val)	writel(val, hdmi_dev->regs + (reg))
@@ -815,7 +817,7 @@ void oaktrail_hdmi_restore(struct drm_device *dev)
 	PSB_WVDC32(hdmi_dev->saveDPLL_ADJUST, DPLL_ADJUST);
 	PSB_WVDC32(hdmi_dev->saveDPLL_UPDATE, DPLL_UPDATE);
 	PSB_WVDC32(hdmi_dev->saveDPLL_CLK_ENABLE, DPLL_CLK_ENABLE);
-	DRM_UDELAY(150);
+	udelay(150);
 
 	/* pipe */
 	PSB_WVDC32(pipeb->src, PIPEBSRC);
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 56e652664ae2..7390403ea1b7 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -9,15 +9,15 @@
  */
 
 #include <linux/i2c.h>
-#include <drm/drmP.h>
+#include <linux/pm_runtime.h>
+
 #include <asm/intel-mid.h>
 
 #include "intel_bios.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "power.h"
-#include <linux/pm_runtime.h>
 
 /* The max/min PWM frequency in BPCR[31:17] - */
 /* The smallest number is 1 (not 0) that can fit in the
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c b/drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c
index f913a62eee5f..baaf8212e01d 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds_i2c.c
@@ -23,17 +23,16 @@
  *
  */
 
+#include <linux/delay.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/types.h>
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/delay.h>
 
-#include <drm/drmP.h>
 #include "psb_drv.h"
 #include "psb_intel_reg.h"
 
diff --git a/drivers/gpu/drm/gma500/power.h b/drivers/gpu/drm/gma500/power.h
index 56d8708bd41c..0c89c4d6ec20 100644
--- a/drivers/gpu/drm/gma500/power.h
+++ b/drivers/gpu/drm/gma500/power.h
@@ -31,7 +31,9 @@
 #define _PSB_POWERMGMT_H_
 
 #include <linux/pci.h>
-#include <drm/drmP.h>
+
+struct device;
+struct drm_device;
 
 void gma_power_init(struct drm_device *dev);
 void gma_power_uninit(struct drm_device *dev);
diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index 649b0282bff8..ece994c4c21a 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -6,15 +6,15 @@
  **************************************************************************/
 
 #include <linux/backlight.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
-#include <drm/gma_drm.h>
-#include "psb_drv.h"
-#include "psb_reg.h"
-#include "psb_intel_reg.h"
+
+#include "gma_device.h"
 #include "intel_bios.h"
 #include "psb_device.h"
-#include "gma_device.h"
+#include "psb_drv.h"
+#include "psb_intel_reg.h"
+#include "psb_reg.h"
 
 static int psb_output_init(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 5280be2c2500..7005f8f69c68 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -7,23 +7,32 @@
  *
  **************************************************************************/
 
-#include <drm/drmP.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/pm_runtime.h>
+#include <linux/spinlock.h>
+
+#include <asm/set_memory.h>
+
+#include <acpi/video.h>
+
 #include <drm/drm.h>
-#include "psb_drv.h"
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_pciids.h>
+#include <drm/drm_vblank.h>
+
 #include "framebuffer.h"
-#include "psb_reg.h"
-#include "psb_intel_reg.h"
 #include "intel_bios.h"
 #include "mid_bios.h"
-#include <drm/drm_pciids.h>
 #include "power.h"
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/spinlock.h>
-#include <linux/pm_runtime.h>
-#include <acpi/video.h>
-#include <linux/module.h>
-#include <asm/set_memory.h>
+#include "psb_drv.h"
+#include "psb_intel_reg.h"
+#include "psb_reg.h"
 
 static struct drm_driver driver;
 static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index e0eec6d2b63c..9b3c03f4a38d 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -11,17 +11,17 @@
 #include <linux/kref.h>
 #include <linux/mm_types.h>
 
-#include <drm/drmP.h>
-#include <drm/gma_drm.h>
-#include "psb_reg.h"
-#include "psb_intel_drv.h"
+#include <drm/drm_device.h>
+
 #include "gma_display.h"
-#include "intel_bios.h"
 #include "gtt.h"
-#include "power.h"
-#include "opregion.h"
-#include "oaktrail.h"
+#include "intel_bios.h"
 #include "mmu.h"
+#include "oaktrail.h"
+#include "opregion.h"
+#include "power.h"
+#include "psb_intel_drv.h"
+#include "psb_reg.h"
 
 #define DRIVER_AUTHOR "Alan Cox <alan@linux.intel.com> and others"
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 3dcf3a32c6ca..4256410535f0 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -6,16 +6,17 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/delay.h>
 #include <linux/i2c.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_plane_helper.h>
+
 #include "framebuffer.h"
+#include "gma_display.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "gma_display.h"
-#include "power.h"
 
 #define INTEL_LIMIT_I9XX_SDVO_DAC   0
 #define INTEL_LIMIT_I9XX_LVDS	    1
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index eaacacab1b46..afaebab7bc17 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -9,14 +9,13 @@
  */
 
 #include <linux/i2c.h>
-#include <drm/drmP.h>
+#include <linux/pm_runtime.h>
 
 #include "intel_bios.h"
+#include "power.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "power.h"
-#include <linux/pm_runtime.h>
 
 /*
  * LVDS I2C backlight control macros
diff --git a/drivers/gpu/drm/gma500/psb_intel_modes.c b/drivers/gpu/drm/gma500/psb_intel_modes.c
index 4831caedbed8..88653a40aeb5 100644
--- a/drivers/gpu/drm/gma500/psb_intel_modes.c
+++ b/drivers/gpu/drm/gma500/psb_intel_modes.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/i2c.h>
-#include <drm/drmP.h>
+
 #include "psb_intel_drv.h"
 
 /**
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index dd3cec0e3190..264d7ad004b4 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -25,19 +25,20 @@
  * Authors:
  *	Eric Anholt <eric@anholt.net>
  */
-#include <linux/module.h>
+
+#include <linux/delay.h>
 #include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
-#include "psb_intel_drv.h"
-#include <drm/gma_drm.h>
+
 #include "psb_drv.h"
-#include "psb_intel_sdvo_regs.h"
+#include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include <linux/kernel.h>
+#include "psb_intel_sdvo_regs.h"
 
 #define SDVO_TMDS_MASK (SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_TMDS1)
 #define SDVO_RGB_MASK  (SDVO_OUTPUT_RGB0 | SDVO_OUTPUT_RGB1)
diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c
index 5d48703bb2e2..e6265fb85626 100644
--- a/drivers/gpu/drm/gma500/psb_irq.c
+++ b/drivers/gpu/drm/gma500/psb_irq.c
@@ -8,13 +8,14 @@
  *
  **************************************************************************/
 
-#include <drm/drmP.h>
+#include <drm/drm_vblank.h>
+
+#include "mdfld_output.h"
+#include "power.h"
 #include "psb_drv.h"
-#include "psb_reg.h"
 #include "psb_intel_reg.h"
-#include "power.h"
 #include "psb_irq.h"
-#include "mdfld_output.h"
+#include "psb_reg.h"
 
 /*
  * inline functions
diff --git a/drivers/gpu/drm/gma500/psb_irq.h b/drivers/gpu/drm/gma500/psb_irq.h
index 604cba75528b..58fd502e3b9d 100644
--- a/drivers/gpu/drm/gma500/psb_irq.h
+++ b/drivers/gpu/drm/gma500/psb_irq.h
@@ -12,7 +12,7 @@
 #ifndef _PSB_IRQ_H_
 #define _PSB_IRQ_H_
 
-#include <drm/drmP.h>
+struct drm_device;
 
 bool sysirq_init(struct drm_device *dev);
 void sysirq_uninit(struct drm_device *dev);
diff --git a/drivers/gpu/drm/gma500/psb_lid.c b/drivers/gpu/drm/gma500/psb_lid.c
index 5a9cc1721e2d..97b0c52bfd8a 100644
--- a/drivers/gpu/drm/gma500/psb_lid.c
+++ b/drivers/gpu/drm/gma500/psb_lid.c
@@ -5,11 +5,11 @@
  * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  **************************************************************************/
 
-#include <drm/drmP.h>
+#include <linux/spinlock.h>
+
 #include "psb_drv.h"
-#include "psb_reg.h"
 #include "psb_intel_reg.h"
-#include <linux/spinlock.h>
+#include "psb_reg.h"
 
 static void psb_lid_timer_func(struct timer_list *t)
 {
diff --git a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
index 37c997e24b9e..7de3ce637c7f 100644
--- a/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
+++ b/drivers/gpu/drm/gma500/tc35876x-dsi-lvds.c
@@ -22,15 +22,18 @@
  *
  */
 
-#include "mdfld_dsi_dpi.h"
-#include "mdfld_output.h"
-#include "mdfld_dsi_pkg_sender.h"
-#include "tc35876x-dsi-lvds.h"
-#include <linux/platform_data/tc35876x.h>
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/platform_data/tc35876x.h>
+
 #include <asm/intel_scu_ipc.h>
 
+#include "mdfld_dsi_dpi.h"
+#include "mdfld_dsi_pkg_sender.h"
+#include "mdfld_output.h"
+#include "tc35876x-dsi-lvds.h"
+
 static struct i2c_client *tc35876x_client;
 static struct i2c_client *cmi_lcd_i2c_client;
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
index 7cf8d38da8be..f20eedf0073a 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig
+++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig
@@ -3,7 +3,7 @@ config DRM_HISI_HIBMC
 	tristate "DRM Support for Hisilicon Hibmc"
 	depends on DRM && PCI && MMU
 	select DRM_KMS_HELPER
-	select DRM_TTM
+	select DRM_VRAM_HELPER
 
 	help
 	  Choose this option if you have a Hisilicon Hibmc soc chipset.
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
index 0a753d397d7d..08657a3627f3 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_de.c
@@ -91,27 +91,26 @@ static void hibmc_plane_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state	*state	= plane->state;
 	u32 reg;
 	int ret;
-	u64 gpu_addr = 0;
+	s64 gpu_addr = 0;
 	unsigned int line_l;
 	struct hibmc_drm_private *priv = plane->dev->dev_private;
 	struct hibmc_framebuffer *hibmc_fb;
-	struct hibmc_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!state->fb)
 		return;
 
 	hibmc_fb = to_hibmc_framebuffer(state->fb);
-	bo = gem_to_hibmc_bo(hibmc_fb->obj);
-	ret = ttm_bo_reserve(&bo->bo, true, false, NULL);
+	gbo = drm_gem_vram_of_gem(hibmc_fb->obj);
+
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret) {
-		DRM_ERROR("failed to reserve ttm_bo: %d", ret);
+		DRM_ERROR("failed to pin bo: %d", ret);
 		return;
 	}
-
-	ret = hibmc_bo_pin(bo, TTM_PL_FLAG_VRAM, &gpu_addr);
-	ttm_bo_unreserve(&bo->bo);
-	if (ret) {
-		DRM_ERROR("failed to pin hibmc_bo: %d", ret);
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (gpu_addr < 0) {
+		drm_gem_vram_unpin(gbo);
 		return;
 	}
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
index d14ce808d1d0..ce89e56937b0 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
@@ -22,14 +22,7 @@
 
 static const struct file_operations hibmc_fops = {
 	.owner		= THIS_MODULE,
-	.open		= drm_open,
-	.release	= drm_release,
-	.unlocked_ioctl	= drm_ioctl,
-	.compat_ioctl	= drm_compat_ioctl,
-	.mmap		= hibmc_mmap,
-	.poll		= drm_poll,
-	.read		= drm_read,
-	.llseek		= no_llseek,
+	DRM_VRAM_MM_FILE_OPERATIONS
 };
 
 static irqreturn_t hibmc_drm_interrupt(int irq, void *arg)
@@ -58,9 +51,10 @@ static struct drm_driver hibmc_driver = {
 	.desc			= "hibmc drm driver",
 	.major			= 1,
 	.minor			= 0,
-	.gem_free_object_unlocked = hibmc_gem_free_object,
+	.gem_free_object_unlocked =
+		drm_gem_vram_driver_gem_free_object_unlocked,
 	.dumb_create            = hibmc_dumb_create,
-	.dumb_map_offset        = hibmc_dumb_mmap_offset,
+	.dumb_map_offset        = drm_gem_vram_driver_dumb_mmap_offset,
 	.irq_handler		= hibmc_drm_interrupt,
 };
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
index 740c64332837..69348bf54a84 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
@@ -18,7 +18,8 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
-#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/drm_gem_vram_helper.h>
+#include <drm/drm_vram_mm_helper.h>
 
 struct hibmc_framebuffer {
 	struct drm_framebuffer fb;
@@ -43,36 +44,12 @@ struct hibmc_drm_private {
 	struct drm_device  *dev;
 	bool mode_config_initialized;
 
-	/* ttm */
-	struct ttm_bo_device bdev;
-	bool initialized;
-
 	/* fbdev */
 	struct hibmc_fbdev *fbdev;
-	bool mm_inited;
 };
 
 #define to_hibmc_framebuffer(x) container_of(x, struct hibmc_framebuffer, fb)
 
-struct hibmc_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
-};
-
-static inline struct hibmc_bo *hibmc_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct hibmc_bo, bo);
-}
-
-static inline struct hibmc_bo *gem_to_hibmc_bo(struct drm_gem_object *gem)
-{
-	return container_of(gem, struct hibmc_bo, gem);
-}
-
 void hibmc_set_power_mode(struct hibmc_drm_private *priv,
 			  unsigned int power_mode);
 void hibmc_set_current_gate(struct hibmc_drm_private *priv,
@@ -92,14 +69,8 @@ hibmc_framebuffer_init(struct drm_device *dev,
 
 int hibmc_mm_init(struct hibmc_drm_private *hibmc);
 void hibmc_mm_fini(struct hibmc_drm_private *hibmc);
-int hibmc_bo_pin(struct hibmc_bo *bo, u32 pl_flag, u64 *gpu_addr);
-int hibmc_bo_unpin(struct hibmc_bo *bo);
-void hibmc_gem_free_object(struct drm_gem_object *obj);
 int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev,
 		      struct drm_mode_create_dumb *args);
-int hibmc_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
-			   u32 handle, u64 *offset);
-int hibmc_mmap(struct file *filp, struct vm_area_struct *vma);
 
 extern const struct drm_mode_config_funcs hibmc_mode_funcs;
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
index 498545417717..af1ea4cceffa 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
@@ -58,10 +58,10 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
 	struct drm_mode_fb_cmd2 mode_cmd;
 	struct drm_gem_object *gobj = NULL;
 	int ret = 0;
-	int ret1;
 	size_t size;
 	unsigned int bytes_per_pixel;
-	struct hibmc_bo *bo = NULL;
+	struct drm_gem_vram_object *gbo = NULL;
+	void *base;
 
 	DRM_DEBUG_DRIVER("surface width(%d), height(%d) and bpp(%d)\n",
 			 sizes->surface_width, sizes->surface_height,
@@ -83,26 +83,20 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
 		return -ENOMEM;
 	}
 
-	bo = gem_to_hibmc_bo(gobj);
+	gbo = drm_gem_vram_of_gem(gobj);
 
-	ret = ttm_bo_reserve(&bo->bo, true, false, NULL);
-	if (ret) {
-		DRM_ERROR("failed to reserve ttm_bo: %d\n", ret);
-		goto out_unref_gem;
-	}
-
-	ret = hibmc_bo_pin(bo, TTM_PL_FLAG_VRAM, NULL);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret) {
 		DRM_ERROR("failed to pin fbcon: %d\n", ret);
-		goto out_unreserve_ttm_bo;
+		goto out_unref_gem;
 	}
 
-	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-	if (ret) {
+	base = drm_gem_vram_kmap(gbo, true, NULL);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
 		DRM_ERROR("failed to kmap fbcon: %d\n", ret);
 		goto out_unpin_bo;
 	}
-	ttm_bo_unreserve(&bo->bo);
 
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
@@ -126,24 +120,17 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper,
 
 	drm_fb_helper_fill_info(info, &priv->fbdev->helper, sizes);
 
-	info->screen_base = bo->kmap.virtual;
+	info->screen_base = base;
 	info->screen_size = size;
 
-	info->fix.smem_start = bo->bo.mem.bus.offset + bo->bo.mem.bus.base;
+	info->fix.smem_start = gbo->bo.mem.bus.offset + gbo->bo.mem.bus.base;
 	info->fix.smem_len = size;
 	return 0;
 
 out_release_fbi:
-	ret1 = ttm_bo_reserve(&bo->bo, true, false, NULL);
-	if (ret1) {
-		DRM_ERROR("failed to rsv ttm_bo when release fbi: %d\n", ret1);
-		goto out_unref_gem;
-	}
-	ttm_bo_kunmap(&bo->kmap);
+	drm_gem_vram_kunmap(gbo);
 out_unpin_bo:
-	hibmc_bo_unpin(bo);
-out_unreserve_ttm_bo:
-	ttm_bo_unreserve(&bo->bo);
+	drm_gem_vram_unpin(gbo);
 out_unref_gem:
 	drm_gem_object_put_unlocked(gobj);
 
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index 1ee339e0e1db..5d4a03cd7d50 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -12,335 +12,55 @@
  */
 
 #include <drm/drm_atomic_helper.h>
-#include <drm/ttm/ttm_page_alloc.h>
 
 #include "hibmc_drm_drv.h"
 
-static inline struct hibmc_drm_private *
-hibmc_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct hibmc_drm_private, bdev);
-}
-
-static void hibmc_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct hibmc_bo *bo = container_of(tbo, struct hibmc_bo, bo);
-
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool hibmc_ttm_bo_is_hibmc_bo(struct ttm_buffer_object *bo)
-{
-	return bo->destroy == &hibmc_bo_ttm_destroy;
-}
-
-static int
-hibmc_bo_init_mem_type(struct ttm_bo_device *bdev, u32 type,
-		       struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("unsupported memory type %u\n", type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-void hibmc_ttm_placement(struct hibmc_bo *bo, int domain)
-{
-	u32 count = 0;
-	u32 i;
-
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[count++].flags = TTM_PL_FLAG_WC |
-			TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
-	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[count++].flags = TTM_PL_MASK_CACHING |
-			TTM_PL_FLAG_SYSTEM;
-	if (!count)
-		bo->placements[count++].flags = TTM_PL_MASK_CACHING |
-			TTM_PL_FLAG_SYSTEM;
-
-	bo->placement.num_placement = count;
-	bo->placement.num_busy_placement = count;
-	for (i = 0; i < count; i++) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-}
-
-static void
-hibmc_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct hibmc_bo *hibmcbo = hibmc_bo(bo);
-
-	if (!hibmc_ttm_bo_is_hibmc_bo(bo))
-		return;
-
-	hibmc_ttm_placement(hibmcbo, TTM_PL_FLAG_SYSTEM);
-	*pl = hibmcbo->placement;
-}
-
-static int hibmc_bo_verify_access(struct ttm_buffer_object *bo,
-				  struct file *filp)
-{
-	struct hibmc_bo *hibmcbo = hibmc_bo(bo);
-
-	return drm_vma_node_verify_access(&hibmcbo->gem.vma_node,
-					  filp->private_data);
-}
-
-static int hibmc_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				    struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct hibmc_drm_private *hibmc = hibmc_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = pci_resource_start(hibmc->dev->pdev, 0);
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void hibmc_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func hibmc_tt_backend_func = {
-	.destroy = &hibmc_ttm_backend_destroy,
-};
-
-static struct ttm_tt *hibmc_ttm_tt_create(struct ttm_buffer_object *bo,
-					  u32 page_flags)
-{
-	struct ttm_tt *tt;
-	int ret;
-
-	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
-	if (!tt) {
-		DRM_ERROR("failed to allocate ttm_tt\n");
-		return NULL;
-	}
-	tt->func = &hibmc_tt_backend_func;
-	ret = ttm_tt_init(tt, bo, page_flags);
-	if (ret) {
-		DRM_ERROR("failed to initialize ttm_tt: %d\n", ret);
-		kfree(tt);
-		return NULL;
-	}
-	return tt;
-}
-
-struct ttm_bo_driver hibmc_bo_driver = {
-	.ttm_tt_create		= hibmc_ttm_tt_create,
-	.init_mem_type		= hibmc_bo_init_mem_type,
-	.evict_flags		= hibmc_bo_evict_flags,
-	.move			= NULL,
-	.verify_access		= hibmc_bo_verify_access,
-	.io_mem_reserve		= &hibmc_ttm_io_mem_reserve,
-	.io_mem_free		= NULL,
-};
-
 int hibmc_mm_init(struct hibmc_drm_private *hibmc)
 {
+	struct drm_vram_mm *vmm;
 	int ret;
 	struct drm_device *dev = hibmc->dev;
-	struct ttm_bo_device *bdev = &hibmc->bdev;
 
-	ret = ttm_bo_device_init(&hibmc->bdev,
-				 &hibmc_bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("error initializing bo driver: %d\n", ret);
+	vmm = drm_vram_helper_alloc_mm(dev,
+				       pci_resource_start(dev->pdev, 0),
+				       hibmc->fb_size, &drm_gem_vram_mm_funcs);
+	if (IS_ERR(vmm)) {
+		ret = PTR_ERR(vmm);
+		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
 		return ret;
 	}
 
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
-			     hibmc->fb_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("failed ttm VRAM init: %d\n", ret);
-		return ret;
-	}
-
-	hibmc->mm_inited = true;
 	return 0;
 }
 
 void hibmc_mm_fini(struct hibmc_drm_private *hibmc)
 {
-	if (!hibmc->mm_inited)
-		return;
-
-	ttm_bo_device_release(&hibmc->bdev);
-	hibmc->mm_inited = false;
-}
-
-static void hibmc_bo_unref(struct hibmc_bo **bo)
-{
-	struct ttm_buffer_object *tbo;
-
-	if ((*bo) == NULL)
+	if (!hibmc->dev->vram_mm)
 		return;
 
-	tbo = &((*bo)->bo);
-	ttm_bo_put(tbo);
-	*bo = NULL;
-}
-
-int hibmc_bo_create(struct drm_device *dev, int size, int align,
-		    u32 flags, struct hibmc_bo **phibmcbo)
-{
-	struct hibmc_drm_private *hibmc = dev->dev_private;
-	struct hibmc_bo *hibmcbo;
-	size_t acc_size;
-	int ret;
-
-	hibmcbo = kzalloc(sizeof(*hibmcbo), GFP_KERNEL);
-	if (!hibmcbo) {
-		DRM_ERROR("failed to allocate hibmcbo\n");
-		return -ENOMEM;
-	}
-	ret = drm_gem_object_init(dev, &hibmcbo->gem, size);
-	if (ret) {
-		DRM_ERROR("failed to initialize drm gem object: %d\n", ret);
-		kfree(hibmcbo);
-		return ret;
-	}
-
-	hibmcbo->bo.bdev = &hibmc->bdev;
-
-	hibmc_ttm_placement(hibmcbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&hibmc->bdev, size,
-				       sizeof(struct hibmc_bo));
-
-	ret = ttm_bo_init(&hibmc->bdev, &hibmcbo->bo, size,
-			  ttm_bo_type_device, &hibmcbo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, hibmc_bo_ttm_destroy);
-	if (ret) {
-		hibmc_bo_unref(&hibmcbo);
-		DRM_ERROR("failed to initialize ttm_bo: %d\n", ret);
-		return ret;
-	}
-
-	*phibmcbo = hibmcbo;
-	return 0;
-}
-
-int hibmc_bo_pin(struct hibmc_bo *bo, u32 pl_flag, u64 *gpu_addr)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		if (gpu_addr)
-			*gpu_addr = bo->bo.offset;
-		return 0;
-	}
-
-	hibmc_ttm_placement(bo, pl_flag);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret)
-		return ret;
-
-	bo->pin_count = 1;
-	if (gpu_addr)
-		*gpu_addr = bo->bo.offset;
-	return 0;
-}
-
-int hibmc_bo_unpin(struct hibmc_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret) {
-		DRM_ERROR("validate failed for unpin: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-int hibmc_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct hibmc_drm_private *hibmc = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &hibmc->bdev);
+	drm_vram_helper_release_mm(hibmc->dev);
 }
 
 int hibmc_gem_create(struct drm_device *dev, u32 size, bool iskernel,
 		     struct drm_gem_object **obj)
 {
-	struct hibmc_bo *hibmcbo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
 
 	*obj = NULL;
 
-	size = PAGE_ALIGN(size);
-	if (size == 0) {
-		DRM_ERROR("error: zero size\n");
+	size = roundup(size, PAGE_SIZE);
+	if (size == 0)
 		return -EINVAL;
-	}
 
-	ret = hibmc_bo_create(dev, size, 0, 0, &hibmcbo);
-	if (ret) {
+	gbo = drm_gem_vram_create(dev, &dev->vram_mm->bdev, size, 0, false);
+	if (IS_ERR(gbo)) {
+		ret = PTR_ERR(gbo);
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("failed to allocate GEM object: %d\n", ret);
 		return ret;
 	}
-	*obj = &hibmcbo->gem;
+	*obj = &gbo->gem;
 	return 0;
 }
 
@@ -372,35 +92,6 @@ int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev,
 	return 0;
 }
 
-void hibmc_gem_free_object(struct drm_gem_object *obj)
-{
-	struct hibmc_bo *hibmcbo = gem_to_hibmc_bo(obj);
-
-	hibmc_bo_unref(&hibmcbo);
-}
-
-static u64 hibmc_bo_mmap_offset(struct hibmc_bo *bo)
-{
-	return drm_vma_node_offset_addr(&bo->bo.vma_node);
-}
-
-int hibmc_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
-			   u32 handle, u64 *offset)
-{
-	struct drm_gem_object *obj;
-	struct hibmc_bo *bo;
-
-	obj = drm_gem_object_lookup(file, handle);
-	if (!obj)
-		return -ENOENT;
-
-	bo = gem_to_hibmc_bo(obj);
-	*offset = hibmc_bo_mmap_offset(bo);
-
-	drm_gem_object_put_unlocked(obj);
-	return 0;
-}
-
 static void hibmc_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct hibmc_framebuffer *hibmc_fb = to_hibmc_framebuffer(fb);
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 255f224db64b..0d21402945ab 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -45,19 +45,28 @@ config DRM_I915
 config DRM_I915_ALPHA_SUPPORT
 	bool "Enable alpha quality support for new Intel hardware by default"
 	depends on DRM_I915
-	default n
 	help
-	  Choose this option if you have new Intel hardware and want to enable
-	  the alpha quality i915 driver support for the hardware in this kernel
-	  version. You can also enable the support at runtime using the module
-	  parameter i915.alpha_support=1; this option changes the default for
-	  that module parameter.
+	  This option is deprecated. Use DRM_I915_FORCE_PROBE option instead.
 
-	  It is recommended to upgrade to a kernel version with proper support
-	  as soon as it is available. Generally fixes for platforms with alpha
-	  support are not backported to older kernels.
+config DRM_I915_FORCE_PROBE
+	string "Force probe driver for selected new Intel hardware"
+	depends on DRM_I915
+	default "*" if DRM_I915_ALPHA_SUPPORT
+	help
+	  This is the default value for the i915.force_probe module
+	  parameter. Using the module parameter overrides this option.
 
-	  If in doubt, say "N".
+	  Force probe the driver for new Intel graphics devices that are
+	  recognized but not properly supported by this kernel version. It is
+	  recommended to upgrade to a kernel version with proper support as soon
+	  as it is available.
+
+	  Use "" to disable force probe. If in doubt, use this.
+
+	  Use "<pci-id>[,<pci-id>,...]" to force probe the driver for listed
+	  devices. For example, "4500" or "4500,4571".
+
+	  Use "*" to force probe the driver for all known devices.
 
 config DRM_I915_CAPTURE_ERROR
 	bool "Enable capturing GPU state following a hang"
@@ -133,3 +142,9 @@ depends on DRM_I915
 depends on EXPERT
 source "drivers/gpu/drm/i915/Kconfig.debug"
 endmenu
+
+menu "drm/i915 Profile Guided Optimisation"
+	visible if EXPERT
+	depends on DRM_I915
+	source "drivers/gpu/drm/i915/Kconfig.profile"
+endmenu
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 04b686d2c2d0..8d922bb4d953 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -21,6 +21,7 @@ config DRM_I915_DEBUG
         depends on DRM_I915
         select DEBUG_FS
         select PREEMPT_COUNT
+        select REFCOUNT_FULL
         select I2C_CHARDEV
         select STACKDEPOT
         select DRM_DP_AUX_CHARDEV
@@ -32,6 +33,7 @@ config DRM_I915_DEBUG
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
 	select DRM_I915_DEBUG_RUNTIME_PM
+	select DRM_I915_DEBUG_MMIO
         default n
         help
           Choose this option to turn on extra driver debugging that may affect
@@ -41,6 +43,19 @@ config DRM_I915_DEBUG
 
           If in doubt, say "N".
 
+config DRM_I915_DEBUG_MMIO
+	bool "Always insert extra checks around mmio access by default"
+	default n
+	help
+	  By default, always enables the extra sanity checks (extra register
+	  reads) around every mmio (register) access that will slow the system
+	  down. This sets the default value of i915.mmio_debug to -1 and can
+	  be overridden at module load.
+
+	  Recommended for driver developers only.
+
+	  If in doubt, say "N".
+
 config DRM_I915_DEBUG_GEM
         bool "Insert extra checks into the GEM internals"
         default n
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
new file mode 100644
index 000000000000..48df8889a88a
--- /dev/null
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -0,0 +1,27 @@
+config DRM_I915_USERFAULT_AUTOSUSPEND
+	int "Runtime autosuspend delay for userspace GGTT mmaps (ms)"
+	default 250 # milliseconds
+	help
+	  On runtime suspend, as we suspend the device, we have to revoke
+	  userspace GGTT mmaps and force userspace to take a pagefault on
+	  their next access. The revocation and subsequent recreation of
+	  the GGTT mmap can be very slow and so we impose a small hysteris
+	  that complements the runtime-pm autosuspend and provides a lower
+	  floor on the autosuspend delay.
+
+	  May be 0 to disable the extra delay and solely use the device level
+	  runtime pm autosuspend delay tunable.
+
+config DRM_I915_SPIN_REQUEST
+	int "Busywait for request completion (us)"
+	default 5 # microseconds
+	help
+	  Before sleeping waiting for a request (GPU operation) to complete,
+	  we may spend some time polling for its completion. As the IRQ may
+	  take a non-negligible time to setup, we do a short spin first to
+	  check if the request will complete in the time it would have taken
+	  us to enable the interrupt.
+
+	  May be 0 to disable the initial spin. In practice, we estimate
+	  the cost of enabling the interrupt (if currently disabled) to be
+	  a few microseconds.
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index fbcb0904f4a8..91355c2ea8a5 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,52 +35,93 @@ subdir-ccflags-y += \
 # Extra header tests
 include $(src)/Makefile.header-test
 
+subdir-ccflags-y += -I$(src)
+
 # Please keep these build lists sorted!
 
 # core driver code
 i915-y += i915_drv.o \
 	  i915_irq.o \
-	  i915_memcpy.o \
-	  i915_mm.o \
 	  i915_params.o \
 	  i915_pci.o \
-	  i915_reset.o \
+	  i915_scatterlist.o \
 	  i915_suspend.o \
-	  i915_sw_fence.o \
-	  i915_syncmap.o \
 	  i915_sysfs.o \
-	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
 	  intel_runtime_pm.o \
-	  intel_workarounds.o
+	  intel_sideband.o \
+	  intel_uncore.o \
+	  intel_wakeref.o
+
+# core library code
+i915-y += \
+	i915_memcpy.o \
+	i915_mm.o \
+	i915_sw_fence.o \
+	i915_syncmap.o \
+	i915_user_extensions.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
-i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o display/intel_pipe_crc.o
 i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
-# GEM code
+# "Graphics Technology" (aka we talk to the gpu)
+obj-y += gt/
+gt-y += \
+	gt/intel_breadcrumbs.o \
+	gt/intel_context.o \
+	gt/intel_engine_cs.o \
+	gt/intel_engine_pm.o \
+	gt/intel_gt_pm.o \
+	gt/intel_hangcheck.o \
+	gt/intel_lrc.o \
+	gt/intel_reset.o \
+	gt/intel_ringbuffer.o \
+	gt/intel_mocs.o \
+	gt/intel_sseu.o \
+	gt/intel_workarounds.o
+gt-$(CONFIG_DRM_I915_SELFTEST) += \
+	gt/mock_engine.o
+i915-y += $(gt-y)
+
+# GEM (Graphics Execution Management) code
+obj-y += gem/
+gem-y += \
+	gem/i915_gem_busy.o \
+	gem/i915_gem_clflush.o \
+	gem/i915_gem_client_blt.o \
+	gem/i915_gem_context.o \
+	gem/i915_gem_dmabuf.o \
+	gem/i915_gem_domain.o \
+	gem/i915_gem_execbuffer.o \
+	gem/i915_gem_fence.o \
+	gem/i915_gem_internal.o \
+	gem/i915_gem_object.o \
+	gem/i915_gem_object_blt.o \
+	gem/i915_gem_mman.o \
+	gem/i915_gem_pages.o \
+	gem/i915_gem_phys.o \
+	gem/i915_gem_pm.o \
+	gem/i915_gem_shmem.o \
+	gem/i915_gem_shrinker.o \
+	gem/i915_gem_stolen.o \
+	gem/i915_gem_throttle.o \
+	gem/i915_gem_tiling.o \
+	gem/i915_gem_userptr.o \
+	gem/i915_gem_wait.o \
+	gem/i915_gemfs.o
 i915-y += \
+	  $(gem-y) \
 	  i915_active.o \
 	  i915_cmd_parser.o \
 	  i915_gem_batch_pool.o \
-	  i915_gem_clflush.o \
-	  i915_gem_context.o \
-	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
-	  i915_gem_execbuffer.o \
 	  i915_gem_fence_reg.o \
 	  i915_gem_gtt.o \
-	  i915_gem_internal.o \
 	  i915_gem.o \
-	  i915_gem_object.o \
 	  i915_gem_render_state.o \
-	  i915_gem_shrinker.o \
-	  i915_gem_stolen.o \
-	  i915_gem_tiling.o \
-	  i915_gem_userptr.o \
-	  i915_gemfs.o \
 	  i915_globals.o \
 	  i915_query.o \
 	  i915_request.o \
@@ -88,14 +129,6 @@ i915-y += \
 	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
-	  intel_breadcrumbs.o \
-	  intel_context.o \
-	  intel_engine_cs.o \
-	  intel_hangcheck.o \
-	  intel_lrc.o \
-	  intel_mocs.o \
-	  intel_ringbuffer.o \
-	  intel_uncore.o \
 	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
@@ -117,62 +150,71 @@ i915-y += intel_renderstate_gen6.o \
 	  intel_renderstate_gen9.o
 
 # modesetting core code
-i915-y += intel_audio.o \
-	  intel_atomic.o \
-	  intel_atomic_plane.o \
-	  intel_bios.o \
-	  intel_cdclk.o \
-	  intel_color.o \
-	  intel_combo_phy.o \
-	  intel_connector.o \
-	  intel_display.o \
-	  intel_dpio_phy.o \
-	  intel_dpll_mgr.o \
-	  intel_fbc.o \
-	  intel_fifo_underrun.o \
-	  intel_frontbuffer.o \
-	  intel_hdcp.o \
-	  intel_hotplug.o \
-	  intel_overlay.o \
-	  intel_psr.o \
-	  intel_quirks.o \
-	  intel_sideband.o \
-	  intel_sprite.o
-i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
-i915-$(CONFIG_DRM_FBDEV_EMULATION)	+= intel_fbdev.o
+obj-y += display/
+i915-y += \
+	display/intel_atomic.o \
+	display/intel_atomic_plane.o \
+	display/intel_audio.o \
+	display/intel_bios.o \
+	display/intel_bw.o \
+	display/intel_cdclk.o \
+	display/intel_color.o \
+	display/intel_combo_phy.o \
+	display/intel_connector.o \
+	display/intel_display.o \
+	display/intel_display_power.o \
+	display/intel_dpio_phy.o \
+	display/intel_dpll_mgr.o \
+	display/intel_fbc.o \
+	display/intel_fifo_underrun.o \
+	display/intel_frontbuffer.o \
+	display/intel_hdcp.o \
+	display/intel_hotplug.o \
+	display/intel_lpe_audio.o \
+	display/intel_overlay.o \
+	display/intel_psr.o \
+	display/intel_quirks.o \
+	display/intel_sprite.o
+i915-$(CONFIG_ACPI) += \
+	display/intel_acpi.o \
+	display/intel_opregion.o
+i915-$(CONFIG_DRM_FBDEV_EMULATION) += \
+	display/intel_fbdev.o
 
 # modesetting output/encoder code
-i915-y += dvo_ch7017.o \
-	  dvo_ch7xxx.o \
-	  dvo_ivch.o \
-	  dvo_ns2501.o \
-	  dvo_sil164.o \
-	  dvo_tfp410.o \
-	  icl_dsi.o \
-	  intel_crt.o \
-	  intel_ddi.o \
-	  intel_dp_aux_backlight.o \
-	  intel_dp_link_training.o \
-	  intel_dp_mst.o \
-	  intel_dp.o \
-	  intel_dsi.o \
-	  intel_dsi_dcs_backlight.o \
-	  intel_dsi_vbt.o \
-	  intel_dvo.o \
-	  intel_hdmi.o \
-	  intel_i2c.o \
-	  intel_lspcon.o \
-	  intel_lvds.o \
-	  intel_panel.o \
-	  intel_sdvo.o \
-	  intel_tv.o \
-	  vlv_dsi.o \
-	  vlv_dsi_pll.o \
-	  intel_vdsc.o
+i915-y += \
+	display/dvo_ch7017.o \
+	display/dvo_ch7xxx.o \
+	display/dvo_ivch.o \
+	display/dvo_ns2501.o \
+	display/dvo_sil164.o \
+	display/dvo_tfp410.o \
+	display/icl_dsi.o \
+	display/intel_crt.o \
+	display/intel_ddi.o \
+	display/intel_dp.o \
+	display/intel_dp_aux_backlight.o \
+	display/intel_dp_link_training.o \
+	display/intel_dp_mst.o \
+	display/intel_dsi.o \
+	display/intel_dsi_dcs_backlight.o \
+	display/intel_dsi_vbt.o \
+	display/intel_dvo.o \
+	display/intel_gmbus.o \
+	display/intel_hdmi.o \
+	display/intel_lspcon.o \
+	display/intel_lvds.o \
+	display/intel_panel.o \
+	display/intel_sdvo.o \
+	display/intel_tv.o \
+	display/intel_vdsc.o \
+	display/vlv_dsi.o \
+	display/vlv_dsi_pll.o
 
 # Post-mortem debug and GPU hang state capture
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
+	gem/selftests/igt_gem_utils.o \
 	selftests/i915_random.o \
 	selftests/i915_selftest.o \
 	selftests/igt_flush_test.o \
@@ -205,8 +247,5 @@ i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
 endif
 
-# LPE Audio for VLV and CHT
-i915-y += intel_lpe_audio.o
-
 obj-$(CONFIG_DRM_I915) += i915.o
 obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test
index c1c391816fa7..e6ba66f787f9 100644
--- a/drivers/gpu/drm/i915/Makefile.header-test
+++ b/drivers/gpu/drm/i915/Makefile.header-test
@@ -4,37 +4,22 @@
 # Test the headers are compilable as standalone units
 header_test := \
 	i915_active_types.h \
-	i915_gem_context_types.h \
+	i915_debugfs.h \
+	i915_drv.h \
+	i915_irq.h \
+	i915_params.h \
 	i915_priolist_types.h \
+	i915_reg.h \
 	i915_scheduler_types.h \
 	i915_timeline_types.h \
-	intel_atomic_plane.h \
-	intel_audio.h \
-	intel_cdclk.h \
-	intel_color.h \
-	intel_connector.h \
-	intel_context_types.h \
-	intel_crt.h \
+	i915_utils.h \
 	intel_csr.h \
-	intel_ddi.h \
-	intel_dp.h \
-	intel_dvo.h \
-	intel_engine_types.h \
-	intel_fbc.h \
-	intel_fbdev.h \
-	intel_frontbuffer.h \
-	intel_hdcp.h \
-	intel_hdmi.h \
-	intel_lspcon.h \
-	intel_lvds.h \
-	intel_panel.h \
-	intel_pipe_crc.h \
+	intel_drv.h \
 	intel_pm.h \
-	intel_psr.h \
-	intel_sdvo.h \
-	intel_sprite.h \
-	intel_tv.h \
-	intel_workarounds_types.h
+	intel_runtime_pm.h \
+	intel_sideband.h \
+	intel_uncore.h \
+	intel_wakeref.h
 
 quiet_cmd_header_test = HDRTEST $@
       cmd_header_test = echo "\#include \"$(<F)\"" > $@
diff --git a/drivers/gpu/drm/i915/display/Makefile b/drivers/gpu/drm/i915/display/Makefile
new file mode 100644
index 000000000000..1c75b5c9790c
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/Makefile
@@ -0,0 +1,2 @@
+# Extra header tests
+include $(src)/Makefile.header-test
diff --git a/drivers/gpu/drm/i915/display/Makefile.header-test b/drivers/gpu/drm/i915/display/Makefile.header-test
new file mode 100644
index 000000000000..fc7d4e5bd2c6
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(filter-out %/intel_vbt_defs.h,$(wildcard $(src)/*.h)))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/display/dvo_ch7017.c
index caac9942e1e3..602380fe74f3 100644
--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/display/dvo_ch7017.c
@@ -25,7 +25,8 @@
  *
  */
 
-#include "dvo.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 #define CH7017_TV_DISPLAY_MODE		0x00
 #define CH7017_FLICKER_FILTER		0x01
diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c
index 397ac5233726..e070bebee7b5 100644
--- a/drivers/gpu/drm/i915/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/display/dvo_ch7xxx.c
@@ -26,7 +26,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-#include "dvo.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 #define CH7xxx_REG_VID		0x4a
 #define CH7xxx_REG_DID		0x4b
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/display/dvo_ivch.c
index 24278cc49090..09dba35f3ffa 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/display/dvo_ivch.c
@@ -29,7 +29,8 @@
  *
  */
 
-#include "dvo.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 /*
  * register definitions for the i82807aa.
diff --git a/drivers/gpu/drm/i915/dvo_ns2501.c b/drivers/gpu/drm/i915/display/dvo_ns2501.c
index c584e01dc8dc..c83a5d88d62b 100644
--- a/drivers/gpu/drm/i915/dvo_ns2501.c
+++ b/drivers/gpu/drm/i915/display/dvo_ns2501.c
@@ -26,9 +26,10 @@
  *
  */
 
-#include "dvo.h"
-#include "i915_reg.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 #define NS2501_VID 0x1305
 #define NS2501_DID 0x6726
diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/display/dvo_sil164.c
index 4ae5d8fd9ff0..04698eaeb632 100644
--- a/drivers/gpu/drm/i915/dvo_sil164.c
+++ b/drivers/gpu/drm/i915/display/dvo_sil164.c
@@ -26,7 +26,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-#include "dvo.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 #define SIL164_VID 0x0001
 #define SIL164_DID 0x0006
diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/display/dvo_tfp410.c
index d603bc2f2506..623114ee73cd 100644
--- a/drivers/gpu/drm/i915/dvo_tfp410.c
+++ b/drivers/gpu/drm/i915/display/dvo_tfp410.c
@@ -25,7 +25,8 @@
  *
  */
 
-#include "dvo.h"
+#include "intel_drv.h"
+#include "intel_dvo_dev.h"
 
 /* register definitions according to the TFP410 data sheet */
 #define TFP410_VID		0x014C
diff --git a/drivers/gpu/drm/i915/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 9d962ea1e635..74448e6bf749 100644
--- a/drivers/gpu/drm/i915/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -28,6 +28,8 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_mipi_dsi.h>
 
+#include "intel_atomic.h"
+#include "intel_combo_phy.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
 #include "intel_dsi.h"
@@ -363,30 +365,10 @@ static void gen11_dsi_power_up_lanes(struct intel_encoder *encoder)
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	enum port port;
-	u32 tmp;
-	u32 lane_mask;
-
-	switch (intel_dsi->lane_count) {
-	case 1:
-		lane_mask = PWR_DOWN_LN_3_1_0;
-		break;
-	case 2:
-		lane_mask = PWR_DOWN_LN_3_1;
-		break;
-	case 3:
-		lane_mask = PWR_DOWN_LN_3;
-		break;
-	case 4:
-	default:
-		lane_mask = PWR_UP_ALL_LANES;
-		break;
-	}
 
-	for_each_dsi_port(port, intel_dsi->ports) {
-		tmp = I915_READ(ICL_PORT_CL_DW10(port));
-		tmp &= ~PWR_DOWN_LN_MASK;
-		I915_WRITE(ICL_PORT_CL_DW10(port), tmp | lane_mask);
-	}
+	for_each_dsi_port(port, intel_dsi->ports)
+		intel_combo_phy_power_up_lanes(dev_priv, port, true,
+					       intel_dsi->lane_count, false);
 }
 
 static void gen11_dsi_config_phy_lanes_sequence(struct intel_encoder *encoder)
@@ -1193,17 +1175,51 @@ static void gen11_dsi_disable(struct intel_encoder *encoder,
 	gen11_dsi_disable_io_power(encoder);
 }
 
+static void gen11_dsi_get_timings(struct intel_encoder *encoder,
+				  struct intel_crtc_state *pipe_config)
+{
+	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
+	struct drm_display_mode *adjusted_mode =
+					&pipe_config->base.adjusted_mode;
+
+	if (intel_dsi->dual_link) {
+		adjusted_mode->crtc_hdisplay *= 2;
+		if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK)
+			adjusted_mode->crtc_hdisplay -=
+						intel_dsi->pixel_overlap;
+		adjusted_mode->crtc_htotal *= 2;
+	}
+	adjusted_mode->crtc_hblank_start = adjusted_mode->crtc_hdisplay;
+	adjusted_mode->crtc_hblank_end = adjusted_mode->crtc_htotal;
+
+	if (intel_dsi->operation_mode == INTEL_DSI_VIDEO_MODE) {
+		if (intel_dsi->dual_link) {
+			adjusted_mode->crtc_hsync_start *= 2;
+			adjusted_mode->crtc_hsync_end *= 2;
+		}
+	}
+	adjusted_mode->crtc_vblank_start = adjusted_mode->crtc_vdisplay;
+	adjusted_mode->crtc_vblank_end = adjusted_mode->crtc_vtotal;
+}
+
 static void gen11_dsi_get_config(struct intel_encoder *encoder,
 				 struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 
 	/* FIXME: adapt icl_ddi_clock_get() for DSI and use that? */
 	pipe_config->port_clock =
 		cnl_calc_wrpll_link(dev_priv, &pipe_config->dpll_hw_state);
+
 	pipe_config->base.adjusted_mode.crtc_clock = intel_dsi->pclk;
+	if (intel_dsi->dual_link)
+		pipe_config->base.adjusted_mode.crtc_clock *= 2;
+
+	gen11_dsi_get_timings(encoder, pipe_config);
 	pipe_config->output_types |= BIT(INTEL_OUTPUT_DSI);
+	pipe_config->pipe_bpp = bdw_get_pipemisc_bpp(crtc);
 }
 
 static int gen11_dsi_compute_config(struct intel_encoder *encoder,
@@ -1219,6 +1235,7 @@ static int gen11_dsi_compute_config(struct intel_encoder *encoder,
 	struct drm_display_mode *adjusted_mode =
 					&pipe_config->base.adjusted_mode;
 
+	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	intel_fixed_panel_mode(fixed_mode, adjusted_mode);
 	intel_pch_panel_fitting(crtc, pipe_config, conn_state->scaling_mode);
 
@@ -1363,6 +1380,113 @@ static const struct mipi_dsi_host_ops gen11_dsi_host_ops = {
 	.transfer = gen11_dsi_host_transfer,
 };
 
+#define ICL_PREPARE_CNT_MAX	0x7
+#define ICL_CLK_ZERO_CNT_MAX	0xf
+#define ICL_TRAIL_CNT_MAX	0x7
+#define ICL_TCLK_PRE_CNT_MAX	0x3
+#define ICL_TCLK_POST_CNT_MAX	0x7
+#define ICL_HS_ZERO_CNT_MAX	0xf
+#define ICL_EXIT_ZERO_CNT_MAX	0x7
+
+static void icl_dphy_param_init(struct intel_dsi *intel_dsi)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	u32 tlpx_ns;
+	u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt;
+	u32 ths_prepare_ns, tclk_trail_ns;
+	u32 hs_zero_cnt;
+	u32 tclk_pre_cnt, tclk_post_cnt;
+
+	tlpx_ns = intel_dsi_tlpx_ns(intel_dsi);
+
+	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
+	ths_prepare_ns = max(mipi_config->ths_prepare,
+			     mipi_config->tclk_prepare);
+
+	/*
+	 * prepare cnt in escape clocks
+	 * this field represents a hexadecimal value with a precision
+	 * of 1.2 – i.e. the most significant bit is the integer
+	 * and the least significant 2 bits are fraction bits.
+	 * so, the field can represent a range of 0.25 to 1.75
+	 */
+	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * 4, tlpx_ns);
+	if (prepare_cnt > ICL_PREPARE_CNT_MAX) {
+		DRM_DEBUG_KMS("prepare_cnt out of range (%d)\n", prepare_cnt);
+		prepare_cnt = ICL_PREPARE_CNT_MAX;
+	}
+
+	/* clk zero count in escape clocks */
+	clk_zero_cnt = DIV_ROUND_UP(mipi_config->tclk_prepare_clkzero -
+				    ths_prepare_ns, tlpx_ns);
+	if (clk_zero_cnt > ICL_CLK_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("clk_zero_cnt out of range (%d)\n", clk_zero_cnt);
+		clk_zero_cnt = ICL_CLK_ZERO_CNT_MAX;
+	}
+
+	/* trail cnt in escape clocks*/
+	trail_cnt = DIV_ROUND_UP(tclk_trail_ns, tlpx_ns);
+	if (trail_cnt > ICL_TRAIL_CNT_MAX) {
+		DRM_DEBUG_KMS("trail_cnt out of range (%d)\n", trail_cnt);
+		trail_cnt = ICL_TRAIL_CNT_MAX;
+	}
+
+	/* tclk pre count in escape clocks */
+	tclk_pre_cnt = DIV_ROUND_UP(mipi_config->tclk_pre, tlpx_ns);
+	if (tclk_pre_cnt > ICL_TCLK_PRE_CNT_MAX) {
+		DRM_DEBUG_KMS("tclk_pre_cnt out of range (%d)\n", tclk_pre_cnt);
+		tclk_pre_cnt = ICL_TCLK_PRE_CNT_MAX;
+	}
+
+	/* tclk post count in escape clocks */
+	tclk_post_cnt = DIV_ROUND_UP(mipi_config->tclk_post, tlpx_ns);
+	if (tclk_post_cnt > ICL_TCLK_POST_CNT_MAX) {
+		DRM_DEBUG_KMS("tclk_post_cnt out of range (%d)\n", tclk_post_cnt);
+		tclk_post_cnt = ICL_TCLK_POST_CNT_MAX;
+	}
+
+	/* hs zero cnt in escape clocks */
+	hs_zero_cnt = DIV_ROUND_UP(mipi_config->ths_prepare_hszero -
+				   ths_prepare_ns, tlpx_ns);
+	if (hs_zero_cnt > ICL_HS_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("hs_zero_cnt out of range (%d)\n", hs_zero_cnt);
+		hs_zero_cnt = ICL_HS_ZERO_CNT_MAX;
+	}
+
+	/* hs exit zero cnt in escape clocks */
+	exit_zero_cnt = DIV_ROUND_UP(mipi_config->ths_exit, tlpx_ns);
+	if (exit_zero_cnt > ICL_EXIT_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("exit_zero_cnt out of range (%d)\n", exit_zero_cnt);
+		exit_zero_cnt = ICL_EXIT_ZERO_CNT_MAX;
+	}
+
+	/* clock lane dphy timings */
+	intel_dsi->dphy_reg = (CLK_PREPARE_OVERRIDE |
+			       CLK_PREPARE(prepare_cnt) |
+			       CLK_ZERO_OVERRIDE |
+			       CLK_ZERO(clk_zero_cnt) |
+			       CLK_PRE_OVERRIDE |
+			       CLK_PRE(tclk_pre_cnt) |
+			       CLK_POST_OVERRIDE |
+			       CLK_POST(tclk_post_cnt) |
+			       CLK_TRAIL_OVERRIDE |
+			       CLK_TRAIL(trail_cnt));
+
+	/* data lanes dphy timings */
+	intel_dsi->dphy_data_lane_reg = (HS_PREPARE_OVERRIDE |
+					 HS_PREPARE(prepare_cnt) |
+					 HS_ZERO_OVERRIDE |
+					 HS_ZERO(hs_zero_cnt) |
+					 HS_TRAIL_OVERRIDE |
+					 HS_TRAIL(trail_cnt) |
+					 HS_EXIT_OVERRIDE |
+					 HS_EXIT(exit_zero_cnt));
+
+	intel_dsi_log_params(intel_dsi);
+}
+
 void icl_dsi_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
@@ -1455,6 +1579,7 @@ void icl_dsi_init(struct drm_i915_private *dev_priv)
 		goto err;
 	}
 
+	icl_dphy_param_init(intel_dsi);
 	return;
 
 err:
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/display/intel_acpi.c
index 9d142d038a7d..3456d33feb46 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/display/intel_acpi.c
@@ -4,9 +4,12 @@
  *
  * _DSM related code stolen from nouveau_acpi.c.
  */
+
 #include <linux/pci.h>
 #include <linux/acpi.h>
+
 #include "i915_drv.h"
+#include "intel_acpi.h"
 
 #define INTEL_DSM_REVISION_ID 1 /* For Calpella anyway... */
 #define INTEL_DSM_FN_PLATFORM_MUX_INFO 1 /* No args */
diff --git a/drivers/gpu/drm/i915/display/intel_acpi.h b/drivers/gpu/drm/i915/display/intel_acpi.h
new file mode 100644
index 000000000000..1c576b3fb712
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_acpi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_ACPI_H__
+#define __INTEL_ACPI_H__
+
+#ifdef CONFIG_ACPI
+void intel_register_dsm_handler(void);
+void intel_unregister_dsm_handler(void);
+#else
+static inline void intel_register_dsm_handler(void) { return; }
+static inline void intel_unregister_dsm_handler(void) { return; }
+#endif /* CONFIG_ACPI */
+
+#endif /* __INTEL_ACPI_H__ */
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 8c8fae32ec50..90ca11a4ae88 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -34,6 +34,7 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_plane_helper.h>
 
+#include "intel_atomic.h"
 #include "intel_drv.h"
 #include "intel_hdcp.h"
 #include "intel_sprite.h"
@@ -104,13 +105,25 @@ int intel_digital_connector_atomic_set_property(struct drm_connector *connector,
 	return -EINVAL;
 }
 
+static bool blob_equal(const struct drm_property_blob *a,
+		       const struct drm_property_blob *b)
+{
+	if (a && b)
+		return a->length == b->length &&
+			!memcmp(a->data, b->data, a->length);
+
+	return !a == !b;
+}
+
 int intel_digital_connector_atomic_check(struct drm_connector *conn,
-					 struct drm_connector_state *new_state)
+					 struct drm_atomic_state *state)
 {
+	struct drm_connector_state *new_state =
+		drm_atomic_get_new_connector_state(state, conn);
 	struct intel_digital_connector_state *new_conn_state =
 		to_intel_digital_connector_state(new_state);
 	struct drm_connector_state *old_state =
-		drm_atomic_get_old_connector_state(new_state->state, conn);
+		drm_atomic_get_old_connector_state(state, conn);
 	struct intel_digital_connector_state *old_conn_state =
 		to_intel_digital_connector_state(old_state);
 	struct drm_crtc_state *crtc_state;
@@ -120,7 +133,7 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 	if (!new_state->crtc)
 		return 0;
 
-	crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc);
+	crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
 
 	/*
 	 * These properties are handled by fastset, and might not end
@@ -131,7 +144,9 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn,
 	    new_conn_state->base.colorspace != old_conn_state->base.colorspace ||
 	    new_conn_state->base.picture_aspect_ratio != old_conn_state->base.picture_aspect_ratio ||
 	    new_conn_state->base.content_type != old_conn_state->base.content_type ||
-	    new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode)
+	    new_conn_state->base.scaling_mode != old_conn_state->base.scaling_mode ||
+	    !blob_equal(new_conn_state->base.hdr_output_metadata,
+			old_conn_state->base.hdr_output_metadata))
 		crtc_state->mode_changed = true;
 
 	return 0;
@@ -411,3 +426,15 @@ void intel_atomic_state_clear(struct drm_atomic_state *s)
 	drm_atomic_state_default_clear(&state->base);
 	state->dpll_set = state->modeset = false;
 }
+
+struct intel_crtc_state *
+intel_atomic_get_crtc_state(struct drm_atomic_state *state,
+			    struct intel_crtc *crtc)
+{
+	struct drm_crtc_state *crtc_state;
+	crtc_state = drm_atomic_get_crtc_state(state, &crtc->base);
+	if (IS_ERR(crtc_state))
+		return ERR_CAST(crtc_state);
+
+	return to_intel_crtc_state(crtc_state);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h
new file mode 100644
index 000000000000..58065d3161a3
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_atomic.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_ATOMIC_H__
+#define __INTEL_ATOMIC_H__
+
+#include <linux/types.h>
+
+struct drm_atomic_state;
+struct drm_connector;
+struct drm_connector_state;
+struct drm_crtc;
+struct drm_crtc_state;
+struct drm_device;
+struct drm_i915_private;
+struct drm_property;
+struct intel_crtc;
+struct intel_crtc_state;
+
+int intel_digital_connector_atomic_get_property(struct drm_connector *connector,
+						const struct drm_connector_state *state,
+						struct drm_property *property,
+						u64 *val);
+int intel_digital_connector_atomic_set_property(struct drm_connector *connector,
+						struct drm_connector_state *state,
+						struct drm_property *property,
+						u64 val);
+int intel_digital_connector_atomic_check(struct drm_connector *conn,
+					 struct drm_atomic_state *state);
+struct drm_connector_state *
+intel_digital_connector_duplicate_state(struct drm_connector *connector);
+
+struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc);
+void intel_crtc_destroy_state(struct drm_crtc *crtc,
+			       struct drm_crtc_state *state);
+struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev);
+void intel_atomic_state_clear(struct drm_atomic_state *state);
+
+struct intel_crtc_state *
+intel_atomic_get_crtc_state(struct drm_atomic_state *state,
+			    struct intel_crtc *crtc);
+
+int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
+			       struct intel_crtc *intel_crtc,
+			       struct intel_crtc_state *crtc_state);
+
+#endif /* __INTEL_ATOMIC_H__ */
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index d11681d71add..30bd4e76fff9 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -114,6 +114,29 @@ intel_plane_destroy_state(struct drm_plane *plane,
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int cpp;
+
+	if (!plane_state->base.visible)
+		return 0;
+
+	cpp = fb->format->cpp[0];
+
+	/*
+	 * Based on HSD#:1408715493
+	 * NV12 cpp == 4, P010 cpp == 8
+	 *
+	 * FIXME what is the logic behind this?
+	 */
+	if (fb->format->is_yuv && fb->format->num_planes > 1)
+		cpp *= 4;
+
+	return cpp * crtc_state->pixel_rate;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
@@ -125,6 +148,7 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	new_crtc_state->active_planes &= ~BIT(plane->id);
 	new_crtc_state->nv12_planes &= ~BIT(plane->id);
 	new_crtc_state->c8_planes &= ~BIT(plane->id);
+	new_crtc_state->data_rate[plane->id] = 0;
 	new_plane_state->base.visible = false;
 
 	if (!new_plane_state->base.crtc && !old_plane_state->base.crtc)
@@ -149,6 +173,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	if (new_plane_state->base.visible || old_plane_state->base.visible)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
+	new_crtc_state->data_rate[plane->id] =
+		intel_plane_data_rate(new_crtc_state, new_plane_state);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &new_crtc_state->base,
 					       old_plane_state,
@@ -326,48 +353,3 @@ const struct drm_plane_helper_funcs intel_plane_helper_funcs = {
 	.cleanup_fb = intel_cleanup_plane_fb,
 	.atomic_check = intel_plane_atomic_check,
 };
-
-/**
- * intel_plane_atomic_get_property - fetch plane property value
- * @plane: plane to fetch property for
- * @state: state containing the property value
- * @property: property to look up
- * @val: pointer to write property value into
- *
- * The DRM core does not store shadow copies of properties for
- * atomic-capable drivers.  This entrypoint is used to fetch
- * the current value of a driver-specific plane property.
- */
-int
-intel_plane_atomic_get_property(struct drm_plane *plane,
-				const struct drm_plane_state *state,
-				struct drm_property *property,
-				u64 *val)
-{
-	DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n",
-		      property->base.id, property->name);
-	return -EINVAL;
-}
-
-/**
- * intel_plane_atomic_set_property - set plane property value
- * @plane: plane to set property for
- * @state: state to update property value in
- * @property: property to set
- * @val: value to set property to
- *
- * Writes the specified property value for a plane into the provided atomic
- * state object.
- *
- * Returns 0 on success, -EINVAL on unrecognized properties
- */
-int
-intel_plane_atomic_set_property(struct drm_plane *plane,
-				struct drm_plane_state *state,
-				struct drm_property *property,
-				u64 val)
-{
-	DRM_DEBUG_KMS("Unknown property [PROP:%d:%s]\n",
-		      property->base.id, property->name);
-	return -EINVAL;
-}
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
index 14678620440f..1437a8797e10 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
@@ -6,7 +6,11 @@
 #ifndef __INTEL_ATOMIC_PLANE_H__
 #define __INTEL_ATOMIC_PLANE_H__
 
+#include <linux/types.h>
+
+struct drm_crtc_state;
 struct drm_plane;
+struct drm_property;
 struct intel_atomic_state;
 struct intel_crtc;
 struct intel_crtc_state;
@@ -15,6 +19,8 @@ struct intel_plane_state;
 
 extern const struct drm_plane_helper_funcs intel_plane_helper_funcs;
 
+unsigned int intel_plane_data_rate(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state);
 void intel_update_plane(struct intel_plane *plane,
 			const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
@@ -36,5 +42,9 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 					struct intel_crtc_state *crtc_state,
 					const struct intel_plane_state *old_plane_state,
 					struct intel_plane_state *intel_state);
+int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
+				    struct drm_crtc_state *crtc_state,
+				    const struct intel_plane_state *old_plane_state,
+				    struct drm_plane_state *plane_state);
 
 #endif /* __INTEL_ATOMIC_PLANE_H__ */
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index bca4cc025d3d..840daff12246 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -26,11 +26,11 @@
 
 #include <drm/drm_edid.h>
 #include <drm/i915_component.h>
-#include <drm/intel_lpe_audio.h>
 
 #include "i915_drv.h"
 #include "intel_audio.h"
 #include "intel_drv.h"
+#include "intel_lpe_audio.h"
 
 /**
  * DOC: High Definition Audio over HDMI and Display Port
@@ -319,9 +319,8 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_audio_component *acomp = dev_priv->audio_component;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	enum port port = encoder->port;
-	enum pipe pipe = crtc->pipe;
 	const struct dp_aud_n_m *nm;
 	int rate;
 	u32 tmp;
@@ -333,7 +332,7 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder,
 	else
 		DRM_DEBUG_KMS("using automatic Maud, Naud\n");
 
-	tmp = I915_READ(HSW_AUD_CFG(pipe));
+	tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder));
 	tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
 	tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK;
 	tmp &= ~AUD_CONFIG_N_PROG_ENABLE;
@@ -345,9 +344,9 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder,
 		tmp |= AUD_CONFIG_N_PROG_ENABLE;
 	}
 
-	I915_WRITE(HSW_AUD_CFG(pipe), tmp);
+	I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp);
 
-	tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe));
+	tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(cpu_transcoder));
 	tmp &= ~AUD_CONFIG_M_MASK;
 	tmp &= ~AUD_M_CTS_M_VALUE_INDEX;
 	tmp &= ~AUD_M_CTS_M_PROG_ENABLE;
@@ -358,7 +357,7 @@ hsw_dp_audio_config_update(struct intel_encoder *encoder,
 		tmp |= AUD_M_CTS_M_PROG_ENABLE;
 	}
 
-	I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp);
+	I915_WRITE(HSW_AUD_M_CTS_ENABLE(cpu_transcoder), tmp);
 }
 
 static void
@@ -367,15 +366,14 @@ hsw_hdmi_audio_config_update(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct i915_audio_component *acomp = dev_priv->audio_component;
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	enum port port = encoder->port;
-	enum pipe pipe = crtc->pipe;
 	int n, rate;
 	u32 tmp;
 
 	rate = acomp ? acomp->aud_sample_rate[port] : 0;
 
-	tmp = I915_READ(HSW_AUD_CFG(pipe));
+	tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder));
 	tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
 	tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK;
 	tmp &= ~AUD_CONFIG_N_PROG_ENABLE;
@@ -392,16 +390,16 @@ hsw_hdmi_audio_config_update(struct intel_encoder *encoder,
 		DRM_DEBUG_KMS("using automatic N\n");
 	}
 
-	I915_WRITE(HSW_AUD_CFG(pipe), tmp);
+	I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp);
 
 	/*
 	 * Let's disable "Enable CTS or M Prog bit"
 	 * and let HW calculate the value
 	 */
-	tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe));
+	tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(cpu_transcoder));
 	tmp &= ~AUD_M_CTS_M_PROG_ENABLE;
 	tmp &= ~AUD_M_CTS_M_VALUE_INDEX;
-	I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp);
+	I915_WRITE(HSW_AUD_M_CTS_ENABLE(cpu_transcoder), tmp);
 }
 
 static void
@@ -419,28 +417,28 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder,
 				    const struct drm_connector_state *old_conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
-	enum pipe pipe = crtc->pipe;
+	enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder;
 	u32 tmp;
 
-	DRM_DEBUG_KMS("Disable audio codec on pipe %c\n", pipe_name(pipe));
+	DRM_DEBUG_KMS("Disable audio codec on transcoder %s\n",
+		      transcoder_name(cpu_transcoder));
 
 	mutex_lock(&dev_priv->av_mutex);
 
 	/* Disable timestamps */
-	tmp = I915_READ(HSW_AUD_CFG(pipe));
+	tmp = I915_READ(HSW_AUD_CFG(cpu_transcoder));
 	tmp &= ~AUD_CONFIG_N_VALUE_INDEX;
 	tmp |= AUD_CONFIG_N_PROG_ENABLE;
 	tmp &= ~AUD_CONFIG_UPPER_N_MASK;
 	tmp &= ~AUD_CONFIG_LOWER_N_MASK;
 	if (intel_crtc_has_dp_encoder(old_crtc_state))
 		tmp |= AUD_CONFIG_N_VALUE_INDEX;
-	I915_WRITE(HSW_AUD_CFG(pipe), tmp);
+	I915_WRITE(HSW_AUD_CFG(cpu_transcoder), tmp);
 
 	/* Invalidate ELD */
 	tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
-	tmp &= ~AUDIO_ELD_VALID(pipe);
-	tmp &= ~AUDIO_OUTPUT_ENABLE(pipe);
+	tmp &= ~AUDIO_ELD_VALID(cpu_transcoder);
+	tmp &= ~AUDIO_OUTPUT_ENABLE(cpu_transcoder);
 	I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp);
 
 	mutex_unlock(&dev_priv->av_mutex);
@@ -451,22 +449,21 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder,
 				   const struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_connector *connector = conn_state->connector;
-	enum pipe pipe = crtc->pipe;
+	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
 	const u8 *eld = connector->eld;
 	u32 tmp;
 	int len, i;
 
-	DRM_DEBUG_KMS("Enable audio codec on pipe %c, %u bytes ELD\n",
-		      pipe_name(pipe), drm_eld_size(eld));
+	DRM_DEBUG_KMS("Enable audio codec on transcoder %s, %u bytes ELD\n",
+		      transcoder_name(cpu_transcoder), drm_eld_size(eld));
 
 	mutex_lock(&dev_priv->av_mutex);
 
 	/* Enable audio presence detect, invalidate ELD */
 	tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
-	tmp |= AUDIO_OUTPUT_ENABLE(pipe);
-	tmp &= ~AUDIO_ELD_VALID(pipe);
+	tmp |= AUDIO_OUTPUT_ENABLE(cpu_transcoder);
+	tmp &= ~AUDIO_ELD_VALID(cpu_transcoder);
 	I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp);
 
 	/*
@@ -477,18 +474,18 @@ static void hsw_audio_codec_enable(struct intel_encoder *encoder,
 	 */
 
 	/* Reset ELD write address */
-	tmp = I915_READ(HSW_AUD_DIP_ELD_CTRL(pipe));
+	tmp = I915_READ(HSW_AUD_DIP_ELD_CTRL(cpu_transcoder));
 	tmp &= ~IBX_ELD_ADDRESS_MASK;
-	I915_WRITE(HSW_AUD_DIP_ELD_CTRL(pipe), tmp);
+	I915_WRITE(HSW_AUD_DIP_ELD_CTRL(cpu_transcoder), tmp);
 
 	/* Up to 84 bytes of hw ELD buffer */
 	len = min(drm_eld_size(eld), 84);
 	for (i = 0; i < len / 4; i++)
-		I915_WRITE(HSW_AUD_EDID_DATA(pipe), *((const u32 *)eld + i));
+		I915_WRITE(HSW_AUD_EDID_DATA(cpu_transcoder), *((const u32 *)eld + i));
 
 	/* ELD valid */
 	tmp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
-	tmp |= AUDIO_ELD_VALID(pipe);
+	tmp |= AUDIO_ELD_VALID(cpu_transcoder);
 	I915_WRITE(HSW_AUD_PIN_ELD_CP_VLD, tmp);
 
 	/* Enable timestamps */
@@ -644,8 +641,10 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 	enum port port = encoder->port;
 	enum pipe pipe = crtc->pipe;
 
+	/* FIXME precompute the ELD in .compute_config() */
 	if (!connector->eld[0])
-		return;
+		DRM_DEBUG_KMS("Bogus ELD on [CONNECTOR:%d:%s]\n",
+			      connector->base.id, connector->name);
 
 	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
 			 connector->base.id,
diff --git a/drivers/gpu/drm/i915/intel_audio.h b/drivers/gpu/drm/i915/display/intel_audio.h
index a3657c7a7ba2..a3657c7a7ba2 100644
--- a/drivers/gpu/drm/i915/intel_audio.h
+++ b/drivers/gpu/drm/i915/display/intel_audio.h
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index 1dc8d03ff127..c4710889cb32 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -27,6 +27,9 @@
 
 #include <drm/drm_dp_helper.h>
 #include <drm/i915_drm.h>
+
+#include "display/intel_gmbus.h"
+
 #include "i915_drv.h"
 
 #define _INTEL_BIOS_PRIVATE
@@ -74,13 +77,13 @@ static u32 get_blocksize(const void *block_data)
 }
 
 static const void *
-find_section(const void *_bdb, int section_id)
+find_section(const void *_bdb, enum bdb_block_id section_id)
 {
 	const struct bdb_header *bdb = _bdb;
 	const u8 *base = _bdb;
 	int index = 0;
 	u32 total, current_size;
-	u8 current_id;
+	enum bdb_block_id current_id;
 
 	/* skip to first section */
 	index += bdb->header_size;
@@ -300,7 +303,7 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv,
 		    const struct bdb_header *bdb)
 {
 	const struct bdb_lfp_backlight_data *backlight_data;
-	const struct bdb_lfp_backlight_data_entry *entry;
+	const struct lfp_backlight_data_entry *entry;
 	int panel_type = dev_priv->vbt.panel_type;
 
 	backlight_data = find_section(bdb, BDB_LVDS_BACKLIGHT);
@@ -325,7 +328,7 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv,
 	dev_priv->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI;
 	if (bdb->version >= 191 &&
 	    get_blocksize(backlight_data) >= sizeof(*backlight_data)) {
-		const struct bdb_lfp_backlight_control_method *method;
+		const struct lfp_backlight_control_method *method;
 
 		method = &backlight_data->backlight_control[panel_type];
 		dev_priv->vbt.backlight.type = method->type;
@@ -349,7 +352,7 @@ static void
 parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
 		      const struct bdb_header *bdb)
 {
-	const struct lvds_dvo_timing *dvo_timing;
+	const struct bdb_sdvo_panel_dtds *dtds;
 	struct drm_display_mode *panel_fixed_mode;
 	int index;
 
@@ -369,15 +372,15 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
 		index = sdvo_lvds_options->panel_type;
 	}
 
-	dvo_timing = find_section(bdb, BDB_SDVO_PANEL_DTDS);
-	if (!dvo_timing)
+	dtds = find_section(bdb, BDB_SDVO_PANEL_DTDS);
+	if (!dtds)
 		return;
 
 	panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL);
 	if (!panel_fixed_mode)
 		return;
 
-	fill_detail_timing_data(panel_fixed_mode, dvo_timing + index);
+	fill_detail_timing_data(panel_fixed_mode, &dtds->dtds[index]);
 
 	dev_priv->vbt.sdvo_lvds_vbt_mode = panel_fixed_mode;
 
@@ -1237,27 +1240,36 @@ static u8 translate_iboost(u8 val)
 	return mapping[val];
 }
 
+static enum port get_port_by_ddc_pin(struct drm_i915_private *i915, u8 ddc_pin)
+{
+	const struct ddi_vbt_port_info *info;
+	enum port port;
+
+	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
+		info = &i915->vbt.ddi_port_info[port];
+
+		if (info->child && ddc_pin == info->alternate_ddc_pin)
+			return port;
+	}
+
+	return PORT_NONE;
+}
+
 static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
 			     enum port port)
 {
-	const struct ddi_vbt_port_info *info =
-		&dev_priv->vbt.ddi_port_info[port];
+	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
 	enum port p;
 
 	if (!info->alternate_ddc_pin)
 		return;
 
-	for (p = PORT_A; p < I915_MAX_PORTS; p++) {
-		struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p];
-
-		if (p == port || !i->present ||
-		    info->alternate_ddc_pin != i->alternate_ddc_pin)
-			continue;
-
+	p = get_port_by_ddc_pin(dev_priv, info->alternate_ddc_pin);
+	if (p != PORT_NONE) {
 		DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
 			      "disabling port %c DVI/HDMI support\n",
-			      port_name(p), i->alternate_ddc_pin,
-			      port_name(port), port_name(p));
+			      port_name(port), info->alternate_ddc_pin,
+			      port_name(p), port_name(port));
 
 		/*
 		 * If we have multiple ports supposedly sharing the
@@ -1265,36 +1277,45 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
 		 * port. Otherwise they share the same ddc bin and
 		 * system couldn't communicate with them separately.
 		 *
-		 * Due to parsing the ports in child device order,
-		 * a later device will always clobber an earlier one.
+		 * Give child device order the priority, first come first
+		 * served.
 		 */
-		i->supports_dvi = false;
-		i->supports_hdmi = false;
-		i->alternate_ddc_pin = 0;
+		info->supports_dvi = false;
+		info->supports_hdmi = false;
+		info->alternate_ddc_pin = 0;
 	}
 }
 
+static enum port get_port_by_aux_ch(struct drm_i915_private *i915, u8 aux_ch)
+{
+	const struct ddi_vbt_port_info *info;
+	enum port port;
+
+	for (port = PORT_A; port < I915_MAX_PORTS; port++) {
+		info = &i915->vbt.ddi_port_info[port];
+
+		if (info->child && aux_ch == info->alternate_aux_channel)
+			return port;
+	}
+
+	return PORT_NONE;
+}
+
 static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
 			    enum port port)
 {
-	const struct ddi_vbt_port_info *info =
-		&dev_priv->vbt.ddi_port_info[port];
+	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
 	enum port p;
 
 	if (!info->alternate_aux_channel)
 		return;
 
-	for (p = PORT_A; p < I915_MAX_PORTS; p++) {
-		struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p];
-
-		if (p == port || !i->present ||
-		    info->alternate_aux_channel != i->alternate_aux_channel)
-			continue;
-
+	p = get_port_by_aux_ch(dev_priv, info->alternate_aux_channel);
+	if (p != PORT_NONE) {
 		DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
 			      "disabling port %c DP support\n",
-			      port_name(p), i->alternate_aux_channel,
-			      port_name(port), port_name(p));
+			      port_name(port), info->alternate_aux_channel,
+			      port_name(p), port_name(port));
 
 		/*
 		 * If we have multiple ports supposedlt sharing the
@@ -1302,11 +1323,11 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
 		 * port. Otherwise they share the same aux channel
 		 * and system couldn't communicate with them separately.
 		 *
-		 * Due to parsing the ports in child device order,
-		 * a later device will always clobber an earlier one.
+		 * Give child device order the priority, first come first
+		 * served.
 		 */
-		i->supports_dp = false;
-		i->alternate_aux_channel = 0;
+		info->supports_dp = false;
+		info->alternate_aux_channel = 0;
 	}
 }
 
@@ -1327,12 +1348,21 @@ static const u8 icp_ddc_pin_map[] = {
 	[ICL_DDC_BUS_PORT_4] = GMBUS_PIN_12_TC4_ICP,
 };
 
+static const u8 mcc_ddc_pin_map[] = {
+	[MCC_DDC_BUS_DDI_A] = GMBUS_PIN_1_BXT,
+	[MCC_DDC_BUS_DDI_B] = GMBUS_PIN_2_BXT,
+	[MCC_DDC_BUS_DDI_C] = GMBUS_PIN_9_TC1_ICP,
+};
+
 static u8 map_ddc_pin(struct drm_i915_private *dev_priv, u8 vbt_pin)
 {
 	const u8 *ddc_pin_map;
 	int n_entries;
 
-	if (HAS_PCH_ICP(dev_priv)) {
+	if (HAS_PCH_MCC(dev_priv)) {
+		ddc_pin_map = mcc_ddc_pin_map;
+		n_entries = ARRAY_SIZE(mcc_ddc_pin_map);
+	} else if (HAS_PCH_ICP(dev_priv)) {
 		ddc_pin_map = icp_ddc_pin_map;
 		n_entries = ARRAY_SIZE(icp_ddc_pin_map);
 	} else if (HAS_PCH_CNP(dev_priv)) {
@@ -1395,14 +1425,12 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 
 	info = &dev_priv->vbt.ddi_port_info[port];
 
-	if (info->present) {
+	if (info->child) {
 		DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n",
 			      port_name(port));
 		return;
 	}
 
-	info->present = true;
-
 	is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING;
 	is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT;
 	is_crt = child->device_type & DEVICE_TYPE_ANALOG_OUTPUT;
@@ -1427,8 +1455,9 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 	if (bdb_version >= 209)
 		info->supports_tbt = child->tbt;
 
-	DRM_DEBUG_KMS("Port %c VBT info: DP:%d HDMI:%d DVI:%d EDP:%d CRT:%d TCUSB:%d TBT:%d\n",
-		      port_name(port), is_dp, is_hdmi, is_dvi, is_edp, is_crt,
+	DRM_DEBUG_KMS("Port %c VBT info: CRT:%d DVI:%d HDMI:%d DP:%d eDP:%d LSPCON:%d USB-Type-C:%d TBT:%d\n",
+		      port_name(port), is_crt, is_dvi, is_hdmi, is_dp, is_edp,
+		      HAS_LSPCON(dev_priv) && child->lspcon,
 		      info->supports_typec_usb, info->supports_tbt);
 
 	if (is_edp && is_dvi)
@@ -1530,6 +1559,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv,
 		DRM_DEBUG_KMS("VBT DP max link rate for port %c: %d\n",
 			      port_name(port), info->dp_max_link_rate);
 	}
+
+	info->child = child;
 }
 
 static void parse_ddi_ports(struct drm_i915_private *dev_priv, u8 bdb_version)
@@ -2149,106 +2180,39 @@ bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv,
 
 /**
  * intel_bios_is_port_hpd_inverted - is HPD inverted for %port
- * @dev_priv:	i915 device instance
+ * @i915:	i915 device instance
  * @port:	port to check
  *
  * Return true if HPD should be inverted for %port.
  */
 bool
-intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv,
+intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915,
 				enum port port)
 {
-	const struct child_device_config *child;
-	int i;
+	const struct child_device_config *child =
+		i915->vbt.ddi_port_info[port].child;
 
-	if (WARN_ON_ONCE(!IS_GEN9_LP(dev_priv)))
+	if (WARN_ON_ONCE(!IS_GEN9_LP(i915)))
 		return false;
 
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
-
-		if (!child->hpd_invert)
-			continue;
-
-		switch (child->dvo_port) {
-		case DVO_PORT_DPA:
-		case DVO_PORT_HDMIA:
-			if (port == PORT_A)
-				return true;
-			break;
-		case DVO_PORT_DPB:
-		case DVO_PORT_HDMIB:
-			if (port == PORT_B)
-				return true;
-			break;
-		case DVO_PORT_DPC:
-		case DVO_PORT_HDMIC:
-			if (port == PORT_C)
-				return true;
-			break;
-		default:
-			break;
-		}
-	}
-
-	return false;
+	return child && child->hpd_invert;
 }
 
 /**
  * intel_bios_is_lspcon_present - if LSPCON is attached on %port
- * @dev_priv:	i915 device instance
+ * @i915:	i915 device instance
  * @port:	port to check
  *
  * Return true if LSPCON is present on this port
  */
 bool
-intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv,
-				enum port port)
+intel_bios_is_lspcon_present(const struct drm_i915_private *i915,
+			     enum port port)
 {
-	const struct child_device_config *child;
-	int i;
-
-	if (!HAS_LSPCON(dev_priv))
-		return false;
-
-	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
-		child = dev_priv->vbt.child_dev + i;
-
-		if (!child->lspcon)
-			continue;
+	const struct child_device_config *child =
+		i915->vbt.ddi_port_info[port].child;
 
-		switch (child->dvo_port) {
-		case DVO_PORT_DPA:
-		case DVO_PORT_HDMIA:
-			if (port == PORT_A)
-				return true;
-			break;
-		case DVO_PORT_DPB:
-		case DVO_PORT_HDMIB:
-			if (port == PORT_B)
-				return true;
-			break;
-		case DVO_PORT_DPC:
-		case DVO_PORT_HDMIC:
-			if (port == PORT_C)
-				return true;
-			break;
-		case DVO_PORT_DPD:
-		case DVO_PORT_HDMID:
-			if (port == PORT_D)
-				return true;
-			break;
-		case DVO_PORT_DPF:
-		case DVO_PORT_HDMIF:
-			if (port == PORT_F)
-				return true;
-			break;
-		default:
-			break;
-		}
-	}
-
-	return false;
+	return HAS_LSPCON(i915) && child && child->lspcon;
 }
 
 enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h
index 7e3545f65257..4e42cfaf61a7 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/display/intel_bios.h
@@ -30,6 +30,12 @@
 #ifndef _INTEL_BIOS_H_
 #define _INTEL_BIOS_H_
 
+#include <linux/types.h>
+
+#include <drm/i915_drm.h>
+
+struct drm_i915_private;
+
 enum intel_backlight_type {
 	INTEL_BACKLIGHT_PMIC,
 	INTEL_BACKLIGHT_LPSS,
@@ -220,4 +226,19 @@ struct mipi_pps_data {
 	u16 panel_power_cycle_delay;
 } __packed;
 
+void intel_bios_init(struct drm_i915_private *dev_priv);
+void intel_bios_cleanup(struct drm_i915_private *dev_priv);
+bool intel_bios_is_valid_vbt(const void *buf, size_t size);
+bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
+bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
+bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port);
+bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
+bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port);
+bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port);
+bool intel_bios_is_port_hpd_inverted(const struct drm_i915_private *i915,
+				     enum port port);
+bool intel_bios_is_lspcon_present(const struct drm_i915_private *i915,
+				  enum port port);
+enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port);
+
 #endif /* _INTEL_BIOS_H_ */
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
new file mode 100644
index 000000000000..753ac3165061
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_atomic_state_helper.h>
+
+#include "intel_bw.h"
+#include "intel_drv.h"
+#include "intel_sideband.h"
+
+/* Parameters for Qclk Geyserville (QGV) */
+struct intel_qgv_point {
+	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
+};
+
+struct intel_qgv_info {
+	struct intel_qgv_point points[3];
+	u8 num_points;
+	u8 num_channels;
+	u8 t_bl;
+	enum intel_dram_type dram_type;
+};
+
+static int icl_pcode_read_mem_global_info(struct drm_i915_private *dev_priv,
+					  struct intel_qgv_info *qi)
+{
+	u32 val = 0;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO,
+				     &val, NULL);
+	if (ret)
+		return ret;
+
+	switch (val & 0xf) {
+	case 0:
+		qi->dram_type = INTEL_DRAM_DDR4;
+		break;
+	case 1:
+		qi->dram_type = INTEL_DRAM_DDR3;
+		break;
+	case 2:
+		qi->dram_type = INTEL_DRAM_LPDDR3;
+		break;
+	case 3:
+		qi->dram_type = INTEL_DRAM_LPDDR3;
+		break;
+	default:
+		MISSING_CASE(val & 0xf);
+		break;
+	}
+
+	qi->num_channels = (val & 0xf0) >> 4;
+	qi->num_points = (val & 0xf00) >> 8;
+
+	qi->t_bl = qi->dram_type == INTEL_DRAM_DDR4 ? 4 : 8;
+
+	return 0;
+}
+
+static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
+					 struct intel_qgv_point *sp,
+					 int point)
+{
+	u32 val = 0, val2;
+	int ret;
+
+	ret = sandybridge_pcode_read(dev_priv,
+				     ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
+				     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
+				     &val, &val2);
+	if (ret)
+		return ret;
+
+	sp->dclk = val & 0xffff;
+	sp->t_rp = (val & 0xff0000) >> 16;
+	sp->t_rcd = (val & 0xff000000) >> 24;
+
+	sp->t_rdpre = val2 & 0xff;
+	sp->t_ras = (val2 & 0xff00) >> 8;
+
+	sp->t_rc = sp->t_rp + sp->t_ras;
+
+	return 0;
+}
+
+static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
+			      struct intel_qgv_info *qi)
+{
+	int i, ret;
+
+	ret = icl_pcode_read_mem_global_info(dev_priv, qi);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(qi->num_points > ARRAY_SIZE(qi->points)))
+		qi->num_points = ARRAY_SIZE(qi->points);
+
+	for (i = 0; i < qi->num_points; i++) {
+		struct intel_qgv_point *sp = &qi->points[i];
+
+		ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);
+		if (ret)
+			return ret;
+
+		DRM_DEBUG_KMS("QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
+			      i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
+			      sp->t_rcd, sp->t_rc);
+	}
+
+	return 0;
+}
+
+static int icl_calc_bw(int dclk, int num, int den)
+{
+	/* multiples of 16.666MHz (100/6) */
+	return DIV_ROUND_CLOSEST(num * dclk * 100, den * 6);
+}
+
+static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
+{
+	u16 dclk = 0;
+	int i;
+
+	for (i = 0; i < qi->num_points; i++)
+		dclk = max(dclk, qi->points[i].dclk);
+
+	return dclk;
+}
+
+struct intel_sa_info {
+	u8 deburst, mpagesize, deprogbwlimit, displayrtids;
+};
+
+static const struct intel_sa_info icl_sa_info = {
+	.deburst = 8,
+	.mpagesize = 16,
+	.deprogbwlimit = 25, /* GB/s */
+	.displayrtids = 128,
+};
+
+static int icl_get_bw_info(struct drm_i915_private *dev_priv)
+{
+	struct intel_qgv_info qi = {};
+	const struct intel_sa_info *sa = &icl_sa_info;
+	bool is_y_tile = true; /* assume y tile may be used */
+	int num_channels;
+	int deinterleave;
+	int ipqdepth, ipqdepthpch;
+	int dclk_max;
+	int maxdebw;
+	int i, ret;
+
+	ret = icl_get_qgv_points(dev_priv, &qi);
+	if (ret) {
+		DRM_DEBUG_KMS("Failed to get memory subsystem information, ignoring bandwidth limits");
+		return ret;
+	}
+	num_channels = qi.num_channels;
+
+	deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
+	dclk_max = icl_sagv_max_dclk(&qi);
+
+	ipqdepthpch = 16;
+
+	maxdebw = min(sa->deprogbwlimit * 1000,
+		      icl_calc_bw(dclk_max, 16, 1) * 6 / 10); /* 60% */
+	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		struct intel_bw_info *bi = &dev_priv->max_bw[i];
+		int clpchgroup;
+		int j;
+
+		clpchgroup = (sa->deburst * deinterleave / num_channels) << i;
+		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;
+
+		for (j = 0; j < qi.num_points; j++) {
+			const struct intel_qgv_point *sp = &qi.points[j];
+			int ct, bw;
+
+			/*
+			 * Max row cycle time
+			 *
+			 * FIXME what is the logic behind the
+			 * assumed burst length?
+			 */
+			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
+				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
+			bw = icl_calc_bw(sp->dclk, clpchgroup * 32 * num_channels, ct);
+
+			bi->deratedbw[j] = min(maxdebw,
+					       bw * 9 / 10); /* 90% */
+
+			DRM_DEBUG_KMS("BW%d / QGV %d: num_planes=%d deratedbw=%d\n",
+				      i, j, bi->num_planes, bi->deratedbw[j]);
+		}
+
+		if (bi->num_planes == 1)
+			break;
+	}
+
+	return 0;
+}
+
+static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
+			       int num_planes, int qgv_point)
+{
+	int i;
+
+	/* Did we initialize the bw limits successfully? */
+	if (dev_priv->max_bw[0].num_planes == 0)
+		return UINT_MAX;
+
+	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
+		const struct intel_bw_info *bi =
+			&dev_priv->max_bw[i];
+
+		if (num_planes >= bi->num_planes)
+			return bi->deratedbw[qgv_point];
+	}
+
+	return 0;
+}
+
+void intel_bw_init_hw(struct drm_i915_private *dev_priv)
+{
+	if (IS_GEN(dev_priv, 11))
+		icl_get_bw_info(dev_priv);
+}
+
+static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv,
+					int num_planes)
+{
+	if (IS_GEN(dev_priv, 11))
+		/*
+		 * FIXME with SAGV disabled maybe we can assume
+		 * point 1 will always be used? Seems to match
+		 * the behaviour observed in the wild.
+		 */
+		return min3(icl_max_bw(dev_priv, num_planes, 0),
+			    icl_max_bw(dev_priv, num_planes, 1),
+			    icl_max_bw(dev_priv, num_planes, 2));
+	else
+		return UINT_MAX;
+}
+
+static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
+{
+	/*
+	 * We assume cursors are small enough
+	 * to not not cause bandwidth problems.
+	 */
+	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
+}
+
+static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	unsigned int data_rate = 0;
+	enum plane_id plane_id;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		/*
+		 * We assume cursors are small enough
+		 * to not not cause bandwidth problems.
+		 */
+		if (plane_id == PLANE_CURSOR)
+			continue;
+
+		data_rate += crtc_state->data_rate[plane_id];
+	}
+
+	return data_rate;
+}
+
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+
+	bw_state->data_rate[crtc->pipe] =
+		intel_bw_crtc_data_rate(crtc_state);
+	bw_state->num_active_planes[crtc->pipe] =
+		intel_bw_crtc_num_active_planes(crtc_state);
+
+	DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+		      pipe_name(crtc->pipe),
+		      bw_state->data_rate[crtc->pipe],
+		      bw_state->num_active_planes[crtc->pipe]);
+}
+
+static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
+					       const struct intel_bw_state *bw_state)
+{
+	unsigned int num_active_planes = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		num_active_planes += bw_state->num_active_planes[pipe];
+
+	return num_active_planes;
+}
+
+static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
+				       const struct intel_bw_state *bw_state)
+{
+	unsigned int data_rate = 0;
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		data_rate += bw_state->data_rate[pipe];
+
+	return data_rate;
+}
+
+int intel_bw_atomic_check(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_bw_state *bw_state = NULL;
+	unsigned int data_rate, max_data_rate;
+	unsigned int num_active_planes;
+	struct intel_crtc *crtc;
+	int i;
+
+	/* FIXME earlier gens need some checks too */
+	if (INTEL_GEN(dev_priv) < 11)
+		return 0;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		unsigned int old_data_rate =
+			intel_bw_crtc_data_rate(old_crtc_state);
+		unsigned int new_data_rate =
+			intel_bw_crtc_data_rate(new_crtc_state);
+		unsigned int old_active_planes =
+			intel_bw_crtc_num_active_planes(old_crtc_state);
+		unsigned int new_active_planes =
+			intel_bw_crtc_num_active_planes(new_crtc_state);
+
+		/*
+		 * Avoid locking the bw state when
+		 * nothing significant has changed.
+		 */
+		if (old_data_rate == new_data_rate &&
+		    old_active_planes == new_active_planes)
+			continue;
+
+		bw_state  = intel_atomic_get_bw_state(state);
+		if (IS_ERR(bw_state))
+			return PTR_ERR(bw_state);
+
+		bw_state->data_rate[crtc->pipe] = new_data_rate;
+		bw_state->num_active_planes[crtc->pipe] = new_active_planes;
+
+		DRM_DEBUG_KMS("pipe %c data rate %u num active planes %u\n",
+			      pipe_name(crtc->pipe),
+			      bw_state->data_rate[crtc->pipe],
+			      bw_state->num_active_planes[crtc->pipe]);
+	}
+
+	if (!bw_state)
+		return 0;
+
+	data_rate = intel_bw_data_rate(dev_priv, bw_state);
+	num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state);
+
+	max_data_rate = intel_max_data_rate(dev_priv, num_active_planes);
+
+	data_rate = DIV_ROUND_UP(data_rate, 1000);
+
+	if (data_rate > max_data_rate) {
+		DRM_DEBUG_KMS("Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n",
+			      data_rate, max_data_rate, num_active_planes);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct drm_private_state *intel_bw_duplicate_state(struct drm_private_obj *obj)
+{
+	struct intel_bw_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void intel_bw_destroy_state(struct drm_private_obj *obj,
+				   struct drm_private_state *state)
+{
+	kfree(state);
+}
+
+static const struct drm_private_state_funcs intel_bw_funcs = {
+	.atomic_duplicate_state = intel_bw_duplicate_state,
+	.atomic_destroy_state = intel_bw_destroy_state,
+};
+
+int intel_bw_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_bw_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	drm_atomic_private_obj_init(&dev_priv->drm, &dev_priv->bw_obj,
+				    &state->base, &intel_bw_funcs);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
new file mode 100644
index 000000000000..e9d9c6d63bc3
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_BW_H__
+#define __INTEL_BW_H__
+
+#include <drm/drm_atomic.h>
+
+#include "i915_drv.h"
+#include "intel_display.h"
+
+struct drm_i915_private;
+struct intel_atomic_state;
+struct intel_crtc_state;
+
+struct intel_bw_state {
+	struct drm_private_state base;
+
+	unsigned int data_rate[I915_MAX_PIPES];
+	u8 num_active_planes[I915_MAX_PIPES];
+};
+
+#define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base)
+
+static inline struct intel_bw_state *
+intel_atomic_get_bw_state(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_private_state *bw_state;
+
+	bw_state = drm_atomic_get_private_obj_state(&state->base,
+						    &dev_priv->bw_obj);
+	if (IS_ERR(bw_state))
+		return ERR_CAST(bw_state);
+
+	return to_intel_bw_state(bw_state);
+}
+
+void intel_bw_init_hw(struct drm_i915_private *dev_priv);
+int intel_bw_init(struct drm_i915_private *dev_priv);
+int intel_bw_atomic_check(struct intel_atomic_state *state);
+void intel_bw_crtc_update(struct intel_bw_state *bw_state,
+			  const struct intel_crtc_state *crtc_state);
+
+#endif /* __INTEL_BW_H__ */
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index ae40a8679314..8993ab283562 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -23,6 +23,7 @@
 
 #include "intel_cdclk.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: CDCLK / RAWCLK
@@ -464,14 +465,18 @@ static void vlv_get_cdclk(struct drm_i915_private *dev_priv,
 {
 	u32 val;
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
+
 	cdclk_state->vco = vlv_get_hpll_vco(dev_priv);
 	cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk",
 					       CCK_DISPLAY_CLOCK_CONTROL,
 					       cdclk_state->vco);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
-	mutex_unlock(&dev_priv->pcu_lock);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) | BIT(VLV_IOSF_SB_PUNIT));
 
 	if (IS_VALLEYVIEW(dev_priv))
 		cdclk_state->voltage_level = (val & DSPFREQGUAR_MASK) >>
@@ -545,7 +550,11 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
-	mutex_lock(&dev_priv->pcu_lock);
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
+
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
 	val &= ~DSPFREQGUAR_MASK;
 	val |= (cmd << DSPFREQGUAR_SHIFT);
@@ -555,9 +564,6 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	mutex_lock(&dev_priv->sb_lock);
 
 	if (cdclk == 400000) {
 		u32 divider;
@@ -591,7 +597,10 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv,
 		val |= 3000 / 250; /* 3.0 usec */
 	vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_CCK) |
+			BIT(VLV_IOSF_SB_BUNIT) |
+			BIT(VLV_IOSF_SB_PUNIT));
 
 	intel_update_cdclk(dev_priv);
 
@@ -627,7 +636,7 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	wakeref = intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A);
 
-	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
 	val &= ~DSPFREQGUAR_MASK_CHV;
 	val |= (cmd << DSPFREQGUAR_SHIFT_CHV);
@@ -637,7 +646,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv,
 		     50)) {
 		DRM_ERROR("timed out waiting for CDclk change\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+
+	vlv_punit_put(dev_priv);
 
 	intel_update_cdclk(dev_priv);
 
@@ -716,10 +726,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 		 "trying to change cdclk frequency with cdclk not enabled\n"))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write(dev_priv,
 				      BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("failed to inform pcode about cdclk change\n");
 		return;
@@ -768,10 +776,8 @@ static void bdw_set_cdclk(struct drm_i915_private *dev_priv,
 			LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 		DRM_ERROR("Switching back to LCPLL failed\n");
 
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1);
 
@@ -803,20 +809,14 @@ static int skl_calc_cdclk(int min_cdclk, int vco)
 
 static u8 skl_calc_voltage_level(int cdclk)
 {
-	switch (cdclk) {
-	default:
-	case 308571:
-	case 337500:
-		return 0;
-	case 450000:
-	case 432000:
-		return 1;
-	case 540000:
-		return 2;
-	case 617143:
-	case 675000:
+	if (cdclk > 540000)
 		return 3;
-	}
+	else if (cdclk > 450000)
+		return 2;
+	else if (cdclk > 337500)
+		return 1;
+	else
+		return 0;
 }
 
 static void skl_dpll0_update(struct drm_i915_private *dev_priv,
@@ -1010,12 +1010,10 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	 */
 	WARN_ON_ONCE(IS_SKYLAKE(dev_priv) && vco == 8640000);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1079,10 +1077,8 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 	POSTING_READ(CDCLK_CTL);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 }
@@ -1379,12 +1375,9 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	 * requires us to wait up to 150usec, but that leads to timeouts;
 	 * the 2ms used here is based on experiment.
 	 */
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      0x80000000, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1414,7 +1407,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	if (pipe != INVALID_PIPE)
 		intel_wait_for_vblank(dev_priv, pipe);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	/*
 	 * The timeout isn't specified, the 2ms used here is based on
 	 * experiment.
@@ -1424,8 +1416,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv,
 	ret = sandybridge_pcode_write_timeout(dev_priv,
 					      HSW_PCODE_DE_WRITE_FREQ_REQ,
 					      cdclk_state->voltage_level, 150, 2);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	if (ret) {
 		DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n",
 			  ret, cdclk);
@@ -1535,15 +1525,12 @@ static int cnl_calc_cdclk(int min_cdclk)
 
 static u8 cnl_calc_voltage_level(int cdclk)
 {
-	switch (cdclk) {
-	default:
-	case 168000:
-		return 0;
-	case 336000:
-		return 1;
-	case 528000:
+	if (cdclk > 336000)
 		return 2;
-	}
+	else if (cdclk > 168000)
+		return 1;
+	else
+		return 0;
 }
 
 static void cnl_cdclk_pll_update(struct drm_i915_private *dev_priv,
@@ -1648,12 +1635,10 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 	u32 val, divider;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1692,10 +1677,8 @@ static void cnl_set_cdclk(struct drm_i915_private *dev_priv,
 		intel_wait_for_vblank(dev_priv, pipe);
 
 	/* inform PCU of the change */
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -1834,12 +1817,10 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	unsigned int vco = cdclk_state->vco;
 	int ret;
 
-	mutex_lock(&dev_priv->pcu_lock);
 	ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				SKL_CDCLK_PREPARE_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE,
 				SKL_CDCLK_READY_FOR_CHANGE, 3);
-	mutex_unlock(&dev_priv->pcu_lock);
 	if (ret) {
 		DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n",
 			  ret);
@@ -1861,10 +1842,8 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 	I915_WRITE(CDCLK_CTL, ICL_CDCLK_CD2X_PIPE_NONE |
 			      skl_cdclk_decimal(cdclk));
 
-	mutex_lock(&dev_priv->pcu_lock);
 	sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL,
 				cdclk_state->voltage_level);
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	intel_update_cdclk(dev_priv);
 
@@ -1877,21 +1856,12 @@ static void icl_set_cdclk(struct drm_i915_private *dev_priv,
 
 static u8 icl_calc_voltage_level(int cdclk)
 {
-	switch (cdclk) {
-	case 50000:
-	case 307200:
-	case 312000:
-		return 0;
-	case 556800:
-	case 552000:
-		return 1;
-	default:
-		MISSING_CASE(cdclk);
-		/* fall through */
-	case 652800:
-	case 648000:
+	if (cdclk > 556800)
 		return 2;
-	}
+	else if (cdclk > 312000)
+		return 1;
+	else
+		return 0;
 }
 
 static void icl_get_cdclk(struct drm_i915_private *dev_priv,
@@ -2277,6 +2247,15 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	    IS_VALLEYVIEW(dev_priv))
 		min_cdclk = max(320000, min_cdclk);
 
+	/*
+	 * On Geminilake once the CDCLK gets as low as 79200
+	 * picture gets unstable, despite that values are
+	 * correct for DSI PLL and DE PLL.
+	 */
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) &&
+	    IS_GEMINILAKE(dev_priv))
+		min_cdclk = max(158400, min_cdclk);
+
 	if (min_cdclk > dev_priv->max_cdclk_freq) {
 		DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n",
 			      min_cdclk, dev_priv->max_cdclk_freq);
@@ -2286,29 +2265,28 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state)
 	return min_cdclk;
 }
 
-static int intel_compute_min_cdclk(struct drm_atomic_state *state)
+static int intel_compute_min_cdclk(struct intel_atomic_state *state)
 {
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
 	int min_cdclk, i;
 	enum pipe pipe;
 
-	memcpy(intel_state->min_cdclk, dev_priv->min_cdclk,
-	       sizeof(intel_state->min_cdclk));
+	memcpy(state->min_cdclk, dev_priv->min_cdclk,
+	       sizeof(state->min_cdclk));
 
-	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
+	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
 		min_cdclk = intel_crtc_compute_min_cdclk(crtc_state);
 		if (min_cdclk < 0)
 			return min_cdclk;
 
-		intel_state->min_cdclk[i] = min_cdclk;
+		state->min_cdclk[i] = min_cdclk;
 	}
 
-	min_cdclk = intel_state->cdclk.force_min_cdclk;
+	min_cdclk = state->cdclk.force_min_cdclk;
 	for_each_pipe(dev_priv, pipe)
-		min_cdclk = max(intel_state->min_cdclk[pipe], min_cdclk);
+		min_cdclk = max(state->min_cdclk[pipe], min_cdclk);
 
 	return min_cdclk;
 }
@@ -2350,10 +2328,9 @@ static u8 cnl_compute_min_voltage_level(struct intel_atomic_state *state)
 	return min_voltage_level;
 }
 
-static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int vlv_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	int min_cdclk, cdclk;
 
 	min_cdclk = intel_compute_min_cdclk(state);
@@ -2362,28 +2339,25 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state)
 
 	cdclk = vlv_calc_cdclk(dev_priv, min_cdclk);
 
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		vlv_calc_voltage_level(dev_priv, cdclk);
 
-	if (!intel_state->active_crtcs) {
-		cdclk = vlv_calc_cdclk(dev_priv,
-				       intel_state->cdclk.force_min_cdclk);
+	if (!state->active_crtcs) {
+		cdclk = vlv_calc_cdclk(dev_priv, state->cdclk.force_min_cdclk);
 
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			vlv_calc_voltage_level(dev_priv, cdclk);
 	} else {
-		intel_state->cdclk.actual =
-			intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
 }
 
-static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int bdw_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	int min_cdclk, cdclk;
 
 	min_cdclk = intel_compute_min_cdclk(state);
@@ -2396,36 +2370,35 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state)
 	 */
 	cdclk = bdw_calc_cdclk(min_cdclk);
 
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		bdw_calc_voltage_level(cdclk);
 
-	if (!intel_state->active_crtcs) {
-		cdclk = bdw_calc_cdclk(intel_state->cdclk.force_min_cdclk);
+	if (!state->active_crtcs) {
+		cdclk = bdw_calc_cdclk(state->cdclk.force_min_cdclk);
 
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			bdw_calc_voltage_level(cdclk);
 	} else {
-		intel_state->cdclk.actual =
-			intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
 }
 
-static int skl_dpll0_vco(struct intel_atomic_state *intel_state)
+static int skl_dpll0_vco(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	struct intel_crtc *crtc;
 	struct intel_crtc_state *crtc_state;
 	int vco, i;
 
-	vco = intel_state->cdclk.logical.vco;
+	vco = state->cdclk.logical.vco;
 	if (!vco)
 		vco = dev_priv->skl_preferred_vco_freq;
 
-	for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) {
+	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
 		if (!crtc_state->base.enable)
 			continue;
 
@@ -2450,16 +2423,15 @@ static int skl_dpll0_vco(struct intel_atomic_state *intel_state)
 	return vco;
 }
 
-static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int skl_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	int min_cdclk, cdclk, vco;
 
 	min_cdclk = intel_compute_min_cdclk(state);
 	if (min_cdclk < 0)
 		return min_cdclk;
 
-	vco = skl_dpll0_vco(intel_state);
+	vco = skl_dpll0_vco(state);
 
 	/*
 	 * FIXME should also account for plane ratio
@@ -2467,30 +2439,28 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state)
 	 */
 	cdclk = skl_calc_cdclk(min_cdclk, vco);
 
-	intel_state->cdclk.logical.vco = vco;
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.vco = vco;
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		skl_calc_voltage_level(cdclk);
 
-	if (!intel_state->active_crtcs) {
-		cdclk = skl_calc_cdclk(intel_state->cdclk.force_min_cdclk, vco);
+	if (!state->active_crtcs) {
+		cdclk = skl_calc_cdclk(state->cdclk.force_min_cdclk, vco);
 
-		intel_state->cdclk.actual.vco = vco;
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.vco = vco;
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			skl_calc_voltage_level(cdclk);
 	} else {
-		intel_state->cdclk.actual =
-			intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
 }
 
-static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int bxt_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	int min_cdclk, cdclk, vco;
 
 	min_cdclk = intel_compute_min_cdclk(state);
@@ -2505,36 +2475,34 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state)
 		vco = bxt_de_pll_vco(dev_priv, cdclk);
 	}
 
-	intel_state->cdclk.logical.vco = vco;
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.vco = vco;
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		bxt_calc_voltage_level(cdclk);
 
-	if (!intel_state->active_crtcs) {
+	if (!state->active_crtcs) {
 		if (IS_GEMINILAKE(dev_priv)) {
-			cdclk = glk_calc_cdclk(intel_state->cdclk.force_min_cdclk);
+			cdclk = glk_calc_cdclk(state->cdclk.force_min_cdclk);
 			vco = glk_de_pll_vco(dev_priv, cdclk);
 		} else {
-			cdclk = bxt_calc_cdclk(intel_state->cdclk.force_min_cdclk);
+			cdclk = bxt_calc_cdclk(state->cdclk.force_min_cdclk);
 			vco = bxt_de_pll_vco(dev_priv, cdclk);
 		}
 
-		intel_state->cdclk.actual.vco = vco;
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.vco = vco;
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			bxt_calc_voltage_level(cdclk);
 	} else {
-		intel_state->cdclk.actual =
-			intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
 }
 
-static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int cnl_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
 	int min_cdclk, cdclk, vco;
 
 	min_cdclk = intel_compute_min_cdclk(state);
@@ -2544,33 +2512,31 @@ static int cnl_modeset_calc_cdclk(struct drm_atomic_state *state)
 	cdclk = cnl_calc_cdclk(min_cdclk);
 	vco = cnl_cdclk_pll_vco(dev_priv, cdclk);
 
-	intel_state->cdclk.logical.vco = vco;
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.vco = vco;
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		max(cnl_calc_voltage_level(cdclk),
-		    cnl_compute_min_voltage_level(intel_state));
+		    cnl_compute_min_voltage_level(state));
 
-	if (!intel_state->active_crtcs) {
-		cdclk = cnl_calc_cdclk(intel_state->cdclk.force_min_cdclk);
+	if (!state->active_crtcs) {
+		cdclk = cnl_calc_cdclk(state->cdclk.force_min_cdclk);
 		vco = cnl_cdclk_pll_vco(dev_priv, cdclk);
 
-		intel_state->cdclk.actual.vco = vco;
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.vco = vco;
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			cnl_calc_voltage_level(cdclk);
 	} else {
-		intel_state->cdclk.actual =
-			intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
 }
 
-static int icl_modeset_calc_cdclk(struct drm_atomic_state *state)
+static int icl_modeset_calc_cdclk(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	unsigned int ref = intel_state->cdclk.logical.ref;
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	unsigned int ref = state->cdclk.logical.ref;
 	int min_cdclk, cdclk, vco;
 
 	min_cdclk = intel_compute_min_cdclk(state);
@@ -2580,22 +2546,22 @@ static int icl_modeset_calc_cdclk(struct drm_atomic_state *state)
 	cdclk = icl_calc_cdclk(min_cdclk, ref);
 	vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk);
 
-	intel_state->cdclk.logical.vco = vco;
-	intel_state->cdclk.logical.cdclk = cdclk;
-	intel_state->cdclk.logical.voltage_level =
+	state->cdclk.logical.vco = vco;
+	state->cdclk.logical.cdclk = cdclk;
+	state->cdclk.logical.voltage_level =
 		max(icl_calc_voltage_level(cdclk),
-		    cnl_compute_min_voltage_level(intel_state));
+		    cnl_compute_min_voltage_level(state));
 
-	if (!intel_state->active_crtcs) {
-		cdclk = icl_calc_cdclk(intel_state->cdclk.force_min_cdclk, ref);
+	if (!state->active_crtcs) {
+		cdclk = icl_calc_cdclk(state->cdclk.force_min_cdclk, ref);
 		vco = icl_calc_cdclk_pll_vco(dev_priv, cdclk);
 
-		intel_state->cdclk.actual.vco = vco;
-		intel_state->cdclk.actual.cdclk = cdclk;
-		intel_state->cdclk.actual.voltage_level =
+		state->cdclk.actual.vco = vco;
+		state->cdclk.actual.cdclk = cdclk;
+		state->cdclk.actual.voltage_level =
 			icl_calc_voltage_level(cdclk);
 	} else {
-		intel_state->cdclk.actual = intel_state->cdclk.logical;
+		state->cdclk.actual = state->cdclk.logical;
 	}
 
 	return 0;
@@ -2817,28 +2783,22 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
 		dev_priv->display.modeset_calc_cdclk = icl_modeset_calc_cdclk;
 	} else if (IS_CANNONLAKE(dev_priv)) {
 		dev_priv->display.set_cdclk = cnl_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			cnl_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = cnl_modeset_calc_cdclk;
 	} else if (IS_GEN9_LP(dev_priv)) {
 		dev_priv->display.set_cdclk = bxt_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			bxt_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
 	} else if (IS_GEN9_BC(dev_priv)) {
 		dev_priv->display.set_cdclk = skl_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			skl_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk;
 	} else if (IS_BROADWELL(dev_priv)) {
 		dev_priv->display.set_cdclk = bdw_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			bdw_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = bdw_modeset_calc_cdclk;
 	} else if (IS_CHERRYVIEW(dev_priv)) {
 		dev_priv->display.set_cdclk = chv_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			vlv_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk;
 	} else if (IS_VALLEYVIEW(dev_priv)) {
 		dev_priv->display.set_cdclk = vlv_set_cdclk;
-		dev_priv->display.modeset_calc_cdclk =
-			vlv_modeset_calc_cdclk;
+		dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 11)
diff --git a/drivers/gpu/drm/i915/intel_cdclk.h b/drivers/gpu/drm/i915/display/intel_cdclk.h
index 4d6f7f5f8930..4d6f7f5f8930 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.h
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.h
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 9093daabc290..23a84dd7989f 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -41,6 +41,7 @@
 #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
 
 #define LEGACY_LUT_LENGTH		256
+
 /*
  * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
  * format). This macro takes the coefficient we want transformed and the
@@ -607,7 +608,7 @@ static void bdw_load_lut_10(struct intel_crtc *crtc,
 	I915_WRITE(PREC_PAL_INDEX(pipe), 0);
 }
 
-static void ivb_load_lut_10_max(struct intel_crtc *crtc)
+static void ivb_load_lut_ext_max(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
@@ -640,7 +641,7 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 	} else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
 		ivb_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE |
 				PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
 		ivb_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE |
 				PAL_PREC_INDEX_VALUE(512));
 	} else {
@@ -648,7 +649,7 @@ static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
 
 		ivb_load_lut_10(crtc, blob,
 				PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
 	}
 }
 
@@ -663,7 +664,7 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 	} else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
 		bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE |
 				PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE |
 				PAL_PREC_INDEX_VALUE(512));
 	} else {
@@ -671,7 +672,7 @@ static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
 
 		bdw_load_lut_10(crtc, blob,
 				PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
 	}
 }
 
@@ -763,10 +764,120 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 		i9xx_load_luts(crtc_state);
 	} else {
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
+	}
+}
+
+/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+		(color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+		(color->blue & 0x3f) << 4;
+}
+
+static void
+icl_load_gcmax(const struct intel_crtc_state *crtc_state,
+	       const struct drm_color_lut *color)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+
+	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), color->red);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), color->green);
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), color->blue);
+}
+
+static void
+icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	/*
+	 * Every entry in the multi-segment LUT is corresponding to a superfine
+	 * segment step which is 1/(8 * 128 * 256).
+	 *
+	 * Superfine segment has 9 entries, corresponding to values
+	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
+	 */
+	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+
+	for (i = 0; i < 9; i++) {
+		const struct drm_color_lut *entry = &lut[i];
+
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
+			   ilk_lut_12p4_udw(entry));
 	}
 }
 
+static void
+icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	const struct drm_color_lut *entry;
+	enum pipe pipe = crtc->pipe;
+	u32 i;
+
+	/*
+	 *
+	 * Program Fine segment (let's call it seg2)...
+	 *
+	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
+	 * ... 256/(128*256). So in order to program fine segment of LUT we
+	 * need to pick every 8'th entry in LUT, and program 256 indexes.
+	 *
+	 * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1],
+	 * with seg2[0] being unused by the hardware.
+	 */
+	I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
+	for (i = 1; i < 257; i++) {
+		entry = &lut[i * 8];
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+	}
+
+	/*
+	 * Program Coarse segment (let's call it seg3)...
+	 *
+	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
+	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
+	 * above, we need to pick every (8 * 128)th entry in LUT, and
+	 * program 256 of those.
+	 *
+	 * Spec is not very clear about if entries seg3[0] and seg3[1] are
+	 * being used or not, but we still need to program these to advance
+	 * the index.
+	 */
+	for (i = 0; i < 256; i++) {
+		entry = &lut[i * 8 * 128];
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry));
+		I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry));
+	}
+
+	/* The last entry in the LUT is to be programmed in GCMAX */
+	entry = &lut[256 * 8 * 128];
+	icl_load_gcmax(crtc_state, entry);
+	ivb_load_lut_ext_max(crtc);
+}
+
 static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
@@ -775,65 +886,94 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 	if (crtc_state->base.degamma_lut)
 		glk_load_degamma_lut(crtc_state);
 
-	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
-	    GAMMA_MODE_MODE_8BIT) {
+	switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) {
+	case GAMMA_MODE_MODE_8BIT:
 		i9xx_load_luts(crtc_state);
-	} else {
+		break;
+
+	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
+		icl_program_gamma_superfine_segment(crtc_state);
+		icl_program_gamma_multi_segment(crtc_state);
+		break;
+
+	default:
 		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0));
-		ivb_load_lut_10_max(crtc);
+		ivb_load_lut_ext_max(crtc);
 	}
 }
 
-static void cherryview_load_luts(const struct intel_crtc_state *crtc_state)
+static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color)
+{
+	return drm_color_lut_extract(color->green, 14) << 16 |
+		drm_color_lut_extract(color->blue, 14);
+}
+
+static u32 chv_cgm_degamma_udw(const struct drm_color_lut *color)
+{
+	return drm_color_lut_extract(color->red, 14);
+}
+
+static void chv_load_cgm_degamma(struct intel_crtc *crtc,
+				 const struct drm_property_blob *blob)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
-	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
+	const struct drm_color_lut *lut = blob->data;
+	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
-	cherryview_load_csc_matrix(crtc_state);
-
-	if (crtc_state_is_legacy_gamma(crtc_state)) {
-		i9xx_load_luts(crtc_state);
-		return;
+	for (i = 0; i < lut_size; i++) {
+		I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 0),
+			   chv_cgm_degamma_ldw(&lut[i]));
+		I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 1),
+			   chv_cgm_degamma_udw(&lut[i]));
 	}
+}
 
-	if (degamma_lut) {
-		const struct drm_color_lut *lut = degamma_lut->data;
-		int i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
+static u32 chv_cgm_gamma_ldw(const struct drm_color_lut *color)
+{
+	return drm_color_lut_extract(color->green, 10) << 16 |
+		drm_color_lut_extract(color->blue, 10);
+}
 
-		for (i = 0; i < lut_size; i++) {
-			u32 word0, word1;
+static u32 chv_cgm_gamma_udw(const struct drm_color_lut *color)
+{
+	return drm_color_lut_extract(color->red, 10);
+}
 
-			/* Write LUT in U0.14 format. */
-			word0 =
-			(drm_color_lut_extract(lut[i].green, 14) << 16) |
-			drm_color_lut_extract(lut[i].blue, 14);
-			word1 = drm_color_lut_extract(lut[i].red, 14);
+static void chv_load_cgm_gamma(struct intel_crtc *crtc,
+			       const struct drm_property_blob *blob)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_color_lut *lut = blob->data;
+	int i, lut_size = drm_color_lut_size(blob);
+	enum pipe pipe = crtc->pipe;
 
-			I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 0), word0);
-			I915_WRITE(CGM_PIPE_DEGAMMA(pipe, i, 1), word1);
-		}
+	for (i = 0; i < lut_size; i++) {
+		I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 0),
+			   chv_cgm_gamma_ldw(&lut[i]));
+		I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 1),
+			   chv_cgm_gamma_udw(&lut[i]));
 	}
+}
 
-	if (gamma_lut) {
-		const struct drm_color_lut *lut = gamma_lut->data;
-		int i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
-
-		for (i = 0; i < lut_size; i++) {
-			u32 word0, word1;
+static void chv_load_luts(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
+	const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
 
-			/* Write LUT in U0.10 format. */
-			word0 =
-			(drm_color_lut_extract(lut[i].green, 10) << 16) |
-			drm_color_lut_extract(lut[i].blue, 10);
-			word1 = drm_color_lut_extract(lut[i].red, 10);
+	cherryview_load_csc_matrix(crtc_state);
 
-			I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 0), word0);
-			I915_WRITE(CGM_PIPE_GAMMA(pipe, i, 1), word1);
-		}
+	if (crtc_state_is_legacy_gamma(crtc_state)) {
+		i9xx_load_luts(crtc_state);
+		return;
 	}
+
+	if (degamma_lut)
+		chv_load_cgm_degamma(crtc, degamma_lut);
+
+	if (gamma_lut)
+		chv_load_cgm_gamma(crtc, gamma_lut);
 }
 
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
@@ -857,6 +997,14 @@ int intel_color_check(struct intel_crtc_state *crtc_state)
 	return dev_priv->display.color_check(crtc_state);
 }
 
+void intel_color_get_config(struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+	if (dev_priv->display.read_luts)
+		dev_priv->display.read_luts(crtc_state);
+}
+
 static bool need_plane_update(struct intel_plane *plane,
 			      const struct intel_crtc_state *crtc_state)
 {
@@ -937,8 +1085,10 @@ static int check_luts(const struct intel_crtc_state *crtc_state)
 		return 0;
 
 	/* C8 relies on its palette being stored in the legacy LUT */
-	if (crtc_state->c8_planes)
+	if (crtc_state->c8_planes) {
+		DRM_DEBUG_KMS("C8 pixelformat requires the legacy LUT\n");
 		return -EINVAL;
+	}
 
 	degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size;
 	gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size;
@@ -1187,7 +1337,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
 	    crtc_state_is_legacy_gamma(crtc_state))
 		gamma_mode |= GAMMA_MODE_MODE_8BIT;
 	else
-		gamma_mode |= GAMMA_MODE_MODE_10BIT;
+		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
 
 	return gamma_mode;
 }
@@ -1232,7 +1382,7 @@ void intel_color_init(struct intel_crtc *crtc)
 		if (IS_CHERRYVIEW(dev_priv)) {
 			dev_priv->display.color_check = chv_color_check;
 			dev_priv->display.color_commit = i9xx_color_commit;
-			dev_priv->display.load_luts = cherryview_load_luts;
+			dev_priv->display.load_luts = chv_load_luts;
 		} else if (INTEL_GEN(dev_priv) >= 4) {
 			dev_priv->display.color_check = i9xx_color_check;
 			dev_priv->display.color_commit = i9xx_color_commit;
diff --git a/drivers/gpu/drm/i915/intel_color.h b/drivers/gpu/drm/i915/display/intel_color.h
index b8a3ce609587..057e8ac63555 100644
--- a/drivers/gpu/drm/i915/intel_color.h
+++ b/drivers/gpu/drm/i915/display/intel_color.h
@@ -13,5 +13,6 @@ void intel_color_init(struct intel_crtc *crtc);
 int intel_color_check(struct intel_crtc_state *crtc_state);
 void intel_color_commit(const struct intel_crtc_state *crtc_state);
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state);
+void intel_color_get_config(struct intel_crtc_state *crtc_state);
 
 #endif /* __INTEL_COLOR_H__ */
diff --git a/drivers/gpu/drm/i915/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c
index 2bf4359d7e41..841708da5a56 100644
--- a/drivers/gpu/drm/i915/intel_combo_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c
@@ -3,6 +3,7 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "intel_combo_phy.h"
 #include "intel_drv.h"
 
 #define for_each_combo_port(__dev_priv, __port) \
@@ -147,7 +148,7 @@ static bool cnl_combo_phy_verify_state(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
-void cnl_combo_phys_init(struct drm_i915_private *dev_priv)
+static void cnl_combo_phys_init(struct drm_i915_private *dev_priv)
 {
 	u32 val;
 
@@ -167,7 +168,7 @@ void cnl_combo_phys_init(struct drm_i915_private *dev_priv)
 	I915_WRITE(CNL_PORT_CL1CM_DW5, val);
 }
 
-void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv)
+static void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv)
 {
 	u32 val;
 
@@ -197,13 +198,69 @@ static bool icl_combo_phy_verify_state(struct drm_i915_private *dev_priv,
 
 	ret = cnl_verify_procmon_ref_values(dev_priv, port);
 
+	if (port == PORT_A)
+		ret &= check_phy_reg(dev_priv, port, ICL_PORT_COMP_DW8(port),
+				     IREFGEN, IREFGEN);
+
 	ret &= check_phy_reg(dev_priv, port, ICL_PORT_CL_DW5(port),
 			     CL_POWER_DOWN_ENABLE, CL_POWER_DOWN_ENABLE);
 
 	return ret;
 }
 
-void icl_combo_phys_init(struct drm_i915_private *dev_priv)
+void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv,
+				    enum port port, bool is_dsi,
+				    int lane_count, bool lane_reversal)
+{
+	u8 lane_mask;
+	u32 val;
+
+	if (is_dsi) {
+		WARN_ON(lane_reversal);
+
+		switch (lane_count) {
+		case 1:
+			lane_mask = PWR_DOWN_LN_3_1_0;
+			break;
+		case 2:
+			lane_mask = PWR_DOWN_LN_3_1;
+			break;
+		case 3:
+			lane_mask = PWR_DOWN_LN_3;
+			break;
+		default:
+			MISSING_CASE(lane_count);
+			/* fall-through */
+		case 4:
+			lane_mask = PWR_UP_ALL_LANES;
+			break;
+		}
+	} else {
+		switch (lane_count) {
+		case 1:
+			lane_mask = lane_reversal ? PWR_DOWN_LN_2_1_0 :
+						    PWR_DOWN_LN_3_2_1;
+			break;
+		case 2:
+			lane_mask = lane_reversal ? PWR_DOWN_LN_1_0 :
+						    PWR_DOWN_LN_3_2;
+			break;
+		default:
+			MISSING_CASE(lane_count);
+			/* fall-through */
+		case 4:
+			lane_mask = PWR_UP_ALL_LANES;
+			break;
+		}
+	}
+
+	val = I915_READ(ICL_PORT_CL_DW10(port));
+	val &= ~PWR_DOWN_LN_MASK;
+	val |= lane_mask << PWR_DOWN_LN_SHIFT;
+	I915_WRITE(ICL_PORT_CL_DW10(port), val);
+}
+
+static void icl_combo_phys_init(struct drm_i915_private *dev_priv)
 {
 	enum port port;
 
@@ -222,6 +279,12 @@ void icl_combo_phys_init(struct drm_i915_private *dev_priv)
 
 		cnl_set_procmon_ref_values(dev_priv, port);
 
+		if (port == PORT_A) {
+			val = I915_READ(ICL_PORT_COMP_DW8(port));
+			val |= IREFGEN;
+			I915_WRITE(ICL_PORT_COMP_DW8(port), val);
+		}
+
 		val = I915_READ(ICL_PORT_COMP_DW0(port));
 		val |= COMP_INIT;
 		I915_WRITE(ICL_PORT_COMP_DW0(port), val);
@@ -232,7 +295,7 @@ void icl_combo_phys_init(struct drm_i915_private *dev_priv)
 	}
 }
 
-void icl_combo_phys_uninit(struct drm_i915_private *dev_priv)
+static void icl_combo_phys_uninit(struct drm_i915_private *dev_priv)
 {
 	enum port port;
 
@@ -253,3 +316,19 @@ void icl_combo_phys_uninit(struct drm_i915_private *dev_priv)
 		I915_WRITE(ICL_PORT_COMP_DW0(port), val);
 	}
 }
+
+void intel_combo_phy_init(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 11)
+		icl_combo_phys_init(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_combo_phys_init(i915);
+}
+
+void intel_combo_phy_uninit(struct drm_i915_private *i915)
+{
+	if (INTEL_GEN(i915) >= 11)
+		icl_combo_phys_uninit(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_combo_phys_uninit(i915);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.h b/drivers/gpu/drm/i915/display/intel_combo_phy.h
new file mode 100644
index 000000000000..e6e195a83b19
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_combo_phy.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_COMBO_PHY_H__
+#define __INTEL_COMBO_PHY_H__
+
+#include <linux/types.h>
+#include <drm/i915_drm.h>
+
+struct drm_i915_private;
+
+void intel_combo_phy_init(struct drm_i915_private *dev_priv);
+void intel_combo_phy_uninit(struct drm_i915_private *dev_priv);
+void intel_combo_phy_power_up_lanes(struct drm_i915_private *dev_priv,
+				    enum port port, bool is_dsi,
+				    int lane_count, bool lane_reversal);
+
+#endif /* __INTEL_COMBO_PHY_H__ */
diff --git a/drivers/gpu/drm/i915/intel_connector.c b/drivers/gpu/drm/i915/display/intel_connector.c
index 073b6c3ab7cc..41310f8e5a2a 100644
--- a/drivers/gpu/drm/i915/intel_connector.c
+++ b/drivers/gpu/drm/i915/display/intel_connector.c
@@ -29,11 +29,12 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 
+#include "display/intel_panel.h"
+
 #include "i915_drv.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
 #include "intel_hdcp.h"
-#include "intel_panel.h"
 
 int intel_connector_init(struct intel_connector *connector)
 {
diff --git a/drivers/gpu/drm/i915/intel_connector.h b/drivers/gpu/drm/i915/display/intel_connector.h
index 93a7375c8196..93a7375c8196 100644
--- a/drivers/gpu/drm/i915/intel_connector.h
+++ b/drivers/gpu/drm/i915/display/intel_connector.h
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index b665c370111b..3fcf2f84bcce 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -39,6 +39,9 @@
 #include "intel_crt.h"
 #include "intel_ddi.h"
 #include "intel_drv.h"
+#include "intel_fifo_underrun.h"
+#include "intel_gmbus.h"
+#include "intel_hotplug.h"
 
 /* Here's the desired hotplug mode */
 #define ADPA_HOTPLUG_BITS (ADPA_CRT_HOTPLUG_PERIOD_128 |		\
@@ -640,6 +643,7 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 {
 	struct drm_device *dev = crt->base.base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	u32 save_bclrpat;
 	u32 save_vtotal;
 	u32 vtotal, vactive;
@@ -660,9 +664,9 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 	pipeconf_reg = PIPECONF(pipe);
 	pipe_dsl_reg = PIPEDSL(pipe);
 
-	save_bclrpat = I915_READ(bclrpat_reg);
-	save_vtotal = I915_READ(vtotal_reg);
-	vblank = I915_READ(vblank_reg);
+	save_bclrpat = intel_uncore_read(uncore, bclrpat_reg);
+	save_vtotal = intel_uncore_read(uncore, vtotal_reg);
+	vblank = intel_uncore_read(uncore, vblank_reg);
 
 	vtotal = ((save_vtotal >> 16) & 0xfff) + 1;
 	vactive = (save_vtotal & 0x7ff) + 1;
@@ -671,21 +675,23 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 	vblank_end = ((vblank >> 16) & 0xfff) + 1;
 
 	/* Set the border color to purple. */
-	I915_WRITE(bclrpat_reg, 0x500050);
+	intel_uncore_write(uncore, bclrpat_reg, 0x500050);
 
 	if (!IS_GEN(dev_priv, 2)) {
-		u32 pipeconf = I915_READ(pipeconf_reg);
-		I915_WRITE(pipeconf_reg, pipeconf | PIPECONF_FORCE_BORDER);
-		POSTING_READ(pipeconf_reg);
+		u32 pipeconf = intel_uncore_read(uncore, pipeconf_reg);
+		intel_uncore_write(uncore,
+				   pipeconf_reg,
+				   pipeconf | PIPECONF_FORCE_BORDER);
+		intel_uncore_posting_read(uncore, pipeconf_reg);
 		/* Wait for next Vblank to substitue
 		 * border color for Color info */
 		intel_wait_for_vblank(dev_priv, pipe);
-		st00 = I915_READ8(_VGA_MSR_WRITE);
+		st00 = intel_uncore_read8(uncore, _VGA_MSR_WRITE);
 		status = ((st00 & (1 << 4)) != 0) ?
 			connector_status_connected :
 			connector_status_disconnected;
 
-		I915_WRITE(pipeconf_reg, pipeconf);
+		intel_uncore_write(uncore, pipeconf_reg, pipeconf);
 	} else {
 		bool restore_vblank = false;
 		int count, detect;
@@ -699,9 +705,10 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 			u32 vsync_start = (vsync & 0xffff) + 1;
 
 			vblank_start = vsync_start;
-			I915_WRITE(vblank_reg,
-				   (vblank_start - 1) |
-				   ((vblank_end - 1) << 16));
+			intel_uncore_write(uncore,
+					   vblank_reg,
+					   (vblank_start - 1) |
+					   ((vblank_end - 1) << 16));
 			restore_vblank = true;
 		}
 		/* sample in the vertical border, selecting the larger one */
@@ -713,9 +720,10 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 		/*
 		 * Wait for the border to be displayed
 		 */
-		while (I915_READ(pipe_dsl_reg) >= vactive)
+		while (intel_uncore_read(uncore, pipe_dsl_reg) >= vactive)
 			;
-		while ((dsl = I915_READ(pipe_dsl_reg)) <= vsample)
+		while ((dsl = intel_uncore_read(uncore, pipe_dsl_reg)) <=
+		       vsample)
 			;
 		/*
 		 * Watch ST00 for an entire scanline
@@ -725,14 +733,14 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 		do {
 			count++;
 			/* Read the ST00 VGA status register */
-			st00 = I915_READ8(_VGA_MSR_WRITE);
+			st00 = intel_uncore_read8(uncore, _VGA_MSR_WRITE);
 			if (st00 & (1 << 4))
 				detect++;
-		} while ((I915_READ(pipe_dsl_reg) == dsl));
+		} while ((intel_uncore_read(uncore, pipe_dsl_reg) == dsl));
 
 		/* restore vblank if necessary */
 		if (restore_vblank)
-			I915_WRITE(vblank_reg, vblank);
+			intel_uncore_write(uncore, vblank_reg, vblank);
 		/*
 		 * If more than 3/4 of the scanline detected a monitor,
 		 * then it is assumed to be present. This works even on i830,
@@ -745,7 +753,7 @@ intel_crt_load_detect(struct intel_crt *crt, u32 pipe)
 	}
 
 	/* Restore previous settings */
-	I915_WRITE(bclrpat_reg, save_bclrpat);
+	intel_uncore_write(uncore, bclrpat_reg, save_bclrpat);
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_crt.h b/drivers/gpu/drm/i915/display/intel_crt.h
index 1b3fba359efc..1b3fba359efc 100644
--- a/drivers/gpu/drm/i915/intel_crt.h
+++ b/drivers/gpu/drm/i915/display/intel_crt.h
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index f181c26f62fd..7925a176f900 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -29,16 +29,23 @@
 
 #include "i915_drv.h"
 #include "intel_audio.h"
+#include "intel_combo_phy.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
 #include "intel_dp.h"
+#include "intel_dp_link_training.h"
+#include "intel_dpio_phy.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_fifo_underrun.h"
+#include "intel_gmbus.h"
 #include "intel_hdcp.h"
 #include "intel_hdmi.h"
+#include "intel_hotplug.h"
 #include "intel_lspcon.h"
 #include "intel_panel.h"
 #include "intel_psr.h"
+#include "intel_vdsc.h"
 
 struct ddi_buf_trans {
 	u32 trans1;	/* balance leg enable, de-emph level */
@@ -608,7 +615,7 @@ skl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries)
 static const struct ddi_buf_trans *
 kbl_get_buf_trans_dp(struct drm_i915_private *dev_priv, int *n_entries)
 {
-	if (IS_KBL_ULX(dev_priv) || IS_AML_ULX(dev_priv)) {
+	if (IS_KBL_ULX(dev_priv) || IS_CFL_ULX(dev_priv)) {
 		*n_entries = ARRAY_SIZE(kbl_y_ddi_translations_dp);
 		return kbl_y_ddi_translations_dp;
 	} else if (IS_KBL_ULT(dev_priv) || IS_CFL_ULT(dev_priv)) {
@@ -624,7 +631,8 @@ static const struct ddi_buf_trans *
 skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 {
 	if (dev_priv->vbt.edp.low_vswing) {
-		if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) || IS_AML_ULX(dev_priv)) {
+		if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) ||
+		    IS_CFL_ULX(dev_priv)) {
 			*n_entries = ARRAY_SIZE(skl_y_ddi_translations_edp);
 			return skl_y_ddi_translations_edp;
 		} else if (IS_SKL_ULT(dev_priv) || IS_KBL_ULT(dev_priv) ||
@@ -646,7 +654,8 @@ skl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 static const struct ddi_buf_trans *
 skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries)
 {
-	if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) || IS_AML_ULX(dev_priv)) {
+	if (IS_SKL_ULX(dev_priv) || IS_KBL_ULX(dev_priv) ||
+	    IS_CFL_ULX(dev_priv)) {
 		*n_entries = ARRAY_SIZE(skl_y_ddi_translations_hdmi);
 		return skl_y_ddi_translations_hdmi;
 	} else {
@@ -1214,19 +1223,30 @@ intel_ddi_get_crtc_encoder(struct intel_crtc *crtc)
 	return ret;
 }
 
-#define LC_FREQ 2700
-
 static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv,
 				   i915_reg_t reg)
 {
-	int refclk = LC_FREQ;
+	int refclk;
 	int n, p, r;
 	u32 wrpll;
 
 	wrpll = I915_READ(reg);
-	switch (wrpll & WRPLL_PLL_REF_MASK) {
-	case WRPLL_PLL_SSC:
-	case WRPLL_PLL_NON_SSC:
+	switch (wrpll & WRPLL_REF_MASK) {
+	case WRPLL_REF_SPECIAL_HSW:
+		/*
+		 * muxed-SSC for BDW.
+		 * non-SSC for non-ULT HSW. Check FUSE_STRAP3
+		 * for the non-SSC reference frequency.
+		 */
+		if (IS_HASWELL(dev_priv) && !IS_HSW_ULT(dev_priv)) {
+			if (I915_READ(FUSE_STRAP3) & HSW_REF_CLK_SELECT)
+				refclk = 24;
+			else
+				refclk = 135;
+			break;
+		}
+		/* fall through */
+	case WRPLL_REF_PCH_SSC:
 		/*
 		 * We could calculate spread here, but our checking
 		 * code only cares about 5% accuracy, and spread is a max of
@@ -1234,11 +1254,11 @@ static int hsw_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv,
 		 */
 		refclk = 135;
 		break;
-	case WRPLL_PLL_LCPLL:
-		refclk = LC_FREQ;
+	case WRPLL_REF_LCPLL:
+		refclk = 2700;
 		break;
 	default:
-		WARN(1, "bad wrpll refclk\n");
+		MISSING_CASE(wrpll);
 		return 0;
 	}
 
@@ -1450,7 +1470,8 @@ static void ddi_dotclock_get(struct intel_crtc_state *pipe_config)
 	else
 		dotclock = pipe_config->port_clock;
 
-	if (pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+	if (pipe_config->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 &&
+	    !intel_crtc_has_dp_encoder(pipe_config))
 		dotclock *= 2;
 
 	if (pipe_config->pixel_multiplier)
@@ -1605,12 +1626,12 @@ static void hsw_ddi_clock_get(struct intel_encoder *encoder,
 		link_clock = hsw_ddi_calc_wrpll_link(dev_priv, WRPLL_CTL(1));
 		break;
 	case PORT_CLK_SEL_SPLL:
-		pll = I915_READ(SPLL_CTL) & SPLL_PLL_FREQ_MASK;
-		if (pll == SPLL_PLL_FREQ_810MHz)
+		pll = I915_READ(SPLL_CTL) & SPLL_FREQ_MASK;
+		if (pll == SPLL_FREQ_810MHz)
 			link_clock = 81000;
-		else if (pll == SPLL_PLL_FREQ_1350MHz)
+		else if (pll == SPLL_FREQ_1350MHz)
 			link_clock = 135000;
-		else if (pll == SPLL_PLL_FREQ_2700MHz)
+		else if (pll == SPLL_FREQ_2700MHz)
 			link_clock = 270000;
 		else {
 			WARN(1, "bad spll freq\n");
@@ -1710,6 +1731,14 @@ void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
 	 */
 	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444)
 		temp |= TRANS_MSA_SAMPLING_444 | TRANS_MSA_CLRSP_YCBCR;
+	/*
+	 * As per DP 1.4a spec section 2.2.4.3 [MSA Field for Indication
+	 * of Color Encoding Format and Content Color Gamut] while sending
+	 * YCBCR 420 signals we should program MSA MISC1 fields which
+	 * indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
+	 */
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+		temp |= TRANS_MSA_USE_VSC_SDP;
 	I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
 }
 
@@ -1772,9 +1801,7 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 			 * eDP when not using the panel fitter, and when not
 			 * using motion blur mitigation (which we don't
 			 * support). */
-			if (IS_HASWELL(dev_priv) &&
-			    (crtc_state->pch_pfit.enabled ||
-			     crtc_state->pch_pfit.force_thru))
+			if (crtc_state->pch_pfit.force_thru)
 				temp |= TRANS_DDI_EDP_INPUT_A_ONOFF;
 			else
 				temp |= TRANS_DDI_EDP_INPUT_A_ON;
@@ -3111,6 +3138,15 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
 	else
 		intel_prepare_dp_ddi_buffers(encoder, crtc_state);
 
+	if (intel_port_is_combophy(dev_priv, port)) {
+		bool lane_reversal =
+			dig_port->saved_port_bits & DDI_BUF_PORT_REVERSAL;
+
+		intel_combo_phy_power_up_lanes(dev_priv, port, false,
+					       crtc_state->lane_count,
+					       lane_reversal);
+	}
+
 	intel_ddi_init_dp_buf_reg(encoder);
 	if (!is_mst)
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
@@ -3375,6 +3411,7 @@ static void intel_enable_ddi_dp(struct intel_encoder *encoder,
 
 	intel_edp_backlight_on(crtc_state, conn_state);
 	intel_psr_enable(intel_dp, crtc_state);
+	intel_dp_ycbcr_420_enable(intel_dp, crtc_state);
 	intel_edp_drrs_enable(intel_dp, crtc_state);
 
 	if (crtc_state->has_audio)
@@ -3626,7 +3663,7 @@ intel_ddi_post_pll_disable(struct intel_encoder *encoder,
 						  intel_ddi_main_link_aux_domain(dig_port));
 }
 
-void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
+static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv =
@@ -3820,6 +3857,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	intel_read_infoframe(encoder, pipe_config,
 			     HDMI_INFOFRAME_TYPE_VENDOR,
 			     &pipe_config->infoframes.hdmi);
+	intel_read_infoframe(encoder, pipe_config,
+			     HDMI_INFOFRAME_TYPE_DRM,
+			     &pipe_config->infoframes.drm);
 }
 
 static enum intel_output_type
@@ -3844,6 +3884,7 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 				    struct intel_crtc_state *pipe_config,
 				    struct drm_connector_state *conn_state)
 {
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	int ret;
@@ -3858,6 +3899,12 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 	if (ret)
 		return ret;
 
+	if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A &&
+	    pipe_config->cpu_transcoder == TRANSCODER_EDP)
+		pipe_config->pch_pfit.force_thru =
+			pipe_config->pch_pfit.enabled ||
+			pipe_config->crc_enabled;
+
 	if (IS_GEN9_LP(dev_priv))
 		pipe_config->lane_lat_optim_mask =
 			bxt_ddi_phy_calc_lane_lat_optim_mask(pipe_config->lane_count);
@@ -3865,7 +3912,6 @@ static int intel_ddi_compute_config(struct intel_encoder *encoder,
 	intel_ddi_compute_min_voltage_level(dev_priv, pipe_config);
 
 	return 0;
-
 }
 
 static void intel_ddi_encoder_suspend(struct intel_encoder *encoder)
@@ -3925,6 +3971,9 @@ intel_ddi_init_dp_connector(struct intel_digital_port *intel_dig_port)
 		return NULL;
 
 	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
+	intel_dig_port->dp.prepare_link_retrain =
+		intel_ddi_prepare_link_retrain;
+
 	if (!intel_dp_init_connector(intel_dig_port, connector)) {
 		kfree(connector);
 		return NULL;
diff --git a/drivers/gpu/drm/i915/intel_ddi.h b/drivers/gpu/drm/i915/display/intel_ddi.h
index 9cf69175942e..a08365da2643 100644
--- a/drivers/gpu/drm/i915/intel_ddi.h
+++ b/drivers/gpu/drm/i915/display/intel_ddi.h
@@ -31,7 +31,6 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state
 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
 void intel_ddi_disable_pipe_clock(const  struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
-void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 5098228f1302..8592a7d422de 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -44,31 +44,40 @@
 #include <drm/drm_rect.h>
 #include <drm/i915_drm.h>
 
+#include "display/intel_crt.h"
+#include "display/intel_ddi.h"
+#include "display/intel_dp.h"
+#include "display/intel_dsi.h"
+#include "display/intel_dvo.h"
+#include "display/intel_gmbus.h"
+#include "display/intel_hdmi.h"
+#include "display/intel_lvds.h"
+#include "display/intel_sdvo.h"
+#include "display/intel_tv.h"
+#include "display/intel_vdsc.h"
+
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
+#include "intel_acpi.h"
+#include "intel_atomic.h"
 #include "intel_atomic_plane.h"
+#include "intel_bw.h"
 #include "intel_color.h"
 #include "intel_cdclk.h"
-#include "intel_crt.h"
-#include "intel_ddi.h"
-#include "intel_dp.h"
 #include "intel_drv.h"
-#include "intel_dsi.h"
-#include "intel_dvo.h"
 #include "intel_fbc.h"
 #include "intel_fbdev.h"
+#include "intel_fifo_underrun.h"
 #include "intel_frontbuffer.h"
 #include "intel_hdcp.h"
-#include "intel_hdmi.h"
-#include "intel_lvds.h"
+#include "intel_hotplug.h"
+#include "intel_overlay.h"
 #include "intel_pipe_crc.h"
 #include "intel_pm.h"
 #include "intel_psr.h"
-#include "intel_sdvo.h"
+#include "intel_quirks.h"
+#include "intel_sideband.h"
 #include "intel_sprite.h"
-#include "intel_tv.h"
 
 /* Primary plane formats for gen <= 3 */
 static const u32 i8xx_primary_formats[] = {
@@ -120,7 +129,7 @@ static void intel_cpu_transcoder_set_m_n(const struct intel_crtc_state *crtc_sta
 static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state);
 static void ironlake_set_pipeconf(const struct intel_crtc_state *crtc_state);
 static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state);
-static void haswell_set_pipemisc(const struct intel_crtc_state *crtc_state);
+static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state);
 static void vlv_prepare_pll(struct intel_crtc *crtc,
 			    const struct intel_crtc_state *pipe_config);
 static void chv_prepare_pll(struct intel_crtc *crtc,
@@ -153,10 +162,8 @@ int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 	int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
 
 	/* Obtain SKU information */
-	mutex_lock(&dev_priv->sb_lock);
 	hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
 		CCK_FUSE_HPLL_FREQ_MASK;
-	mutex_unlock(&dev_priv->sb_lock);
 
 	return vco_freq[hpll_freq] * 1000;
 }
@@ -167,10 +174,7 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 	u32 val;
 	int divider;
 
-	mutex_lock(&dev_priv->sb_lock);
 	val = vlv_cck_read(dev_priv, reg);
-	mutex_unlock(&dev_priv->sb_lock);
-
 	divider = val & CCK_FREQUENCY_VALUES;
 
 	WARN((val & CCK_FREQUENCY_STATUS) !=
@@ -183,11 +187,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv,
 int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv,
 			   const char *name, u32 reg)
 {
+	int hpll;
+
+	vlv_cck_get(dev_priv);
+
 	if (dev_priv->hpll_freq == 0)
 		dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv);
 
-	return vlv_get_cck_clock(dev_priv, name, reg,
-				 dev_priv->hpll_freq);
+	hpll = vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq);
+
+	vlv_cck_put(dev_priv);
+
+	return hpll;
 }
 
 static void intel_update_czclk(struct drm_i915_private *dev_priv)
@@ -476,6 +487,7 @@ static const struct intel_limit intel_limits_bxt = {
 	.p2 = { .p2_slow = 1, .p2_fast = 20 },
 };
 
+/* WA Display #0827: Gen9:all */
 static void
 skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable)
 {
@@ -489,6 +501,19 @@ skl_wa_827(struct drm_i915_private *dev_priv, int pipe, bool enable)
 			   ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS));
 }
 
+/* Wa_2006604312:icl */
+static void
+icl_wa_scalerclkgating(struct drm_i915_private *dev_priv, enum pipe pipe,
+		       bool enable)
+{
+	if (enable)
+		I915_WRITE(CLKGATE_DIS_PSL(pipe),
+			   I915_READ(CLKGATE_DIS_PSL(pipe)) | DPFR_GATING_DIS);
+	else
+		I915_WRITE(CLKGATE_DIS_PSL(pipe),
+			   I915_READ(CLKGATE_DIS_PSL(pipe)) & ~DPFR_GATING_DIS);
+}
+
 static bool
 needs_modeset(const struct drm_crtc_state *state)
 {
@@ -551,7 +576,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *clock)
 	clock->p = clock->p1 * clock->p2;
 	if (WARN_ON(clock->n == 0 || clock->p == 0))
 		return 0;
-	clock->vco = DIV_ROUND_CLOSEST_ULL((u64)refclk * clock->m,
+	clock->vco = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, clock->m),
 					   clock->n << 22);
 	clock->dot = DIV_ROUND_CLOSEST(clock->vco, clock->p);
 
@@ -936,8 +961,8 @@ chv_find_best_dpll(const struct intel_limit *limit,
 
 			clock.p = clock.p1 * clock.p2;
 
-			m2 = DIV_ROUND_CLOSEST_ULL(((u64)target * clock.p *
-					clock.n) << 22, refclk * clock.m1);
+			m2 = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(target, clock.p * clock.n) << 22,
+						   refclk * clock.m1);
 
 			if (m2 > INT_MAX/clock.m1)
 				continue;
@@ -1080,9 +1105,9 @@ void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
 	u32 val;
 	bool cur_state;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	cur_state = val & DSI_PLL_VCO_EN;
 	I915_STATE_WARN(cur_state != state,
@@ -1392,14 +1417,14 @@ static void _chv_enable_pll(struct intel_crtc *crtc,
 	enum dpio_channel port = vlv_pipe_to_channel(pipe);
 	u32 tmp;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable back the 10bit clock to display controller */
 	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	tmp |= DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Need to wait > 100ns between dclkp clock enable bit and PLL enable.
@@ -1556,14 +1581,14 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	I915_WRITE(DPLL(pipe), val);
 	POSTING_READ(DPLL(pipe));
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Disable 10bit clock to display controller */
 	val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
 	val &= ~DPIO_DCLKP_EN;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
@@ -1891,7 +1916,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 
 	switch (fb->modifier) {
 	case DRM_FORMAT_MOD_LINEAR:
-		return cpp;
+		return intel_tile_size(dev_priv);
 	case I915_FORMAT_MOD_X_TILED:
 		if (IS_GEN(dev_priv, 2))
 			return 128;
@@ -1934,11 +1959,8 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int color_plane)
 static unsigned int
 intel_tile_height(const struct drm_framebuffer *fb, int color_plane)
 {
-	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
-		return 1;
-	else
-		return intel_tile_size(to_i915(fb->dev)) /
-			intel_tile_width_bytes(fb, color_plane);
+	return intel_tile_size(to_i915(fb->dev)) /
+		intel_tile_width_bytes(fb, color_plane);
 }
 
 /* Return the tile dimensions in pixel units */
@@ -1973,6 +1995,17 @@ unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info
 	return size;
 }
 
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info)
+{
+	unsigned int size = 0;
+	int i;
+
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
+		size += rem_info->plane[i].width * rem_info->plane[i].height;
+
+	return size;
+}
+
 static void
 intel_fill_fb_ggtt_view(struct i915_ggtt_view *view,
 			const struct drm_framebuffer *fb,
@@ -2042,7 +2075,9 @@ static bool intel_plane_uses_fence(const struct intel_plane_state *plane_state)
 	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 
-	return INTEL_GEN(dev_priv) < 4 || plane->has_fbc;
+	return INTEL_GEN(dev_priv) < 4 ||
+		(plane->has_fbc &&
+		 plane_state->view.type == I915_GGTT_VIEW_NORMAL);
 }
 
 struct i915_vma *
@@ -2078,7 +2113,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 	 * intel_runtime_pm_put(), so it is correct to wrap only the
 	 * pin/unpin/fence and not more.
 	 */
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+	i915_gem_object_lock(obj);
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
@@ -2133,7 +2169,8 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
 err:
 	atomic_dec(&dev_priv->gpu_error.pending_fb_pin);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	i915_gem_object_unlock(obj);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	return vma;
 }
 
@@ -2141,9 +2178,12 @@ void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
+	i915_gem_object_lock(vma->obj);
 	if (flags & PLANE_HAS_FENCE)
 		i915_vma_unpin_fence(vma);
 	i915_gem_object_unpin_from_display_plane(vma);
+	i915_gem_object_unlock(vma->obj);
+
 	i915_vma_put(vma);
 }
 
@@ -2183,16 +2223,8 @@ void intel_add_fb_offsets(int *x, int *y,
 			  int color_plane)
 
 {
-	const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb);
-	unsigned int rotation = state->base.rotation;
-
-	if (drm_rotation_90_or_270(rotation)) {
-		*x += intel_fb->rotated[color_plane].x;
-		*y += intel_fb->rotated[color_plane].y;
-	} else {
-		*x += intel_fb->normal[color_plane].x;
-		*y += intel_fb->normal[color_plane].y;
-	}
+	*x += state->color_plane[color_plane].x;
+	*y += state->color_plane[color_plane].y;
 }
 
 static u32 intel_adjust_tile_offset(int *x, int *y,
@@ -2432,10 +2464,14 @@ static unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier)
  * main surface.
  */
 static const struct drm_format_info ccs_formats[] = {
-	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
-	{ .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
-	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
-	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2,
+	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+	{ .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2,
+	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, },
+	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2,
+	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, },
+	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2,
+	  .cpp = { 4, 1, }, .hsub = 8, .vsub = 16, .has_alpha = true, },
 };
 
 static const struct drm_format_info *
@@ -2472,6 +2508,134 @@ bool is_ccs_modifier(u64 modifier)
 	       modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
 }
 
+u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
+			      u32 pixel_format, u64 modifier)
+{
+	struct intel_crtc *crtc;
+	struct intel_plane *plane;
+
+	/*
+	 * We assume the primary plane for pipe A has
+	 * the highest stride limits of them all.
+	 */
+	crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
+	plane = to_intel_plane(crtc->base.primary);
+
+	return plane->max_stride(plane, pixel_format, modifier,
+				 DRM_MODE_ROTATE_0);
+}
+
+static
+u32 intel_fb_max_stride(struct drm_i915_private *dev_priv,
+			u32 pixel_format, u64 modifier)
+{
+	/*
+	 * Arbitrary limit for gen4+ chosen to match the
+	 * render engine max stride.
+	 *
+	 * The new CCS hash mode makes remapping impossible
+	 */
+	if (!is_ccs_modifier(modifier)) {
+		if (INTEL_GEN(dev_priv) >= 7)
+			return 256*1024;
+		else if (INTEL_GEN(dev_priv) >= 4)
+			return 128*1024;
+	}
+
+	return intel_plane_fb_max_stride(dev_priv, pixel_format, modifier);
+}
+
+static u32
+intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
+{
+	struct drm_i915_private *dev_priv = to_i915(fb->dev);
+
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR) {
+		u32 max_stride = intel_plane_fb_max_stride(dev_priv,
+							   fb->format->format,
+							   fb->modifier);
+
+		/*
+		 * To make remapping with linear generally feasible
+		 * we need the stride to be page aligned.
+		 */
+		if (fb->pitches[color_plane] > max_stride)
+			return intel_tile_size(dev_priv);
+		else
+			return 64;
+	} else {
+		return intel_tile_width_bytes(fb, color_plane);
+	}
+}
+
+bool intel_plane_can_remap(const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	int i;
+
+	/* We don't want to deal with remapping with cursors */
+	if (plane->id == PLANE_CURSOR)
+		return false;
+
+	/*
+	 * The display engine limits already match/exceed the
+	 * render engine limits, so not much point in remapping.
+	 * Would also need to deal with the fence POT alignment
+	 * and gen2 2KiB GTT tile size.
+	 */
+	if (INTEL_GEN(dev_priv) < 4)
+		return false;
+
+	/*
+	 * The new CCS hash mode isn't compatible with remapping as
+	 * the virtual address of the pages affects the compressed data.
+	 */
+	if (is_ccs_modifier(fb->modifier))
+		return false;
+
+	/* Linear needs a page aligned stride for remapping */
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR) {
+		unsigned int alignment = intel_tile_size(dev_priv) - 1;
+
+		for (i = 0; i < fb->format->num_planes; i++) {
+			if (fb->pitches[i] & alignment)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool intel_plane_needs_remap(const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int rotation = plane_state->base.rotation;
+	u32 stride, max_stride;
+
+	/*
+	 * No remapping for invisible planes since we don't have
+	 * an actual source viewport to remap.
+	 */
+	if (!plane_state->base.visible)
+		return false;
+
+	if (!intel_plane_can_remap(plane_state))
+		return false;
+
+	/*
+	 * FIXME: aux plane limits on gen9+ are
+	 * unclear in Bspec, for now no checking.
+	 */
+	stride = intel_fb_pitch(fb, 0, rotation);
+	max_stride = plane->max_stride(plane, fb->format->format,
+				       fb->modifier, rotation);
+
+	return stride > max_stride;
+}
+
 static int
 intel_fill_fb_info(struct drm_i915_private *dev_priv,
 		   struct drm_framebuffer *fb)
@@ -2637,6 +2801,168 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
+static void
+intel_plane_remap_gtt(struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+	struct intel_rotation_info *info = &plane_state->view.rotated;
+	unsigned int rotation = plane_state->base.rotation;
+	int i, num_planes = fb->format->num_planes;
+	unsigned int tile_size = intel_tile_size(dev_priv);
+	unsigned int src_x, src_y;
+	unsigned int src_w, src_h;
+	u32 gtt_offset = 0;
+
+	memset(&plane_state->view, 0, sizeof(plane_state->view));
+	plane_state->view.type = drm_rotation_90_or_270(rotation) ?
+		I915_GGTT_VIEW_ROTATED : I915_GGTT_VIEW_REMAPPED;
+
+	src_x = plane_state->base.src.x1 >> 16;
+	src_y = plane_state->base.src.y1 >> 16;
+	src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	src_h = drm_rect_height(&plane_state->base.src) >> 16;
+
+	WARN_ON(is_ccs_modifier(fb->modifier));
+
+	/* Make src coordinates relative to the viewport */
+	drm_rect_translate(&plane_state->base.src,
+			   -(src_x << 16), -(src_y << 16));
+
+	/* Rotate src coordinates to match rotated GTT view */
+	if (drm_rotation_90_or_270(rotation))
+		drm_rect_rotate(&plane_state->base.src,
+				src_w << 16, src_h << 16,
+				DRM_MODE_ROTATE_270);
+
+	for (i = 0; i < num_planes; i++) {
+		unsigned int hsub = i ? fb->format->hsub : 1;
+		unsigned int vsub = i ? fb->format->vsub : 1;
+		unsigned int cpp = fb->format->cpp[i];
+		unsigned int tile_width, tile_height;
+		unsigned int width, height;
+		unsigned int pitch_tiles;
+		unsigned int x, y;
+		u32 offset;
+
+		intel_tile_dims(fb, i, &tile_width, &tile_height);
+
+		x = src_x / hsub;
+		y = src_y / vsub;
+		width = src_w / hsub;
+		height = src_h / vsub;
+
+		/*
+		 * First pixel of the src viewport from the
+		 * start of the normal gtt mapping.
+		 */
+		x += intel_fb->normal[i].x;
+		y += intel_fb->normal[i].y;
+
+		offset = intel_compute_aligned_offset(dev_priv, &x, &y,
+						      fb, i, fb->pitches[i],
+						      DRM_MODE_ROTATE_0, tile_size);
+		offset /= tile_size;
+
+		info->plane[i].offset = offset;
+		info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i],
+						     tile_width * cpp);
+		info->plane[i].width = DIV_ROUND_UP(x + width, tile_width);
+		info->plane[i].height = DIV_ROUND_UP(y + height, tile_height);
+
+		if (drm_rotation_90_or_270(rotation)) {
+			struct drm_rect r;
+
+			/* rotate the x/y offsets to match the GTT view */
+			r.x1 = x;
+			r.y1 = y;
+			r.x2 = x + width;
+			r.y2 = y + height;
+			drm_rect_rotate(&r,
+					info->plane[i].width * tile_width,
+					info->plane[i].height * tile_height,
+					DRM_MODE_ROTATE_270);
+			x = r.x1;
+			y = r.y1;
+
+			pitch_tiles = info->plane[i].height;
+			plane_state->color_plane[i].stride = pitch_tiles * tile_height;
+
+			/* rotate the tile dimensions to match the GTT view */
+			swap(tile_width, tile_height);
+		} else {
+			pitch_tiles = info->plane[i].width;
+			plane_state->color_plane[i].stride = pitch_tiles * tile_width * cpp;
+		}
+
+		/*
+		 * We only keep the x/y offsets, so push all of the
+		 * gtt offset into the x/y offsets.
+		 */
+		intel_adjust_tile_offset(&x, &y,
+					 tile_width, tile_height,
+					 tile_size, pitch_tiles,
+					 gtt_offset * tile_size, 0);
+
+		gtt_offset += info->plane[i].width * info->plane[i].height;
+
+		plane_state->color_plane[i].offset = 0;
+		plane_state->color_plane[i].x = x;
+		plane_state->color_plane[i].y = y;
+	}
+}
+
+static int
+intel_plane_compute_gtt(struct intel_plane_state *plane_state)
+{
+	const struct intel_framebuffer *fb =
+		to_intel_framebuffer(plane_state->base.fb);
+	unsigned int rotation = plane_state->base.rotation;
+	int i, num_planes;
+
+	if (!fb)
+		return 0;
+
+	num_planes = fb->base.format->num_planes;
+
+	if (intel_plane_needs_remap(plane_state)) {
+		intel_plane_remap_gtt(plane_state);
+
+		/*
+		 * Sometimes even remapping can't overcome
+		 * the stride limitations :( Can happen with
+		 * big plane sizes and suitably misaligned
+		 * offsets.
+		 */
+		return intel_plane_check_stride(plane_state);
+	}
+
+	intel_fill_fb_ggtt_view(&plane_state->view, &fb->base, rotation);
+
+	for (i = 0; i < num_planes; i++) {
+		plane_state->color_plane[i].stride = intel_fb_pitch(&fb->base, i, rotation);
+		plane_state->color_plane[i].offset = 0;
+
+		if (drm_rotation_90_or_270(rotation)) {
+			plane_state->color_plane[i].x = fb->rotated[i].x;
+			plane_state->color_plane[i].y = fb->rotated[i].y;
+		} else {
+			plane_state->color_plane[i].x = fb->normal[i].x;
+			plane_state->color_plane[i].y = fb->normal[i].y;
+		}
+	}
+
+	/* Rotate src coordinates to match rotated GTT view */
+	if (drm_rotation_90_or_270(rotation))
+		drm_rect_rotate(&plane_state->base.src,
+				fb->base.width << 16, fb->base.height << 16,
+				DRM_MODE_ROTATE_270);
+
+	return intel_plane_check_stride(plane_state);
+}
+
 static int i9xx_format_to_fourcc(int format)
 {
 	switch (format) {
@@ -2839,6 +3165,7 @@ static void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 
 	intel_set_plane_visible(crtc_state, plane_state, false);
 	fixup_active_planes(crtc_state);
+	crtc_state->data_rate[plane->id] = 0;
 
 	if (plane->id == PLANE_PRIMARY)
 		intel_pre_disable_primary_noatomic(&crtc->base);
@@ -2964,41 +3291,55 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
 	switch (fb->modifier) {
 	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
-		switch (cpp) {
-		case 8:
-			return 4096;
-		case 4:
-		case 2:
-		case 1:
-			return 8192;
-		default:
-			MISSING_CASE(cpp);
-			break;
-		}
-		break;
+		return 4096;
 	case I915_FORMAT_MOD_Y_TILED_CCS:
 	case I915_FORMAT_MOD_Yf_TILED_CCS:
 		/* FIXME AUX plane? */
 	case I915_FORMAT_MOD_Y_TILED:
 	case I915_FORMAT_MOD_Yf_TILED:
-		switch (cpp) {
-		case 8:
+		if (cpp == 8)
 			return 2048;
-		case 4:
+		else
 			return 4096;
-		case 2:
-		case 1:
-			return 8192;
-		default:
-			MISSING_CASE(cpp);
-			break;
-		}
-		break;
 	default:
 		MISSING_CASE(fb->modifier);
+		return 2048;
 	}
+}
 
-	return 2048;
+static int glk_max_plane_width(const struct drm_framebuffer *fb,
+			       int color_plane,
+			       unsigned int rotation)
+{
+	int cpp = fb->format->cpp[color_plane];
+
+	switch (fb->modifier) {
+	case DRM_FORMAT_MOD_LINEAR:
+	case I915_FORMAT_MOD_X_TILED:
+		if (cpp == 8)
+			return 4096;
+		else
+			return 5120;
+	case I915_FORMAT_MOD_Y_TILED_CCS:
+	case I915_FORMAT_MOD_Yf_TILED_CCS:
+		/* FIXME AUX plane? */
+	case I915_FORMAT_MOD_Y_TILED:
+	case I915_FORMAT_MOD_Yf_TILED:
+		if (cpp == 8)
+			return 2048;
+		else
+			return 5120;
+	default:
+		MISSING_CASE(fb->modifier);
+		return 2048;
+	}
+}
+
+static int icl_max_plane_width(const struct drm_framebuffer *fb,
+			       int color_plane,
+			       unsigned int rotation)
+{
+	return 5120;
 }
 
 static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state,
@@ -3041,16 +3382,24 @@ static bool skl_check_main_ccs_coordinates(struct intel_plane_state *plane_state
 
 static int skl_check_main_surface(struct intel_plane_state *plane_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev);
 	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	int x = plane_state->base.src.x1 >> 16;
 	int y = plane_state->base.src.y1 >> 16;
 	int w = drm_rect_width(&plane_state->base.src) >> 16;
 	int h = drm_rect_height(&plane_state->base.src) >> 16;
-	int max_width = skl_max_plane_width(fb, 0, rotation);
+	int max_width;
 	int max_height = 4096;
 	u32 alignment, offset, aux_offset = plane_state->color_plane[1].offset;
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		max_width = icl_max_plane_width(fb, 0, rotation);
+	else if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		max_width = glk_max_plane_width(fb, 0, rotation);
+	else
+		max_width = skl_max_plane_width(fb, 0, rotation);
+
 	if (w > max_width || h > max_height) {
 		DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n",
 			      w, h, max_width, max_height);
@@ -3113,6 +3462,14 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state)
 	plane_state->color_plane[0].x = x;
 	plane_state->color_plane[0].y = y;
 
+	/*
+	 * Put the final coordinates back so that the src
+	 * coordinate checks will see the right values.
+	 */
+	drm_rect_translate(&plane_state->base.src,
+			   (x << 16) - plane_state->base.src.x1,
+			   (y << 16) - plane_state->base.src.y1);
+
 	return 0;
 }
 
@@ -3169,26 +3526,15 @@ static int skl_check_ccs_aux_surface(struct intel_plane_state *plane_state)
 int skl_check_plane_surface(struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
 	int ret;
 
-	intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation);
-	plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation);
-	plane_state->color_plane[1].stride = intel_fb_pitch(fb, 1, rotation);
-
-	ret = intel_plane_check_stride(plane_state);
+	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
 		return ret;
 
 	if (!plane_state->base.visible)
 		return 0;
 
-	/* Rotate src coordinates to match rotated GTT view */
-	if (drm_rotation_90_or_270(rotation))
-		drm_rect_rotate(&plane_state->base.src,
-				fb->width << 16, fb->height << 16,
-				DRM_MODE_ROTATE_270);
-
 	/*
 	 * Handle the AUX surface first since
 	 * the main surface setup depends on it.
@@ -3318,20 +3664,20 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv =
 		to_i915(plane_state->base.plane->dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
-	int src_x = plane_state->base.src.x1 >> 16;
-	int src_y = plane_state->base.src.y1 >> 16;
+	int src_x, src_y;
 	u32 offset;
 	int ret;
 
-	intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation);
-	plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation);
-
-	ret = intel_plane_check_stride(plane_state);
+	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
 		return ret;
 
+	if (!plane_state->base.visible)
+		return 0;
+
+	src_x = plane_state->base.src.x1 >> 16;
+	src_y = plane_state->base.src.y1 >> 16;
+
 	intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
 
 	if (INTEL_GEN(dev_priv) >= 4)
@@ -3340,8 +3686,17 @@ int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
 	else
 		offset = 0;
 
+	/*
+	 * Put the final coordinates back so that the src
+	 * coordinate checks will see the right values.
+	 */
+	drm_rect_translate(&plane_state->base.src,
+			   (src_x << 16) - plane_state->base.src.x1,
+			   (src_y << 16) - plane_state->base.src.y1);
+
 	/* HSW/BDW do this automagically in hardware */
 	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) {
+		unsigned int rotation = plane_state->base.rotation;
 		int src_w = drm_rect_width(&plane_state->base.src) >> 16;
 		int src_h = drm_rect_height(&plane_state->base.src) >> 16;
 
@@ -3378,6 +3733,10 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
+	ret = i9xx_check_plane_surface(plane_state);
+	if (ret)
+		return ret;
+
 	if (!plane_state->base.visible)
 		return 0;
 
@@ -3385,10 +3744,6 @@ i9xx_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = i9xx_check_plane_surface(plane_state);
-	if (ret)
-		return ret;
-
 	plane_state->ctl = i9xx_plane_ctl(crtc_state, plane_state);
 
 	return 0;
@@ -3527,15 +3882,6 @@ static bool i9xx_plane_get_hw_state(struct intel_plane *plane,
 	return ret;
 }
 
-static u32
-intel_fb_stride_alignment(const struct drm_framebuffer *fb, int color_plane)
-{
-	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
-		return 64;
-	else
-		return intel_tile_width_bytes(fb, color_plane);
-}
-
 static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
@@ -5015,6 +5361,21 @@ u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited)
 	return ((phase >> 2) & PS_PHASE_MASK) | trip;
 }
 
+#define SKL_MIN_SRC_W 8
+#define SKL_MAX_SRC_W 4096
+#define SKL_MIN_SRC_H 8
+#define SKL_MAX_SRC_H 4096
+#define SKL_MIN_DST_W 8
+#define SKL_MAX_DST_W 4096
+#define SKL_MIN_DST_H 8
+#define SKL_MAX_DST_H 4096
+#define ICL_MAX_SRC_W 5120
+#define ICL_MAX_SRC_H 4096
+#define ICL_MAX_DST_W 5120
+#define ICL_MAX_DST_H 4096
+#define SKL_MIN_YUV_420_SRC_W 16
+#define SKL_MIN_YUV_420_SRC_H 16
+
 static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		  unsigned int scaler_user, int *scaler_id,
@@ -5294,10 +5655,8 @@ void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
 	WARN_ON(!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
 						IPS_ENABLE | IPS_PCODE_CONTROL));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/* Quoting Art Runyan: "its not safe to expect any particular
 		 * value in IPS_CTL bit 31 after enabling IPS through the
 		 * mailbox." Moreover, the mailbox may return a bogus state,
@@ -5327,9 +5686,7 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
 		return;
 
 	if (IS_BROADWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
-		mutex_unlock(&dev_priv->pcu_lock);
 		/*
 		 * Wait for PCODE to finish disabling IPS. The BSpec specified
 		 * 42ms timeout value leads to occasional timeouts so use 100ms
@@ -5505,6 +5862,16 @@ static bool needs_nv12_wa(struct drm_i915_private *dev_priv,
 	return false;
 }
 
+static bool needs_scalerclk_wa(struct drm_i915_private *dev_priv,
+			       const struct intel_crtc_state *crtc_state)
+{
+	/* Wa_2006604312:icl */
+	if (crtc_state->scaler_state.scaler_users > 0 && IS_ICELAKE(dev_priv))
+		return true;
+
+	return false;
+}
+
 static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
@@ -5538,11 +5905,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 			intel_post_enable_primary(&crtc->base, pipe_config);
 	}
 
-	/* Display WA 827 */
 	if (needs_nv12_wa(dev_priv, old_crtc_state) &&
-	    !needs_nv12_wa(dev_priv, pipe_config)) {
+	    !needs_nv12_wa(dev_priv, pipe_config))
 		skl_wa_827(dev_priv, crtc->pipe, false);
-	}
+
+	if (needs_scalerclk_wa(dev_priv, old_crtc_state) &&
+	    !needs_scalerclk_wa(dev_priv, pipe_config))
+		icl_wa_scalerclkgating(dev_priv, crtc->pipe, false);
 }
 
 static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
@@ -5579,9 +5948,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
 
 	/* Display WA 827 */
 	if (!needs_nv12_wa(dev_priv, old_crtc_state) &&
-	    needs_nv12_wa(dev_priv, pipe_config)) {
+	    needs_nv12_wa(dev_priv, pipe_config))
 		skl_wa_827(dev_priv, crtc->pipe, true);
-	}
+
+	/* Wa_2006604312:icl */
+	if (!needs_scalerclk_wa(dev_priv, old_crtc_state) &&
+	    needs_scalerclk_wa(dev_priv, pipe_config))
+		icl_wa_scalerclkgating(dev_priv, crtc->pipe, true);
 
 	/*
 	 * Vblank time updates from the shadow to live plane control register
@@ -5987,7 +6360,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config,
 	if (!transcoder_is_dsi(cpu_transcoder))
 		haswell_set_pipeconf(pipe_config);
 
-	haswell_set_pipemisc(pipe_config);
+	if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
+		bdw_set_pipemisc(pipe_config);
 
 	intel_crtc->active = true;
 
@@ -6289,7 +6663,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc,
 		mask |= BIT_ULL(POWER_DOMAIN_AUDIO);
 
 	if (crtc_state->shared_dpll)
-		mask |= BIT_ULL(POWER_DOMAIN_PLLS);
+		mask |= BIT_ULL(POWER_DOMAIN_DISPLAY_CORE);
 
 	return mask;
 }
@@ -6516,6 +6890,8 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_bw_state *bw_state =
+		to_intel_bw_state(dev_priv->bw_obj.state);
 	enum intel_display_power_domain domain;
 	struct intel_plane *plane;
 	u64 domains;
@@ -6578,6 +6954,9 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc,
 	dev_priv->active_crtcs &= ~(1 << intel_crtc->pipe);
 	dev_priv->min_cdclk[intel_crtc->pipe] = 0;
 	dev_priv->min_voltage_level[intel_crtc->pipe] = 0;
+
+	bw_state->data_rate[intel_crtc->pipe] = 0;
+	bw_state->num_active_planes[intel_crtc->pipe] = 0;
 }
 
 /*
@@ -6872,7 +7251,7 @@ static u32 ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config)
 		if (WARN_ON(!pfit_w || !pfit_h))
 			return pixel_rate;
 
-		pixel_rate = div_u64((u64)pixel_rate * pipe_w * pipe_h,
+		pixel_rate = div_u64(mul_u32_u32(pixel_rate, pipe_w * pipe_h),
 				     pfit_w * pfit_h);
 	}
 
@@ -6992,7 +7371,7 @@ static void compute_m_n(unsigned int m, unsigned int n,
 	else
 		*ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX);
 
-	*ret_m = div_u64((u64)m * *ret_n, n);
+	*ret_m = div_u64(mul_u32_u32(m, *ret_n), n);
 	intel_reduce_m_n_ratio(ret_m, ret_n);
 }
 
@@ -7225,7 +7604,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	bestn = pipe_config->dpll.n;
 	bestm1 = pipe_config->dpll.m1;
@@ -7302,7 +7681,8 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
 
 	vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_prepare_pll(struct intel_crtc *crtc,
@@ -7335,7 +7715,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 	dpio_val = 0;
 	loopfilter = 0;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* p1 and p2 divider */
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
@@ -7407,7 +7787,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
 			vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
 			DPIO_AFC_RECAL);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 /**
@@ -7675,9 +8055,14 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc,
 	tmp = I915_READ(HTOTAL(cpu_transcoder));
 	pipe_config->base.adjusted_mode.crtc_hdisplay = (tmp & 0xffff) + 1;
 	pipe_config->base.adjusted_mode.crtc_htotal = ((tmp >> 16) & 0xffff) + 1;
-	tmp = I915_READ(HBLANK(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_hblank_start = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_hblank_end = ((tmp >> 16) & 0xffff) + 1;
+
+	if (!transcoder_is_dsi(cpu_transcoder)) {
+		tmp = I915_READ(HBLANK(cpu_transcoder));
+		pipe_config->base.adjusted_mode.crtc_hblank_start =
+							(tmp & 0xffff) + 1;
+		pipe_config->base.adjusted_mode.crtc_hblank_end =
+						((tmp >> 16) & 0xffff) + 1;
+	}
 	tmp = I915_READ(HSYNC(cpu_transcoder));
 	pipe_config->base.adjusted_mode.crtc_hsync_start = (tmp & 0xffff) + 1;
 	pipe_config->base.adjusted_mode.crtc_hsync_end = ((tmp >> 16) & 0xffff) + 1;
@@ -7685,9 +8070,14 @@ static void intel_get_pipe_timings(struct intel_crtc *crtc,
 	tmp = I915_READ(VTOTAL(cpu_transcoder));
 	pipe_config->base.adjusted_mode.crtc_vdisplay = (tmp & 0xffff) + 1;
 	pipe_config->base.adjusted_mode.crtc_vtotal = ((tmp >> 16) & 0xffff) + 1;
-	tmp = I915_READ(VBLANK(cpu_transcoder));
-	pipe_config->base.adjusted_mode.crtc_vblank_start = (tmp & 0xffff) + 1;
-	pipe_config->base.adjusted_mode.crtc_vblank_end = ((tmp >> 16) & 0xffff) + 1;
+
+	if (!transcoder_is_dsi(cpu_transcoder)) {
+		tmp = I915_READ(VBLANK(cpu_transcoder));
+		pipe_config->base.adjusted_mode.crtc_vblank_start =
+							(tmp & 0xffff) + 1;
+		pipe_config->base.adjusted_mode.crtc_vblank_end =
+						((tmp >> 16) & 0xffff) + 1;
+	}
 	tmp = I915_READ(VSYNC(cpu_transcoder));
 	pipe_config->base.adjusted_mode.crtc_vsync_start = (tmp & 0xffff) + 1;
 	pipe_config->base.adjusted_mode.crtc_vsync_end = ((tmp >> 16) & 0xffff) + 1;
@@ -8033,9 +8423,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
 	clock.m2 = mdiv & DPIO_M2DIV_MASK;
@@ -8144,13 +8534,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
 	if ((pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE) == 0)
 		return;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
 	pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
 	pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
 	pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
 	pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
 	clock.m2 = (pll_dw0 & 0xff) << 22;
@@ -8276,6 +8666,7 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 		pipe_config->cgm_mode = I915_READ(CGM_PIPE_MODE(crtc->pipe));
 
 	i9xx_get_pipe_color_config(pipe_config);
+	intel_color_get_config(pipe_config);
 
 	if (INTEL_GEN(dev_priv) < 4)
 		pipe_config->double_wide = tmp & PIPECONF_DOUBLE_WIDE;
@@ -8650,7 +9041,7 @@ static void lpt_enable_clkout_dp(struct drm_i915_private *dev_priv,
 }
 
 /* Sequence to disable CLKOUT_DP */
-static void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv)
+void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv)
 {
 	u32 reg, tmp;
 
@@ -8736,22 +9127,95 @@ static void lpt_bend_clkout_dp(struct drm_i915_private *dev_priv, int steps)
 
 #undef BEND_IDX
 
+static bool spll_uses_pch_ssc(struct drm_i915_private *dev_priv)
+{
+	u32 fuse_strap = I915_READ(FUSE_STRAP);
+	u32 ctl = I915_READ(SPLL_CTL);
+
+	if ((ctl & SPLL_PLL_ENABLE) == 0)
+		return false;
+
+	if ((ctl & SPLL_REF_MASK) == SPLL_REF_MUXED_SSC &&
+	    (fuse_strap & HSW_CPU_SSC_ENABLE) == 0)
+		return true;
+
+	if (IS_BROADWELL(dev_priv) &&
+	    (ctl & SPLL_REF_MASK) == SPLL_REF_PCH_SSC_BDW)
+		return true;
+
+	return false;
+}
+
+static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv,
+			       enum intel_dpll_id id)
+{
+	u32 fuse_strap = I915_READ(FUSE_STRAP);
+	u32 ctl = I915_READ(WRPLL_CTL(id));
+
+	if ((ctl & WRPLL_PLL_ENABLE) == 0)
+		return false;
+
+	if ((ctl & WRPLL_REF_MASK) == WRPLL_REF_PCH_SSC)
+		return true;
+
+	if ((IS_BROADWELL(dev_priv) || IS_HSW_ULT(dev_priv)) &&
+	    (ctl & WRPLL_REF_MASK) == WRPLL_REF_MUXED_SSC_BDW &&
+	    (fuse_strap & HSW_CPU_SSC_ENABLE) == 0)
+		return true;
+
+	return false;
+}
+
 static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
 {
 	struct intel_encoder *encoder;
-	bool has_vga = false;
+	bool pch_ssc_in_use = false;
+	bool has_fdi = false;
 
 	for_each_intel_encoder(&dev_priv->drm, encoder) {
 		switch (encoder->type) {
 		case INTEL_OUTPUT_ANALOG:
-			has_vga = true;
+			has_fdi = true;
 			break;
 		default:
 			break;
 		}
 	}
 
-	if (has_vga) {
+	/*
+	 * The BIOS may have decided to use the PCH SSC
+	 * reference so we must not disable it until the
+	 * relevant PLLs have stopped relying on it. We'll
+	 * just leave the PCH SSC reference enabled in case
+	 * any active PLL is using it. It will get disabled
+	 * after runtime suspend if we don't have FDI.
+	 *
+	 * TODO: Move the whole reference clock handling
+	 * to the modeset sequence proper so that we can
+	 * actually enable/disable/reconfigure these things
+	 * safely. To do that we need to introduce a real
+	 * clock hierarchy. That would also allow us to do
+	 * clock bending finally.
+	 */
+	if (spll_uses_pch_ssc(dev_priv)) {
+		DRM_DEBUG_KMS("SPLL using PCH SSC\n");
+		pch_ssc_in_use = true;
+	}
+
+	if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) {
+		DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n");
+		pch_ssc_in_use = true;
+	}
+
+	if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) {
+		DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n");
+		pch_ssc_in_use = true;
+	}
+
+	if (pch_ssc_in_use)
+		return;
+
+	if (has_fdi) {
 		lpt_bend_clkout_dp(dev_priv, 0);
 		lpt_enable_clkout_dp(dev_priv, true, true);
 	} else {
@@ -8833,44 +9297,68 @@ static void haswell_set_pipeconf(const struct intel_crtc_state *crtc_state)
 	POSTING_READ(PIPECONF(cpu_transcoder));
 }
 
-static void haswell_set_pipemisc(const struct intel_crtc_state *crtc_state)
+static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 val = 0;
 
-	if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9) {
-		u32 val = 0;
+	switch (crtc_state->pipe_bpp) {
+	case 18:
+		val |= PIPEMISC_DITHER_6_BPC;
+		break;
+	case 24:
+		val |= PIPEMISC_DITHER_8_BPC;
+		break;
+	case 30:
+		val |= PIPEMISC_DITHER_10_BPC;
+		break;
+	case 36:
+		val |= PIPEMISC_DITHER_12_BPC;
+		break;
+	default:
+		MISSING_CASE(crtc_state->pipe_bpp);
+		break;
+	}
 
-		switch (crtc_state->pipe_bpp) {
-		case 18:
-			val |= PIPEMISC_DITHER_6_BPC;
-			break;
-		case 24:
-			val |= PIPEMISC_DITHER_8_BPC;
-			break;
-		case 30:
-			val |= PIPEMISC_DITHER_10_BPC;
-			break;
-		case 36:
-			val |= PIPEMISC_DITHER_12_BPC;
-			break;
-		default:
-			/* Case prevented by pipe_config_set_bpp. */
-			BUG();
-		}
+	if (crtc_state->dither)
+		val |= PIPEMISC_DITHER_ENABLE | PIPEMISC_DITHER_TYPE_SP;
 
-		if (crtc_state->dither)
-			val |= PIPEMISC_DITHER_ENABLE | PIPEMISC_DITHER_TYPE_SP;
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
+	    crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444)
+		val |= PIPEMISC_OUTPUT_COLORSPACE_YUV;
 
-		if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420 ||
-		    crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444)
-			val |= PIPEMISC_OUTPUT_COLORSPACE_YUV;
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+		val |= PIPEMISC_YUV420_ENABLE |
+			PIPEMISC_YUV420_MODE_FULL_BLEND;
 
-		if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
-			val |= PIPEMISC_YUV420_ENABLE |
-				PIPEMISC_YUV420_MODE_FULL_BLEND;
+	if (INTEL_GEN(dev_priv) >= 11 &&
+	    (crtc_state->active_planes & ~(icl_hdr_plane_mask() |
+					   BIT(PLANE_CURSOR))) == 0)
+		val |= PIPEMISC_HDR_MODE_PRECISION;
 
-		I915_WRITE(PIPEMISC(intel_crtc->pipe), val);
+	I915_WRITE(PIPEMISC(crtc->pipe), val);
+}
+
+int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	u32 tmp;
+
+	tmp = I915_READ(PIPEMISC(crtc->pipe));
+
+	switch (tmp & PIPEMISC_DITHER_BPC_MASK) {
+	case PIPEMISC_DITHER_6_BPC:
+		return 18;
+	case PIPEMISC_DITHER_8_BPC:
+		return 24;
+	case PIPEMISC_DITHER_10_BPC:
+		return 30;
+	case PIPEMISC_DITHER_12_BPC:
+		return 36;
+	default:
+		MISSING_CASE(tmp);
+		return 0;
 	}
 }
 
@@ -9349,6 +9837,7 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
 	pipe_config->csc_mode = I915_READ(PIPE_CSC_MODE(crtc->pipe));
 
 	i9xx_get_pipe_color_config(pipe_config);
+	intel_color_get_config(pipe_config);
 
 	if (I915_READ(PCH_TRANSCONF(crtc->pipe)) & TRANS_ENABLE) {
 		struct intel_shared_dpll *pll;
@@ -9405,228 +9894,6 @@ out:
 
 	return ret;
 }
-
-static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
-{
-	struct drm_device *dev = &dev_priv->drm;
-	struct intel_crtc *crtc;
-
-	for_each_intel_crtc(dev, crtc)
-		I915_STATE_WARN(crtc->active, "CRTC for pipe %c enabled\n",
-		     pipe_name(crtc->pipe));
-
-	I915_STATE_WARN(I915_READ(HSW_PWR_WELL_CTL2),
-			"Display power well on\n");
-	I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n");
-	I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n");
-	I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n");
-	I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
-	     "CPU PWM1 enabled\n");
-	if (IS_HASWELL(dev_priv))
-		I915_STATE_WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
-		     "CPU PWM2 enabled\n");
-	I915_STATE_WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
-	     "PCH PWM1 enabled\n");
-	I915_STATE_WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
-	     "Utility pin enabled\n");
-	I915_STATE_WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
-
-	/*
-	 * In theory we can still leave IRQs enabled, as long as only the HPD
-	 * interrupts remain enabled. We used to check for that, but since it's
-	 * gen-specific and since we only disable LCPLL after we fully disable
-	 * the interrupts, the check below should be enough.
-	 */
-	I915_STATE_WARN(intel_irqs_enabled(dev_priv), "IRQs enabled\n");
-}
-
-static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv)
-{
-	if (IS_HASWELL(dev_priv))
-		return I915_READ(D_COMP_HSW);
-	else
-		return I915_READ(D_COMP_BDW);
-}
-
-static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val)
-{
-	if (IS_HASWELL(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
-		if (sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_D_COMP,
-					    val))
-			DRM_DEBUG_KMS("Failed to write to D_COMP\n");
-		mutex_unlock(&dev_priv->pcu_lock);
-	} else {
-		I915_WRITE(D_COMP_BDW, val);
-		POSTING_READ(D_COMP_BDW);
-	}
-}
-
-/*
- * This function implements pieces of two sequences from BSpec:
- * - Sequence for display software to disable LCPLL
- * - Sequence for display software to allow package C8+
- * The steps implemented here are just the steps that actually touch the LCPLL
- * register. Callers should take care of disabling all the display engine
- * functions, doing the mode unset, fixing interrupts, etc.
- */
-static void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
-			      bool switch_to_fclk, bool allow_power_down)
-{
-	u32 val;
-
-	assert_can_disable_lcpll(dev_priv);
-
-	val = I915_READ(LCPLL_CTL);
-
-	if (switch_to_fclk) {
-		val |= LCPLL_CD_SOURCE_FCLK;
-		I915_WRITE(LCPLL_CTL, val);
-
-		if (wait_for_us(I915_READ(LCPLL_CTL) &
-				LCPLL_CD_SOURCE_FCLK_DONE, 1))
-			DRM_ERROR("Switching to FCLK failed\n");
-
-		val = I915_READ(LCPLL_CTL);
-	}
-
-	val |= LCPLL_PLL_DISABLE;
-	I915_WRITE(LCPLL_CTL, val);
-	POSTING_READ(LCPLL_CTL);
-
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    LCPLL_CTL, LCPLL_PLL_LOCK, 0, 1))
-		DRM_ERROR("LCPLL still locked\n");
-
-	val = hsw_read_dcomp(dev_priv);
-	val |= D_COMP_COMP_DISABLE;
-	hsw_write_dcomp(dev_priv, val);
-	ndelay(100);
-
-	if (wait_for((hsw_read_dcomp(dev_priv) & D_COMP_RCOMP_IN_PROGRESS) == 0,
-		     1))
-		DRM_ERROR("D_COMP RCOMP still in progress\n");
-
-	if (allow_power_down) {
-		val = I915_READ(LCPLL_CTL);
-		val |= LCPLL_POWER_DOWN_ALLOW;
-		I915_WRITE(LCPLL_CTL, val);
-		POSTING_READ(LCPLL_CTL);
-	}
-}
-
-/*
- * Fully restores LCPLL, disallowing power down and switching back to LCPLL
- * source.
- */
-static void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = I915_READ(LCPLL_CTL);
-
-	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
-		    LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
-		return;
-
-	/*
-	 * Make sure we're not on PC8 state before disabling PC8, otherwise
-	 * we'll hang the machine. To prevent PC8 state, just enable force_wake.
-	 */
-	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	if (val & LCPLL_POWER_DOWN_ALLOW) {
-		val &= ~LCPLL_POWER_DOWN_ALLOW;
-		I915_WRITE(LCPLL_CTL, val);
-		POSTING_READ(LCPLL_CTL);
-	}
-
-	val = hsw_read_dcomp(dev_priv);
-	val |= D_COMP_COMP_FORCE;
-	val &= ~D_COMP_COMP_DISABLE;
-	hsw_write_dcomp(dev_priv, val);
-
-	val = I915_READ(LCPLL_CTL);
-	val &= ~LCPLL_PLL_DISABLE;
-	I915_WRITE(LCPLL_CTL, val);
-
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    LCPLL_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK,
-				    5))
-		DRM_ERROR("LCPLL not locked yet\n");
-
-	if (val & LCPLL_CD_SOURCE_FCLK) {
-		val = I915_READ(LCPLL_CTL);
-		val &= ~LCPLL_CD_SOURCE_FCLK;
-		I915_WRITE(LCPLL_CTL, val);
-
-		if (wait_for_us((I915_READ(LCPLL_CTL) &
-				 LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
-			DRM_ERROR("Switching back to LCPLL failed\n");
-	}
-
-	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-
-	intel_update_cdclk(dev_priv);
-	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
-}
-
-/*
- * Package states C8 and deeper are really deep PC states that can only be
- * reached when all the devices on the system allow it, so even if the graphics
- * device allows PC8+, it doesn't mean the system will actually get to these
- * states. Our driver only allows PC8+ when going into runtime PM.
- *
- * The requirements for PC8+ are that all the outputs are disabled, the power
- * well is disabled and most interrupts are disabled, and these are also
- * requirements for runtime PM. When these conditions are met, we manually do
- * the other conditions: disable the interrupts, clocks and switch LCPLL refclk
- * to Fclk. If we're in PC8+ and we get an non-hotplug interrupt, we can hard
- * hang the machine.
- *
- * When we really reach PC8 or deeper states (not just when we allow it) we lose
- * the state of some registers, so when we come back from PC8+ we need to
- * restore this state. We don't get into PC8+ if we're not in RC6, so we don't
- * need to take care of the registers kept by RC6. Notice that this happens even
- * if we don't put the device in PCI D3 state (which is what currently happens
- * because of the runtime PM support).
- *
- * For more, read "Display Sequences for Package C8" on the hardware
- * documentation.
- */
-void hsw_enable_pc8(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	DRM_DEBUG_KMS("Enabling package C8+\n");
-
-	if (HAS_PCH_LPT_LP(dev_priv)) {
-		val = I915_READ(SOUTH_DSPCLK_GATE_D);
-		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
-		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
-	}
-
-	lpt_disable_clkout_dp(dev_priv);
-	hsw_disable_lcpll(dev_priv, true, true);
-}
-
-void hsw_disable_pc8(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	DRM_DEBUG_KMS("Disabling package C8+\n");
-
-	hsw_restore_lcpll(dev_priv);
-	lpt_init_pch_refclk(dev_priv);
-
-	if (HAS_PCH_LPT_LP(dev_priv)) {
-		val = I915_READ(SOUTH_DSPCLK_GATE_D);
-		val |= PCH_LP_PARTITION_LEVEL_DISABLE;
-		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
-	}
-}
-
 static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 				      struct intel_crtc_state *crtc_state)
 {
@@ -9797,6 +10064,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 	for_each_set_bit(panel_transcoder,
 			 &panel_transcoder_mask,
 			 ARRAY_SIZE(INTEL_INFO(dev_priv)->trans_offsets)) {
+		bool force_thru = false;
 		enum pipe trans_pipe;
 
 		tmp = I915_READ(TRANS_DDI_FUNC_CTL(panel_transcoder));
@@ -9818,6 +10086,8 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 			     transcoder_name(panel_transcoder));
 			/* fall through */
 		case TRANS_DDI_EDP_INPUT_A_ONOFF:
+			force_thru = true;
+			/* fall through */
 		case TRANS_DDI_EDP_INPUT_A_ON:
 			trans_pipe = PIPE_A;
 			break;
@@ -9829,8 +10099,10 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc,
 			break;
 		}
 
-		if (trans_pipe == crtc->pipe)
+		if (trans_pipe == crtc->pipe) {
 			pipe_config->cpu_transcoder = panel_transcoder;
+			pipe_config->pch_pfit.force_thru = force_thru;
+		}
 	}
 
 	/*
@@ -10014,6 +10286,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
 		i9xx_get_pipe_color_config(pipe_config);
 	}
 
+	intel_color_get_config(pipe_config);
+
 	power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
 	WARN_ON(power_domain_mask & BIT_ULL(power_domain));
 
@@ -10115,19 +10389,17 @@ static bool intel_cursor_size_ok(const struct intel_plane_state *plane_state)
 
 static int intel_cursor_check_surface(struct intel_plane_state *plane_state)
 {
-	const struct drm_framebuffer *fb = plane_state->base.fb;
-	unsigned int rotation = plane_state->base.rotation;
 	int src_x, src_y;
 	u32 offset;
 	int ret;
 
-	intel_fill_fb_ggtt_view(&plane_state->view, fb, rotation);
-	plane_state->color_plane[0].stride = intel_fb_pitch(fb, 0, rotation);
-
-	ret = intel_plane_check_stride(plane_state);
+	ret = intel_plane_compute_gtt(plane_state);
 	if (ret)
 		return ret;
 
+	if (!plane_state->base.visible)
+		return 0;
+
 	src_x = plane_state->base.src_x >> 16;
 	src_y = plane_state->base.src_y >> 16;
 
@@ -10164,6 +10436,10 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
+	ret = intel_cursor_check_surface(plane_state);
+	if (ret)
+		return ret;
+
 	if (!plane_state->base.visible)
 		return 0;
 
@@ -10171,10 +10447,6 @@ static int intel_check_cursor(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = intel_cursor_check_surface(plane_state);
-	if (ret)
-		return ret;
-
 	return 0;
 }
 
@@ -11101,6 +11373,7 @@ int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_stat
 	if (!is_crtc_enabled) {
 		plane_state->visible = visible = false;
 		to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id);
+		to_intel_crtc_state(crtc_state)->data_rate[plane->id] = 0;
 	}
 
 	if (!was_visible && !visible)
@@ -11316,6 +11589,17 @@ static int icl_check_nv12_planes(struct intel_crtc_state *crtc_state)
 	return 0;
 }
 
+static bool c8_planes_changed(const struct intel_crtc_state *new_crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+	struct intel_atomic_state *state =
+		to_intel_atomic_state(new_crtc_state->base.state);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+
+	return !old_crtc_state->c8_planes != !new_crtc_state->c8_planes;
+}
+
 static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 				   struct drm_crtc_state *crtc_state)
 {
@@ -11339,6 +11623,13 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 			return ret;
 	}
 
+	/*
+	 * May need to update pipe gamma enable bits
+	 * when C8 planes are getting enabled/disabled.
+	 */
+	if (c8_planes_changed(pipe_config))
+		crtc_state->color_mgmt_changed = true;
+
 	if (mode_changed || pipe_config->update_pipe ||
 	    crtc_state->color_mgmt_changed) {
 		ret = intel_color_check(pipe_config);
@@ -11496,17 +11787,19 @@ compute_baseline_pipe_bpp(struct intel_crtc *crtc,
 static void intel_dump_crtc_timings(const struct drm_display_mode *mode)
 {
 	DRM_DEBUG_KMS("crtc timings: %d %d %d %d %d %d %d %d %d, "
-			"type: 0x%x flags: 0x%x\n",
-		mode->crtc_clock,
-		mode->crtc_hdisplay, mode->crtc_hsync_start,
-		mode->crtc_hsync_end, mode->crtc_htotal,
-		mode->crtc_vdisplay, mode->crtc_vsync_start,
-		mode->crtc_vsync_end, mode->crtc_vtotal, mode->type, mode->flags);
+		      "type: 0x%x flags: 0x%x\n",
+		      mode->crtc_clock,
+		      mode->crtc_hdisplay, mode->crtc_hsync_start,
+		      mode->crtc_hsync_end, mode->crtc_htotal,
+		      mode->crtc_vdisplay, mode->crtc_vsync_start,
+		      mode->crtc_vsync_end, mode->crtc_vtotal,
+		      mode->type, mode->flags);
 }
 
 static inline void
-intel_dump_m_n_config(struct intel_crtc_state *pipe_config, char *id,
-		      unsigned int lane_count, struct intel_link_m_n *m_n)
+intel_dump_m_n_config(const struct intel_crtc_state *pipe_config,
+		      const char *id, unsigned int lane_count,
+		      const struct intel_link_m_n *m_n)
 {
 	DRM_DEBUG_KMS("%s: lanes: %i; gmch_m: %u, gmch_n: %u, link_m: %u, link_n: %u, tu: %u\n",
 		      id, lane_count,
@@ -11584,26 +11877,54 @@ static const char *output_formats(enum intel_output_format format)
 	return output_format_str[format];
 }
 
-static void intel_dump_pipe_config(struct intel_crtc *crtc,
-				   struct intel_crtc_state *pipe_config,
+static void intel_dump_plane_state(const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	struct drm_format_name_buf format_name;
+
+	if (!fb) {
+		DRM_DEBUG_KMS("[PLANE:%d:%s] fb: [NOFB], visible: %s\n",
+			      plane->base.base.id, plane->base.name,
+			      yesno(plane_state->base.visible));
+		return;
+	}
+
+	DRM_DEBUG_KMS("[PLANE:%d:%s] fb: [FB:%d] %ux%u format = %s, visible: %s\n",
+		      plane->base.base.id, plane->base.name,
+		      fb->base.id, fb->width, fb->height,
+		      drm_get_format_name(fb->format->format, &format_name),
+		      yesno(plane_state->base.visible));
+	DRM_DEBUG_KMS("\trotation: 0x%x, scaler: %d\n",
+		      plane_state->base.rotation, plane_state->scaler_id);
+	if (plane_state->base.visible)
+		DRM_DEBUG_KMS("\tsrc: " DRM_RECT_FP_FMT " dst: " DRM_RECT_FMT "\n",
+			      DRM_RECT_FP_ARG(&plane_state->base.src),
+			      DRM_RECT_ARG(&plane_state->base.dst));
+}
+
+static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
+				   struct intel_atomic_state *state,
 				   const char *context)
 {
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_plane *plane;
-	struct intel_plane *intel_plane;
-	struct intel_plane_state *state;
-	struct drm_framebuffer *fb;
+	struct intel_crtc *crtc = to_intel_crtc(pipe_config->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct intel_plane_state *plane_state;
+	struct intel_plane *plane;
 	char buf[64];
+	int i;
 
-	DRM_DEBUG_KMS("[CRTC:%d:%s]%s\n",
-		      crtc->base.base.id, crtc->base.name, context);
+	DRM_DEBUG_KMS("[CRTC:%d:%s] enable: %s %s\n",
+		      crtc->base.base.id, crtc->base.name,
+		      yesno(pipe_config->base.enable), context);
 
-	snprintf_output_types(buf, sizeof(buf), pipe_config->output_types);
-	DRM_DEBUG_KMS("output_types: %s (0x%x)\n",
-		      buf, pipe_config->output_types);
+	if (!pipe_config->base.enable)
+		goto dump_planes;
 
-	DRM_DEBUG_KMS("output format: %s\n",
+	snprintf_output_types(buf, sizeof(buf), pipe_config->output_types);
+	DRM_DEBUG_KMS("active: %s, output_types: %s (0x%x), output format: %s\n",
+		      yesno(pipe_config->base.active),
+		      buf, pipe_config->output_types,
 		      output_formats(pipe_config->output_format));
 
 	DRM_DEBUG_KMS("cpu_transcoder: %s, pipe bpp: %i, dithering: %i\n",
@@ -11624,10 +11945,8 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
 					      &pipe_config->dp_m2_n2);
 	}
 
-	DRM_DEBUG_KMS("audio: %i, infoframes: %i\n",
-		      pipe_config->has_audio, pipe_config->has_infoframe);
-
-	DRM_DEBUG_KMS("infoframes enabled: 0x%x\n",
+	DRM_DEBUG_KMS("audio: %i, infoframes: %i, infoframes enabled: 0x%x\n",
+		      pipe_config->has_audio, pipe_config->has_infoframe,
 		      pipe_config->infoframes.enable);
 
 	if (pipe_config->infoframes.enable &
@@ -11665,51 +11984,30 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
 			      pipe_config->gmch_pfit.pgm_ratios,
 			      pipe_config->gmch_pfit.lvds_border_bits);
 	else
-		DRM_DEBUG_KMS("pch pfit: pos: 0x%08x, size: 0x%08x, %s\n",
+		DRM_DEBUG_KMS("pch pfit: pos: 0x%08x, size: 0x%08x, %s, force thru: %s\n",
 			      pipe_config->pch_pfit.pos,
 			      pipe_config->pch_pfit.size,
-		              enableddisabled(pipe_config->pch_pfit.enabled));
+			      enableddisabled(pipe_config->pch_pfit.enabled),
+			      yesno(pipe_config->pch_pfit.force_thru));
 
 	DRM_DEBUG_KMS("ips: %i, double wide: %i\n",
 		      pipe_config->ips_enabled, pipe_config->double_wide);
 
 	intel_dpll_dump_hw_state(dev_priv, &pipe_config->dpll_hw_state);
 
-	DRM_DEBUG_KMS("planes on this crtc\n");
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct drm_format_name_buf format_name;
-		intel_plane = to_intel_plane(plane);
-		if (intel_plane->pipe != crtc->pipe)
-			continue;
-
-		state = to_intel_plane_state(plane->state);
-		fb = state->base.fb;
-		if (!fb) {
-			DRM_DEBUG_KMS("[PLANE:%d:%s] disabled, scaler_id = %d\n",
-				      plane->base.id, plane->name, state->scaler_id);
-			continue;
-		}
+dump_planes:
+	if (!state)
+		return;
 
-		DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d, fb = %ux%u format = %s\n",
-			      plane->base.id, plane->name,
-			      fb->base.id, fb->width, fb->height,
-			      drm_get_format_name(fb->format->format, &format_name));
-		if (INTEL_GEN(dev_priv) >= 9)
-			DRM_DEBUG_KMS("\tscaler:%d src %dx%d+%d+%d dst %dx%d+%d+%d\n",
-				      state->scaler_id,
-				      state->base.src.x1 >> 16,
-				      state->base.src.y1 >> 16,
-				      drm_rect_width(&state->base.src) >> 16,
-				      drm_rect_height(&state->base.src) >> 16,
-				      state->base.dst.x1, state->base.dst.y1,
-				      drm_rect_width(&state->base.dst),
-				      drm_rect_height(&state->base.dst));
+	for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
+		if (plane->pipe == crtc->pipe)
+			intel_dump_plane_state(plane_state);
 	}
 }
 
-static bool check_digital_port_conflicts(struct drm_atomic_state *state)
+static bool check_digital_port_conflicts(struct intel_atomic_state *state)
 {
-	struct drm_device *dev = state->dev;
+	struct drm_device *dev = state->base.dev;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	unsigned int used_ports = 0;
@@ -11726,7 +12024,9 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state)
 		struct drm_connector_state *connector_state;
 		struct intel_encoder *encoder;
 
-		connector_state = drm_atomic_get_new_connector_state(state, connector);
+		connector_state =
+			drm_atomic_get_new_connector_state(&state->base,
+							   connector);
 		if (!connector_state)
 			connector_state = connector->state;
 
@@ -11790,7 +12090,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 	saved_state->scaler_state = crtc_state->scaler_state;
 	saved_state->shared_dpll = crtc_state->shared_dpll;
 	saved_state->dpll_hw_state = crtc_state->dpll_hw_state;
-	saved_state->pch_pfit.force_thru = crtc_state->pch_pfit.force_thru;
 	saved_state->crc_enabled = crtc_state->crc_enabled;
 	if (IS_G4X(dev_priv) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
@@ -11806,9 +12105,9 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
 }
 
 static int
-intel_modeset_pipe_config(struct drm_crtc *crtc,
-			  struct intel_crtc_state *pipe_config)
+intel_modeset_pipe_config(struct intel_crtc_state *pipe_config)
 {
+	struct drm_crtc *crtc = pipe_config->base.crtc;
 	struct drm_atomic_state *state = pipe_config->base.state;
 	struct intel_encoder *encoder;
 	struct drm_connector *connector;
@@ -11942,7 +12241,7 @@ encoder_retry:
 	return 0;
 }
 
-static bool intel_fuzzy_clock_check(int clock1, int clock2)
+bool intel_fuzzy_clock_check(int clock1, int clock2)
 {
 	int diff;
 
@@ -11993,21 +12292,14 @@ intel_compare_m_n(unsigned int m, unsigned int n,
 
 static bool
 intel_compare_link_m_n(const struct intel_link_m_n *m_n,
-		       struct intel_link_m_n *m2_n2,
-		       bool adjust)
+		       const struct intel_link_m_n *m2_n2,
+		       bool exact)
 {
-	if (m_n->tu == m2_n2->tu &&
-	    intel_compare_m_n(m_n->gmch_m, m_n->gmch_n,
-			      m2_n2->gmch_m, m2_n2->gmch_n, !adjust) &&
-	    intel_compare_m_n(m_n->link_m, m_n->link_n,
-			      m2_n2->link_m, m2_n2->link_n, !adjust)) {
-		if (adjust)
-			*m2_n2 = *m_n;
-
-		return true;
-	}
-
-	return false;
+	return m_n->tu == m2_n2->tu &&
+		intel_compare_m_n(m_n->gmch_m, m_n->gmch_n,
+				  m2_n2->gmch_m, m2_n2->gmch_n, exact) &&
+		intel_compare_m_n(m_n->link_m, m_n->link_n,
+				  m2_n2->link_m, m2_n2->link_n, exact);
 }
 
 static bool
@@ -12018,16 +12310,16 @@ intel_compare_infoframe(const union hdmi_infoframe *a,
 }
 
 static void
-pipe_config_infoframe_err(struct drm_i915_private *dev_priv,
-			  bool adjust, const char *name,
-			  const union hdmi_infoframe *a,
-			  const union hdmi_infoframe *b)
+pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv,
+			       bool fastset, const char *name,
+			       const union hdmi_infoframe *a,
+			       const union hdmi_infoframe *b)
 {
-	if (adjust) {
+	if (fastset) {
 		if ((drm_debug & DRM_UT_KMS) == 0)
 			return;
 
-		drm_dbg(DRM_UT_KMS, "mismatch in %s infoframe", name);
+		drm_dbg(DRM_UT_KMS, "fastset mismatch in %s infoframe", name);
 		drm_dbg(DRM_UT_KMS, "expected:");
 		hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, a);
 		drm_dbg(DRM_UT_KMS, "found");
@@ -12042,7 +12334,7 @@ pipe_config_infoframe_err(struct drm_i915_private *dev_priv,
 }
 
 static void __printf(3, 4)
-pipe_config_err(bool adjust, const char *name, const char *format, ...)
+pipe_config_mismatch(bool fastset, const char *name, const char *format, ...)
 {
 	struct va_format vaf;
 	va_list args;
@@ -12051,8 +12343,8 @@ pipe_config_err(bool adjust, const char *name, const char *format, ...)
 	vaf.fmt = format;
 	vaf.va = &args;
 
-	if (adjust)
-		drm_dbg(DRM_UT_KMS, "mismatch in %s %pV", name, &vaf);
+	if (fastset)
+		drm_dbg(DRM_UT_KMS, "fastset mismatch in %s %pV", name, &vaf);
 	else
 		drm_err("mismatch in %s %pV", name, &vaf);
 
@@ -12077,14 +12369,13 @@ static bool fastboot_enabled(struct drm_i915_private *dev_priv)
 }
 
 static bool
-intel_pipe_config_compare(struct drm_i915_private *dev_priv,
-			  struct intel_crtc_state *current_config,
-			  struct intel_crtc_state *pipe_config,
-			  bool adjust)
+intel_pipe_config_compare(const struct intel_crtc_state *current_config,
+			  const struct intel_crtc_state *pipe_config,
+			  bool fastset)
 {
-	struct intel_crtc *crtc = to_intel_crtc(current_config->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(current_config->base.crtc->dev);
 	bool ret = true;
-	bool fixup_inherited = adjust &&
+	bool fixup_inherited = fastset &&
 		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
 		!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED);
 
@@ -12095,30 +12386,30 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 
 #define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected 0x%08x, found 0x%08x)\n", \
-			  current_config->name, \
-			  pipe_config->name); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected 0x%08x, found 0x%08x)\n", \
+				     current_config->name, \
+				     pipe_config->name); \
 		ret = false; \
 	} \
 } while (0)
 
 #define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected %i, found %i)\n", \
-			  current_config->name, \
-			  pipe_config->name); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected %i, found %i)\n", \
+				     current_config->name, \
+				     pipe_config->name); \
 		ret = false; \
 	} \
 } while (0)
 
 #define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected %s, found %s)\n", \
-			  yesno(current_config->name), \
-			  yesno(pipe_config->name)); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected %s, found %s)\n", \
+				     yesno(current_config->name), \
+				     yesno(pipe_config->name)); \
 		ret = false; \
 	} \
 } while (0)
@@ -12132,20 +12423,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 	if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \
 		PIPE_CONF_CHECK_BOOL(name); \
 	} else { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \
-			  yesno(current_config->name), \
-			  yesno(pipe_config->name)); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "unable to verify whether state matches exactly, forcing modeset (expected %s, found %s)\n", \
+				     yesno(current_config->name), \
+				     yesno(pipe_config->name)); \
 		ret = false; \
 	} \
 } while (0)
 
 #define PIPE_CONF_CHECK_P(name) do { \
 	if (current_config->name != pipe_config->name) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected %p, found %p)\n", \
-			  current_config->name, \
-			  pipe_config->name); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected %p, found %p)\n", \
+				     current_config->name, \
+				     pipe_config->name); \
 		ret = false; \
 	} \
 } while (0)
@@ -12153,20 +12444,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 #define PIPE_CONF_CHECK_M_N(name) do { \
 	if (!intel_compare_link_m_n(&current_config->name, \
 				    &pipe_config->name,\
-				    adjust)) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected tu %i gmch %i/%i link %i/%i, " \
-			  "found tu %i, gmch %i/%i link %i/%i)\n", \
-			  current_config->name.tu, \
-			  current_config->name.gmch_m, \
-			  current_config->name.gmch_n, \
-			  current_config->name.link_m, \
-			  current_config->name.link_n, \
-			  pipe_config->name.tu, \
-			  pipe_config->name.gmch_m, \
-			  pipe_config->name.gmch_n, \
-			  pipe_config->name.link_m, \
-			  pipe_config->name.link_n); \
+				    !fastset)) { \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected tu %i gmch %i/%i link %i/%i, " \
+				     "found tu %i, gmch %i/%i link %i/%i)\n", \
+				     current_config->name.tu, \
+				     current_config->name.gmch_m, \
+				     current_config->name.gmch_n, \
+				     current_config->name.link_m, \
+				     current_config->name.link_n, \
+				     pipe_config->name.tu, \
+				     pipe_config->name.gmch_m, \
+				     pipe_config->name.gmch_n, \
+				     pipe_config->name.link_m, \
+				     pipe_config->name.link_n); \
 		ret = false; \
 	} \
 } while (0)
@@ -12178,49 +12469,49 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
  */
 #define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \
 	if (!intel_compare_link_m_n(&current_config->name, \
-				    &pipe_config->name, adjust) && \
+				    &pipe_config->name, !fastset) && \
 	    !intel_compare_link_m_n(&current_config->alt_name, \
-				    &pipe_config->name, adjust)) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected tu %i gmch %i/%i link %i/%i, " \
-			  "or tu %i gmch %i/%i link %i/%i, " \
-			  "found tu %i, gmch %i/%i link %i/%i)\n", \
-			  current_config->name.tu, \
-			  current_config->name.gmch_m, \
-			  current_config->name.gmch_n, \
-			  current_config->name.link_m, \
-			  current_config->name.link_n, \
-			  current_config->alt_name.tu, \
-			  current_config->alt_name.gmch_m, \
-			  current_config->alt_name.gmch_n, \
-			  current_config->alt_name.link_m, \
-			  current_config->alt_name.link_n, \
-			  pipe_config->name.tu, \
-			  pipe_config->name.gmch_m, \
-			  pipe_config->name.gmch_n, \
-			  pipe_config->name.link_m, \
-			  pipe_config->name.link_n); \
+				    &pipe_config->name, !fastset)) { \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected tu %i gmch %i/%i link %i/%i, " \
+				     "or tu %i gmch %i/%i link %i/%i, " \
+				     "found tu %i, gmch %i/%i link %i/%i)\n", \
+				     current_config->name.tu, \
+				     current_config->name.gmch_m, \
+				     current_config->name.gmch_n, \
+				     current_config->name.link_m, \
+				     current_config->name.link_n, \
+				     current_config->alt_name.tu, \
+				     current_config->alt_name.gmch_m, \
+				     current_config->alt_name.gmch_n, \
+				     current_config->alt_name.link_m, \
+				     current_config->alt_name.link_n, \
+				     pipe_config->name.tu, \
+				     pipe_config->name.gmch_m, \
+				     pipe_config->name.gmch_n, \
+				     pipe_config->name.link_m, \
+				     pipe_config->name.link_n); \
 		ret = false; \
 	} \
 } while (0)
 
 #define PIPE_CONF_CHECK_FLAGS(name, mask) do { \
 	if ((current_config->name ^ pipe_config->name) & (mask)) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(%x) (expected %i, found %i)\n", \
-			  (mask), \
-			  current_config->name & (mask), \
-			  pipe_config->name & (mask)); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(%x) (expected %i, found %i)\n", \
+				     (mask), \
+				     current_config->name & (mask), \
+				     pipe_config->name & (mask)); \
 		ret = false; \
 	} \
 } while (0)
 
 #define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \
 	if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \
-		pipe_config_err(adjust, __stringify(name), \
-			  "(expected %i, found %i)\n", \
-			  current_config->name, \
-			  pipe_config->name); \
+		pipe_config_mismatch(fastset, __stringify(name), \
+				     "(expected %i, found %i)\n", \
+				     current_config->name, \
+				     pipe_config->name); \
 		ret = false; \
 	} \
 } while (0)
@@ -12228,9 +12519,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 #define PIPE_CONF_CHECK_INFOFRAME(name) do { \
 	if (!intel_compare_infoframe(&current_config->infoframes.name, \
 				     &pipe_config->infoframes.name)) { \
-		pipe_config_infoframe_err(dev_priv, adjust, __stringify(name), \
-					  &current_config->infoframes.name, \
-					  &pipe_config->infoframes.name); \
+		pipe_config_infoframe_mismatch(dev_priv, fastset, __stringify(name), \
+					       &current_config->infoframes.name, \
+					       &pipe_config->infoframes.name); \
 		ret = false; \
 	} \
 } while (0)
@@ -12280,7 +12571,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 
 	PIPE_CONF_CHECK_BOOL(hdmi_scrambling);
 	PIPE_CONF_CHECK_BOOL(hdmi_high_tmds_clock_ratio);
-	PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_infoframe);
+	PIPE_CONF_CHECK_BOOL(has_infoframe);
 
 	PIPE_CONF_CHECK_BOOL_INCOMPLETE(has_audio);
 
@@ -12308,11 +12599,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 	 * Changing the EDP transcoder input mux
 	 * (A_ONOFF vs. A_ON) requires a full modeset.
 	 */
-	if (IS_HASWELL(dev_priv) && crtc->pipe == PIPE_A &&
-	    current_config->cpu_transcoder == TRANSCODER_EDP)
-		PIPE_CONF_CHECK_BOOL(pch_pfit.enabled);
+	PIPE_CONF_CHECK_BOOL(pch_pfit.force_thru);
 
-	if (!adjust) {
+	if (!fastset) {
 		PIPE_CONF_CHECK_I(pipe_src_w);
 		PIPE_CONF_CHECK_I(pipe_src_h);
 
@@ -12385,6 +12674,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 	PIPE_CONF_CHECK_INFOFRAME(avi);
 	PIPE_CONF_CHECK_INFOFRAME(spd);
 	PIPE_CONF_CHECK_INFOFRAME(hdmi);
+	PIPE_CONF_CHECK_INFOFRAME(drm);
 
 #undef PIPE_CONF_CHECK_X
 #undef PIPE_CONF_CHECK_I
@@ -12691,13 +12981,10 @@ verify_crtc_state(struct drm_crtc *crtc,
 	intel_pipe_config_sanity_check(dev_priv, pipe_config);
 
 	sw_config = to_intel_crtc_state(new_crtc_state);
-	if (!intel_pipe_config_compare(dev_priv, sw_config,
-				       pipe_config, false)) {
+	if (!intel_pipe_config_compare(sw_config, pipe_config, false)) {
 		I915_STATE_WARN(1, "pipe state doesn't match!\n");
-		intel_dump_pipe_config(intel_crtc, pipe_config,
-				       "[hw state]");
-		intel_dump_pipe_config(intel_crtc, sw_config,
-				       "[sw state]");
+		intel_dump_pipe_config(pipe_config, NULL, "[hw state]");
+		intel_dump_pipe_config(sw_config, NULL, "[sw state]");
 	}
 }
 
@@ -12878,31 +13165,30 @@ static void update_scanline_offset(const struct intel_crtc_state *crtc_state)
 		crtc->scanline_offset = 1;
 }
 
-static void intel_modeset_clear_plls(struct drm_atomic_state *state)
+static void intel_modeset_clear_plls(struct intel_atomic_state *state)
 {
-	struct drm_device *dev = state->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
+	struct intel_crtc *crtc;
 	int i;
 
 	if (!dev_priv->display.crtc_compute_clock)
 		return;
 
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
 		struct intel_shared_dpll *old_dpll =
-			to_intel_crtc_state(old_crtc_state)->shared_dpll;
+			old_crtc_state->shared_dpll;
 
-		if (!needs_modeset(new_crtc_state))
+		if (!needs_modeset(&new_crtc_state->base))
 			continue;
 
-		to_intel_crtc_state(new_crtc_state)->shared_dpll = NULL;
+		new_crtc_state->shared_dpll = NULL;
 
 		if (!old_dpll)
 			continue;
 
-		intel_release_shared_dpll(old_dpll, intel_crtc, state);
+		intel_release_shared_dpll(old_dpll, crtc, &state->base);
 	}
 }
 
@@ -12912,29 +13198,27 @@ static void intel_modeset_clear_plls(struct drm_atomic_state *state)
  * multiple pipes, and planes are enabled after the pipe, we need to wait at
  * least 2 vblanks on the first pipe before enabling planes on the second pipe.
  */
-static int haswell_mode_set_planes_workaround(struct drm_atomic_state *state)
+static int haswell_mode_set_planes_workaround(struct intel_atomic_state *state)
 {
-	struct drm_crtc_state *crtc_state;
-	struct intel_crtc *intel_crtc;
-	struct drm_crtc *crtc;
+	struct intel_crtc_state *crtc_state;
+	struct intel_crtc *crtc;
 	struct intel_crtc_state *first_crtc_state = NULL;
 	struct intel_crtc_state *other_crtc_state = NULL;
 	enum pipe first_pipe = INVALID_PIPE, enabled_pipe = INVALID_PIPE;
 	int i;
 
 	/* look at all crtc's that are going to be enabled in during modeset */
-	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
-		intel_crtc = to_intel_crtc(crtc);
-
-		if (!crtc_state->active || !needs_modeset(crtc_state))
+	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
+		if (!crtc_state->base.active ||
+		    !needs_modeset(&crtc_state->base))
 			continue;
 
 		if (first_crtc_state) {
-			other_crtc_state = to_intel_crtc_state(crtc_state);
+			other_crtc_state = crtc_state;
 			break;
 		} else {
-			first_crtc_state = to_intel_crtc_state(crtc_state);
-			first_pipe = intel_crtc->pipe;
+			first_crtc_state = crtc_state;
+			first_pipe = crtc->pipe;
 		}
 	}
 
@@ -12943,24 +13227,22 @@ static int haswell_mode_set_planes_workaround(struct drm_atomic_state *state)
 		return 0;
 
 	/* w/a possibly needed, check how many crtc's are already enabled. */
-	for_each_intel_crtc(state->dev, intel_crtc) {
-		struct intel_crtc_state *pipe_config;
-
-		pipe_config = intel_atomic_get_crtc_state(state, intel_crtc);
-		if (IS_ERR(pipe_config))
-			return PTR_ERR(pipe_config);
+	for_each_intel_crtc(state->base.dev, crtc) {
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
 
-		pipe_config->hsw_workaround_pipe = INVALID_PIPE;
+		crtc_state->hsw_workaround_pipe = INVALID_PIPE;
 
-		if (!pipe_config->base.active ||
-		    needs_modeset(&pipe_config->base))
+		if (!crtc_state->base.active ||
+		    needs_modeset(&crtc_state->base))
 			continue;
 
 		/* 2 or more enabled crtcs means no need for w/a */
 		if (enabled_pipe != INVALID_PIPE)
 			return 0;
 
-		enabled_pipe = intel_crtc->pipe;
+		enabled_pipe = crtc->pipe;
 	}
 
 	if (enabled_pipe != INVALID_PIPE)
@@ -13020,12 +13302,11 @@ static int intel_modeset_all_pipes(struct drm_atomic_state *state)
 	return 0;
 }
 
-static int intel_modeset_checks(struct drm_atomic_state *state)
+static int intel_modeset_checks(struct intel_atomic_state *state)
 {
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct drm_i915_private *dev_priv = to_i915(state->dev);
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
+	struct intel_crtc *crtc;
 	int ret = 0, i;
 
 	if (!check_digital_port_conflicts(state)) {
@@ -13034,24 +13315,24 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
 	}
 
 	/* keep the current setting */
-	if (!intel_state->cdclk.force_min_cdclk_changed)
-		intel_state->cdclk.force_min_cdclk =
-			dev_priv->cdclk.force_min_cdclk;
-
-	intel_state->modeset = true;
-	intel_state->active_crtcs = dev_priv->active_crtcs;
-	intel_state->cdclk.logical = dev_priv->cdclk.logical;
-	intel_state->cdclk.actual = dev_priv->cdclk.actual;
-	intel_state->cdclk.pipe = INVALID_PIPE;
-
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		if (new_crtc_state->active)
-			intel_state->active_crtcs |= 1 << i;
+	if (!state->cdclk.force_min_cdclk_changed)
+		state->cdclk.force_min_cdclk = dev_priv->cdclk.force_min_cdclk;
+
+	state->modeset = true;
+	state->active_crtcs = dev_priv->active_crtcs;
+	state->cdclk.logical = dev_priv->cdclk.logical;
+	state->cdclk.actual = dev_priv->cdclk.actual;
+	state->cdclk.pipe = INVALID_PIPE;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (new_crtc_state->base.active)
+			state->active_crtcs |= 1 << i;
 		else
-			intel_state->active_crtcs &= ~(1 << i);
+			state->active_crtcs &= ~(1 << i);
 
-		if (old_crtc_state->active != new_crtc_state->active)
-			intel_state->active_pipe_changes |= drm_crtc_mask(crtc);
+		if (old_crtc_state->base.active != new_crtc_state->base.active)
+			state->active_pipe_changes |= drm_crtc_mask(&crtc->base);
 	}
 
 	/*
@@ -13074,19 +13355,19 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
 		 * touching the hardware
 		 */
 		if (intel_cdclk_changed(&dev_priv->cdclk.logical,
-					&intel_state->cdclk.logical)) {
-			ret = intel_lock_all_pipes(state);
+					&state->cdclk.logical)) {
+			ret = intel_lock_all_pipes(&state->base);
 			if (ret < 0)
 				return ret;
 		}
 
-		if (is_power_of_2(intel_state->active_crtcs)) {
+		if (is_power_of_2(state->active_crtcs)) {
 			struct drm_crtc *crtc;
 			struct drm_crtc_state *crtc_state;
 
-			pipe = ilog2(intel_state->active_crtcs);
+			pipe = ilog2(state->active_crtcs);
 			crtc = &intel_get_crtc_for_pipe(dev_priv, pipe)->base;
-			crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
+			crtc_state = drm_atomic_get_new_crtc_state(&state->base, crtc);
 			if (crtc_state && needs_modeset(crtc_state))
 				pipe = INVALID_PIPE;
 		} else {
@@ -13097,27 +13378,27 @@ static int intel_modeset_checks(struct drm_atomic_state *state)
 		if (pipe != INVALID_PIPE &&
 		    intel_cdclk_needs_cd2x_update(dev_priv,
 						  &dev_priv->cdclk.actual,
-						  &intel_state->cdclk.actual)) {
-			ret = intel_lock_all_pipes(state);
+						  &state->cdclk.actual)) {
+			ret = intel_lock_all_pipes(&state->base);
 			if (ret < 0)
 				return ret;
 
-			intel_state->cdclk.pipe = pipe;
+			state->cdclk.pipe = pipe;
 		} else if (intel_cdclk_needs_modeset(&dev_priv->cdclk.actual,
-						     &intel_state->cdclk.actual)) {
-			ret = intel_modeset_all_pipes(state);
+						     &state->cdclk.actual)) {
+			ret = intel_modeset_all_pipes(&state->base);
 			if (ret < 0)
 				return ret;
 
-			intel_state->cdclk.pipe = INVALID_PIPE;
+			state->cdclk.pipe = INVALID_PIPE;
 		}
 
 		DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n",
-			      intel_state->cdclk.logical.cdclk,
-			      intel_state->cdclk.actual.cdclk);
+			      state->cdclk.logical.cdclk,
+			      state->cdclk.actual.cdclk);
 		DRM_DEBUG_KMS("New voltage level calculated to be logical %u, actual %u\n",
-			      intel_state->cdclk.logical.voltage_level,
-			      intel_state->cdclk.actual.voltage_level);
+			      state->cdclk.logical.voltage_level,
+			      state->cdclk.actual.voltage_level);
 	}
 
 	intel_modeset_clear_plls(state);
@@ -13145,92 +13426,131 @@ static int calc_watermark_data(struct intel_atomic_state *state)
 	return 0;
 }
 
+static void intel_crtc_check_fastset(const struct intel_crtc_state *old_crtc_state,
+				     struct intel_crtc_state *new_crtc_state)
+{
+	if (!intel_pipe_config_compare(old_crtc_state, new_crtc_state, true))
+		return;
+
+	new_crtc_state->base.mode_changed = false;
+	new_crtc_state->update_pipe = true;
+
+	/*
+	 * If we're not doing the full modeset we want to
+	 * keep the current M/N values as they may be
+	 * sufficiently different to the computed values
+	 * to cause problems.
+	 *
+	 * FIXME: should really copy more fuzzy state here
+	 */
+	new_crtc_state->fdi_m_n = old_crtc_state->fdi_m_n;
+	new_crtc_state->dp_m_n = old_crtc_state->dp_m_n;
+	new_crtc_state->dp_m2_n2 = old_crtc_state->dp_m2_n2;
+	new_crtc_state->has_drrs = old_crtc_state->has_drrs;
+}
+
 /**
  * intel_atomic_check - validate state object
  * @dev: drm device
- * @state: state to validate
+ * @_state: state to validate
  */
 static int intel_atomic_check(struct drm_device *dev,
-			      struct drm_atomic_state *state)
+			      struct drm_atomic_state *_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct drm_crtc *crtc;
-	struct drm_crtc_state *old_crtc_state, *crtc_state;
+	struct intel_atomic_state *state = to_intel_atomic_state(_state);
+	struct intel_crtc_state *old_crtc_state, *new_crtc_state;
+	struct intel_crtc *crtc;
 	int ret, i;
-	bool any_ms = intel_state->cdclk.force_min_cdclk_changed;
+	bool any_ms = state->cdclk.force_min_cdclk_changed;
 
 	/* Catch I915_MODE_FLAG_INHERITED */
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
-				      crtc_state, i) {
-		if (crtc_state->mode.private_flags !=
-		    old_crtc_state->mode.private_flags)
-			crtc_state->mode_changed = true;
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (new_crtc_state->base.mode.private_flags !=
+		    old_crtc_state->base.mode.private_flags)
+			new_crtc_state->base.mode_changed = true;
 	}
 
-	ret = drm_atomic_helper_check_modeset(dev, state);
+	ret = drm_atomic_helper_check_modeset(dev, &state->base);
 	if (ret)
-		return ret;
-
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, crtc_state, i) {
-		struct intel_crtc_state *pipe_config =
-			to_intel_crtc_state(crtc_state);
+		goto fail;
 
-		if (!needs_modeset(crtc_state))
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (!needs_modeset(&new_crtc_state->base))
 			continue;
 
-		if (!crtc_state->enable) {
+		if (!new_crtc_state->base.enable) {
 			any_ms = true;
 			continue;
 		}
 
-		ret = intel_modeset_pipe_config(crtc, pipe_config);
-		if (ret == -EDEADLK)
-			return ret;
-		if (ret) {
-			intel_dump_pipe_config(to_intel_crtc(crtc),
-					       pipe_config, "[failed]");
-			return ret;
-		}
+		ret = intel_modeset_pipe_config(new_crtc_state);
+		if (ret)
+			goto fail;
 
-		if (intel_pipe_config_compare(dev_priv,
-					to_intel_crtc_state(old_crtc_state),
-					pipe_config, true)) {
-			crtc_state->mode_changed = false;
-			pipe_config->update_pipe = true;
-		}
+		intel_crtc_check_fastset(old_crtc_state, new_crtc_state);
 
-		if (needs_modeset(crtc_state))
+		if (needs_modeset(&new_crtc_state->base))
 			any_ms = true;
-
-		intel_dump_pipe_config(to_intel_crtc(crtc), pipe_config,
-				       needs_modeset(crtc_state) ?
-				       "[modeset]" : "[fastset]");
 	}
 
-	ret = drm_dp_mst_atomic_check(state);
+	ret = drm_dp_mst_atomic_check(&state->base);
 	if (ret)
-		return ret;
+		goto fail;
 
 	if (any_ms) {
 		ret = intel_modeset_checks(state);
-
 		if (ret)
-			return ret;
+			goto fail;
 	} else {
-		intel_state->cdclk.logical = dev_priv->cdclk.logical;
+		state->cdclk.logical = dev_priv->cdclk.logical;
 	}
 
-	ret = icl_add_linked_planes(intel_state);
+	ret = icl_add_linked_planes(state);
 	if (ret)
-		return ret;
+		goto fail;
 
-	ret = drm_atomic_helper_check_planes(dev, state);
+	ret = drm_atomic_helper_check_planes(dev, &state->base);
 	if (ret)
+		goto fail;
+
+	intel_fbc_choose_crtc(dev_priv, state);
+	ret = calc_watermark_data(state);
+	if (ret)
+		goto fail;
+
+	ret = intel_bw_atomic_check(state);
+	if (ret)
+		goto fail;
+
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i) {
+		if (!needs_modeset(&new_crtc_state->base) &&
+		    !new_crtc_state->update_pipe)
+			continue;
+
+		intel_dump_pipe_config(new_crtc_state, state,
+				       needs_modeset(&new_crtc_state->base) ?
+				       "[modeset]" : "[fastset]");
+	}
+
+	return 0;
+
+ fail:
+	if (ret == -EDEADLK)
 		return ret;
 
-	intel_fbc_choose_crtc(dev_priv, intel_state);
-	return calc_watermark_data(intel_state);
+	/*
+	 * FIXME would probably be nice to know which crtc specifically
+	 * caused the failure, in cases where we can pinpoint it.
+	 */
+	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
+					    new_crtc_state, i)
+		intel_dump_pipe_config(new_crtc_state, state, "[failed]");
+
+	return ret;
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
@@ -13619,6 +13939,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 		intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
 		intel_display_power_put(dev_priv, POWER_DOMAIN_MODESET, wakeref);
 	}
+	intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref);
 
 	/*
 	 * Defer the cleanup of the old state to a separate worker to not
@@ -13697,6 +14018,8 @@ static int intel_atomic_commit(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret = 0;
 
+	intel_state->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+
 	drm_atomic_state_get(state);
 	i915_sw_fence_init(&intel_state->commit_ready,
 			   intel_atomic_commit_ready);
@@ -13733,6 +14056,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 	if (ret) {
 		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
 		i915_sw_fence_commit(&intel_state->commit_ready);
+		intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref);
 		return ret;
 	}
 
@@ -13744,6 +14068,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 		i915_sw_fence_commit(&intel_state->commit_ready);
 
 		drm_atomic_helper_cleanup_planes(dev, state);
+		intel_runtime_pm_put(&dev_priv->runtime_pm, intel_state->wakeref);
 		return ret;
 	}
 	dev_priv->wm.distrust_bios_wm = false;
@@ -13942,7 +14267,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		 */
 		if (needs_modeset(crtc_state)) {
 			ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
-							      old_obj->resv, NULL,
+							      old_obj->base.resv, NULL,
 							      false, 0,
 							      GFP_KERNEL);
 			if (ret < 0)
@@ -13986,13 +14311,13 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		struct dma_fence *fence;
 
 		ret = i915_sw_fence_await_reservation(&intel_state->commit_ready,
-						      obj->resv, NULL,
+						      obj->base.resv, NULL,
 						      false, I915_FENCE_TIMEOUT,
 						      GFP_KERNEL);
 		if (ret < 0)
 			return ret;
 
-		fence = reservation_object_get_excl_rcu(obj->resv);
+		fence = reservation_object_get_excl_rcu(obj->base.resv);
 		if (fence) {
 			add_rps_boost_after_vblank(new_state->crtc, fence);
 			dma_fence_put(fence);
@@ -14105,6 +14430,9 @@ static void intel_begin_crtc_commit(struct intel_atomic_state *state,
 	else if (INTEL_GEN(dev_priv) >= 9)
 		skl_detach_scalers(new_crtc_state);
 
+	if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
+		bdw_set_pipemisc(new_crtc_state);
+
 out:
 	if (dev_priv->display.atomic_update_watermarks)
 		dev_priv->display.atomic_update_watermarks(state,
@@ -14215,8 +14543,6 @@ static const struct drm_plane_funcs i965_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = i965_plane_format_mod_supported,
@@ -14226,8 +14552,6 @@ static const struct drm_plane_funcs i8xx_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = i8xx_plane_format_mod_supported,
@@ -14368,8 +14692,6 @@ static const struct drm_plane_funcs intel_cursor_plane_funcs = {
 	.update_plane = intel_legacy_cursor_update,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = intel_cursor_format_mod_supported,
@@ -14605,9 +14927,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 		ret = -ENOMEM;
 		goto fail;
 	}
+	__drm_atomic_helper_crtc_reset(&intel_crtc->base, &crtc_state->base);
 	intel_crtc->config = crtc_state;
-	intel_crtc->base.state = &crtc_state->base;
-	crtc_state->base.crtc = &intel_crtc->base;
 
 	primary = intel_primary_plane_create(dev_priv, pipe);
 	if (IS_ERR(primary)) {
@@ -15060,31 +15381,13 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = {
 	.dirty = intel_user_framebuffer_dirty,
 };
 
-static
-u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv,
-			 u32 pixel_format, u64 fb_modifier)
-{
-	struct intel_crtc *crtc;
-	struct intel_plane *plane;
-
-	/*
-	 * We assume the primary plane for pipe A has
-	 * the highest stride limits of them all.
-	 */
-	crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
-	plane = to_intel_plane(crtc->base.primary);
-
-	return plane->max_stride(plane, pixel_format, fb_modifier,
-				 DRM_MODE_ROTATE_0);
-}
-
 static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 				  struct drm_i915_gem_object *obj,
 				  struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
 	struct drm_framebuffer *fb = &intel_fb->base;
-	u32 pitch_limit;
+	u32 max_stride;
 	unsigned int tiling, stride;
 	int ret = -EINVAL;
 	int i;
@@ -15136,13 +15439,13 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 		goto err;
 	}
 
-	pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->pixel_format,
-					   mode_cmd->modifier[0]);
-	if (mode_cmd->pitches[0] > pitch_limit) {
+	max_stride = intel_fb_max_stride(dev_priv, mode_cmd->pixel_format,
+					 mode_cmd->modifier[0]);
+	if (mode_cmd->pitches[0] > max_stride) {
 		DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n",
 			      mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			      "tiled" : "linear",
-			      mode_cmd->pitches[0], pitch_limit);
+			      mode_cmd->pitches[0], max_stride);
 		goto err;
 	}
 
@@ -15419,6 +15722,16 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv)
 		dev_priv->display.update_crtcs = intel_update_crtcs;
 }
 
+static i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv)
+{
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		return VLV_VGACNTRL;
+	else if (INTEL_GEN(dev_priv) >= 5)
+		return CPU_VGACNTRL;
+	else
+		return VGACNTRL;
+}
+
 /* Disable the VGA plane that we never use */
 static void i915_disable_vga(struct drm_i915_private *dev_priv)
 {
@@ -15618,6 +15931,10 @@ int intel_modeset_init(struct drm_device *dev)
 
 	drm_mode_config_init(dev);
 
+	ret = intel_bw_init(dev_priv);
+	if (ret)
+		return ret;
+
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
 
@@ -15656,16 +15973,22 @@ int intel_modeset_init(struct drm_device *dev)
 		}
 	}
 
-	/* maximum framebuffer dimensions */
-	if (IS_GEN(dev_priv, 2)) {
-		dev->mode_config.max_width = 2048;
-		dev->mode_config.max_height = 2048;
+	/*
+	 * Maximum framebuffer dimensions, chosen to match
+	 * the maximum render engine surface size on gen4+.
+	 */
+	if (INTEL_GEN(dev_priv) >= 7) {
+		dev->mode_config.max_width = 16384;
+		dev->mode_config.max_height = 16384;
+	} else if (INTEL_GEN(dev_priv) >= 4) {
+		dev->mode_config.max_width = 8192;
+		dev->mode_config.max_height = 8192;
 	} else if (IS_GEN(dev_priv, 3)) {
 		dev->mode_config.max_width = 4096;
 		dev->mode_config.max_height = 4096;
 	} else {
-		dev->mode_config.max_width = 8192;
-		dev->mode_config.max_height = 8192;
+		dev->mode_config.max_width = 2048;
+		dev->mode_config.max_height = 2048;
 	}
 
 	if (IS_I845G(dev_priv) || IS_I865G(dev_priv)) {
@@ -16149,7 +16472,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 
 		__drm_atomic_helper_crtc_destroy_state(&crtc_state->base);
 		memset(crtc_state, 0, sizeof(*crtc_state));
-		crtc_state->base.crtc = &crtc->base;
+		__drm_atomic_helper_crtc_reset(&crtc->base, &crtc_state->base);
 
 		crtc_state->base.active = crtc_state->base.enable =
 			dev_priv->display.get_pipe_config(crtc, crtc_state);
@@ -16240,8 +16563,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 	drm_connector_list_iter_end(&conn_iter);
 
 	for_each_intel_crtc(dev, crtc) {
+		struct intel_bw_state *bw_state =
+			to_intel_bw_state(dev_priv->bw_obj.state);
 		struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
+		struct intel_plane *plane;
 		int min_cdclk = 0;
 
 		memset(&crtc->base.mode, 0, sizeof(crtc->base.mode));
@@ -16280,6 +16606,21 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 		dev_priv->min_voltage_level[crtc->pipe] =
 			crtc_state->min_voltage_level;
 
+		for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+			const struct intel_plane_state *plane_state =
+				to_intel_plane_state(plane->base.state);
+
+			/*
+			 * FIXME don't have the fb yet, so can't
+			 * use intel_plane_data_rate() :(
+			 */
+			if (plane_state->base.visible)
+				crtc_state->data_rate[plane->id] =
+					4 * crtc_state->pixel_rate;
+		}
+
+		intel_bw_crtc_update(bw_state, crtc_state);
+
 		intel_pipe_config_sanity_check(dev_priv, crtc_state);
 	}
 }
@@ -16429,8 +16770,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev,
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
 		crtc_state = to_intel_crtc_state(crtc->base.state);
 		intel_sanitize_crtc(crtc, ctx);
-		intel_dump_pipe_config(crtc, crtc_state,
-				       "[setup_hw_state]");
+		intel_dump_pipe_config(crtc_state, NULL, "[setup_hw_state]");
 	}
 
 	intel_modeset_update_connector_atomic_state(dev);
@@ -16564,7 +16904,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_overlay_cleanup(dev_priv);
 
-	intel_teardown_gmbus(dev_priv);
+	intel_gmbus_teardown(dev_priv);
 
 	destroy_workqueue(dev_priv->modeset_wq);
 
diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 2220588e86ac..ee6b8194a459 100644
--- a/drivers/gpu/drm/i915/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -28,6 +28,9 @@
 #include <drm/drm_util.h>
 #include <drm/i915_drm.h>
 
+struct drm_i915_private;
+struct intel_plane_state;
+
 enum i915_gpio {
 	GPIOA,
 	GPIOB,
@@ -217,64 +220,6 @@ enum aux_ch {
 
 #define aux_ch_name(a) ((a) + 'A')
 
-enum intel_display_power_domain {
-	POWER_DOMAIN_PIPE_A,
-	POWER_DOMAIN_PIPE_B,
-	POWER_DOMAIN_PIPE_C,
-	POWER_DOMAIN_PIPE_A_PANEL_FITTER,
-	POWER_DOMAIN_PIPE_B_PANEL_FITTER,
-	POWER_DOMAIN_PIPE_C_PANEL_FITTER,
-	POWER_DOMAIN_TRANSCODER_A,
-	POWER_DOMAIN_TRANSCODER_B,
-	POWER_DOMAIN_TRANSCODER_C,
-	POWER_DOMAIN_TRANSCODER_EDP,
-	POWER_DOMAIN_TRANSCODER_EDP_VDSC,
-	POWER_DOMAIN_TRANSCODER_DSI_A,
-	POWER_DOMAIN_TRANSCODER_DSI_C,
-	POWER_DOMAIN_PORT_DDI_A_LANES,
-	POWER_DOMAIN_PORT_DDI_B_LANES,
-	POWER_DOMAIN_PORT_DDI_C_LANES,
-	POWER_DOMAIN_PORT_DDI_D_LANES,
-	POWER_DOMAIN_PORT_DDI_E_LANES,
-	POWER_DOMAIN_PORT_DDI_F_LANES,
-	POWER_DOMAIN_PORT_DDI_A_IO,
-	POWER_DOMAIN_PORT_DDI_B_IO,
-	POWER_DOMAIN_PORT_DDI_C_IO,
-	POWER_DOMAIN_PORT_DDI_D_IO,
-	POWER_DOMAIN_PORT_DDI_E_IO,
-	POWER_DOMAIN_PORT_DDI_F_IO,
-	POWER_DOMAIN_PORT_DSI,
-	POWER_DOMAIN_PORT_CRT,
-	POWER_DOMAIN_PORT_OTHER,
-	POWER_DOMAIN_VGA,
-	POWER_DOMAIN_AUDIO,
-	POWER_DOMAIN_PLLS,
-	POWER_DOMAIN_AUX_A,
-	POWER_DOMAIN_AUX_B,
-	POWER_DOMAIN_AUX_C,
-	POWER_DOMAIN_AUX_D,
-	POWER_DOMAIN_AUX_E,
-	POWER_DOMAIN_AUX_F,
-	POWER_DOMAIN_AUX_IO_A,
-	POWER_DOMAIN_AUX_TBT1,
-	POWER_DOMAIN_AUX_TBT2,
-	POWER_DOMAIN_AUX_TBT3,
-	POWER_DOMAIN_AUX_TBT4,
-	POWER_DOMAIN_GMBUS,
-	POWER_DOMAIN_MODESET,
-	POWER_DOMAIN_GT_IRQ,
-	POWER_DOMAIN_INIT,
-
-	POWER_DOMAIN_NUM,
-};
-
-#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
-#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \
-		((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER)
-#define POWER_DOMAIN_TRANSCODER(tran) \
-	((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \
-	 (tran) + POWER_DOMAIN_TRANSCODER_A)
-
 /* Used by dp and fdi links */
 struct intel_link_m_n {
 	u32 tu;
@@ -361,30 +306,6 @@ struct intel_link_m_n {
 	list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \
 		for_each_if((intel_connector)->base.encoder == (__encoder))
 
-#define for_each_power_domain(domain, mask)				\
-	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++)	\
-		for_each_if(BIT_ULL(domain) & (mask))
-
-#define for_each_power_well(__dev_priv, __power_well)				\
-	for ((__power_well) = (__dev_priv)->power_domains.power_wells;	\
-	     (__power_well) - (__dev_priv)->power_domains.power_wells <	\
-		(__dev_priv)->power_domains.power_well_count;		\
-	     (__power_well)++)
-
-#define for_each_power_well_reverse(__dev_priv, __power_well)			\
-	for ((__power_well) = (__dev_priv)->power_domains.power_wells +		\
-			      (__dev_priv)->power_domains.power_well_count - 1;	\
-	     (__power_well) - (__dev_priv)->power_domains.power_wells >= 0;	\
-	     (__power_well)--)
-
-#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask)	\
-	for_each_power_well(__dev_priv, __power_well)				\
-		for_each_if((__power_well)->desc->domains & (__domain_mask))
-
-#define for_each_power_domain_well_reverse(__dev_priv, __power_well, __domain_mask) \
-	for_each_power_well_reverse(__dev_priv, __power_well)		        \
-		for_each_if((__power_well)->desc->domains & (__domain_mask))
-
 #define for_each_old_intel_plane_in_state(__state, plane, old_plane_state, __i) \
 	for ((__i) = 0; \
 	     (__i) < (__state)->base.dev->mode_config.num_total_plane && \
@@ -432,4 +353,9 @@ void intel_link_compute_m_n(u16 bpp, int nlanes,
 			    struct intel_link_m_n *m_n,
 			    bool constant_n);
 bool is_ccs_modifier(u64 modifier);
+void lpt_disable_clkout_dp(struct drm_i915_private *dev_priv);
+u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
+			      u32 pixel_format, u64 modifier);
+bool intel_plane_can_remap(const struct intel_plane_state *plane_state);
+
 #endif
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
new file mode 100644
index 000000000000..c93ad512014c
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -0,0 +1,4618 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/vgaarb.h>
+
+#include "display/intel_crt.h"
+#include "display/intel_dp.h"
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "intel_cdclk.h"
+#include "intel_combo_phy.h"
+#include "intel_csr.h"
+#include "intel_dpio_phy.h"
+#include "intel_drv.h"
+#include "intel_hotplug.h"
+#include "intel_sideband.h"
+
+bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
+					 enum i915_power_well_id power_well_id);
+
+const char *
+intel_display_power_domain_str(enum intel_display_power_domain domain)
+{
+	switch (domain) {
+	case POWER_DOMAIN_DISPLAY_CORE:
+		return "DISPLAY_CORE";
+	case POWER_DOMAIN_PIPE_A:
+		return "PIPE_A";
+	case POWER_DOMAIN_PIPE_B:
+		return "PIPE_B";
+	case POWER_DOMAIN_PIPE_C:
+		return "PIPE_C";
+	case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
+		return "PIPE_A_PANEL_FITTER";
+	case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
+		return "PIPE_B_PANEL_FITTER";
+	case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
+		return "PIPE_C_PANEL_FITTER";
+	case POWER_DOMAIN_TRANSCODER_A:
+		return "TRANSCODER_A";
+	case POWER_DOMAIN_TRANSCODER_B:
+		return "TRANSCODER_B";
+	case POWER_DOMAIN_TRANSCODER_C:
+		return "TRANSCODER_C";
+	case POWER_DOMAIN_TRANSCODER_EDP:
+		return "TRANSCODER_EDP";
+	case POWER_DOMAIN_TRANSCODER_EDP_VDSC:
+		return "TRANSCODER_EDP_VDSC";
+	case POWER_DOMAIN_TRANSCODER_DSI_A:
+		return "TRANSCODER_DSI_A";
+	case POWER_DOMAIN_TRANSCODER_DSI_C:
+		return "TRANSCODER_DSI_C";
+	case POWER_DOMAIN_PORT_DDI_A_LANES:
+		return "PORT_DDI_A_LANES";
+	case POWER_DOMAIN_PORT_DDI_B_LANES:
+		return "PORT_DDI_B_LANES";
+	case POWER_DOMAIN_PORT_DDI_C_LANES:
+		return "PORT_DDI_C_LANES";
+	case POWER_DOMAIN_PORT_DDI_D_LANES:
+		return "PORT_DDI_D_LANES";
+	case POWER_DOMAIN_PORT_DDI_E_LANES:
+		return "PORT_DDI_E_LANES";
+	case POWER_DOMAIN_PORT_DDI_F_LANES:
+		return "PORT_DDI_F_LANES";
+	case POWER_DOMAIN_PORT_DDI_A_IO:
+		return "PORT_DDI_A_IO";
+	case POWER_DOMAIN_PORT_DDI_B_IO:
+		return "PORT_DDI_B_IO";
+	case POWER_DOMAIN_PORT_DDI_C_IO:
+		return "PORT_DDI_C_IO";
+	case POWER_DOMAIN_PORT_DDI_D_IO:
+		return "PORT_DDI_D_IO";
+	case POWER_DOMAIN_PORT_DDI_E_IO:
+		return "PORT_DDI_E_IO";
+	case POWER_DOMAIN_PORT_DDI_F_IO:
+		return "PORT_DDI_F_IO";
+	case POWER_DOMAIN_PORT_DSI:
+		return "PORT_DSI";
+	case POWER_DOMAIN_PORT_CRT:
+		return "PORT_CRT";
+	case POWER_DOMAIN_PORT_OTHER:
+		return "PORT_OTHER";
+	case POWER_DOMAIN_VGA:
+		return "VGA";
+	case POWER_DOMAIN_AUDIO:
+		return "AUDIO";
+	case POWER_DOMAIN_AUX_A:
+		return "AUX_A";
+	case POWER_DOMAIN_AUX_B:
+		return "AUX_B";
+	case POWER_DOMAIN_AUX_C:
+		return "AUX_C";
+	case POWER_DOMAIN_AUX_D:
+		return "AUX_D";
+	case POWER_DOMAIN_AUX_E:
+		return "AUX_E";
+	case POWER_DOMAIN_AUX_F:
+		return "AUX_F";
+	case POWER_DOMAIN_AUX_IO_A:
+		return "AUX_IO_A";
+	case POWER_DOMAIN_AUX_TBT1:
+		return "AUX_TBT1";
+	case POWER_DOMAIN_AUX_TBT2:
+		return "AUX_TBT2";
+	case POWER_DOMAIN_AUX_TBT3:
+		return "AUX_TBT3";
+	case POWER_DOMAIN_AUX_TBT4:
+		return "AUX_TBT4";
+	case POWER_DOMAIN_GMBUS:
+		return "GMBUS";
+	case POWER_DOMAIN_INIT:
+		return "INIT";
+	case POWER_DOMAIN_MODESET:
+		return "MODESET";
+	case POWER_DOMAIN_GT_IRQ:
+		return "GT_IRQ";
+	default:
+		MISSING_CASE(domain);
+		return "?";
+	}
+}
+
+static void intel_power_well_enable(struct drm_i915_private *dev_priv,
+				    struct i915_power_well *power_well)
+{
+	DRM_DEBUG_KMS("enabling %s\n", power_well->desc->name);
+	power_well->desc->ops->enable(dev_priv, power_well);
+	power_well->hw_enabled = true;
+}
+
+static void intel_power_well_disable(struct drm_i915_private *dev_priv,
+				     struct i915_power_well *power_well)
+{
+	DRM_DEBUG_KMS("disabling %s\n", power_well->desc->name);
+	power_well->hw_enabled = false;
+	power_well->desc->ops->disable(dev_priv, power_well);
+}
+
+static void intel_power_well_get(struct drm_i915_private *dev_priv,
+				 struct i915_power_well *power_well)
+{
+	if (!power_well->count++)
+		intel_power_well_enable(dev_priv, power_well);
+}
+
+static void intel_power_well_put(struct drm_i915_private *dev_priv,
+				 struct i915_power_well *power_well)
+{
+	WARN(!power_well->count, "Use count on power well %s is already zero",
+	     power_well->desc->name);
+
+	if (!--power_well->count)
+		intel_power_well_disable(dev_priv, power_well);
+}
+
+/**
+ * __intel_display_power_is_enabled - unlocked check for a power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to check
+ *
+ * This is the unlocked version of intel_display_power_is_enabled() and should
+ * only be used from error capture and recovery code where deadlocks are
+ * possible.
+ *
+ * Returns:
+ * True when the power domain is enabled, false otherwise.
+ */
+bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
+				      enum intel_display_power_domain domain)
+{
+	struct i915_power_well *power_well;
+	bool is_enabled;
+
+	if (dev_priv->runtime_pm.suspended)
+		return false;
+
+	is_enabled = true;
+
+	for_each_power_domain_well_reverse(dev_priv, power_well, BIT_ULL(domain)) {
+		if (power_well->desc->always_on)
+			continue;
+
+		if (!power_well->hw_enabled) {
+			is_enabled = false;
+			break;
+		}
+	}
+
+	return is_enabled;
+}
+
+/**
+ * intel_display_power_is_enabled - check for a power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to check
+ *
+ * This function can be used to check the hw power domain state. It is mostly
+ * used in hardware state readout functions. Everywhere else code should rely
+ * upon explicit power domain reference counting to ensure that the hardware
+ * block is powered up before accessing it.
+ *
+ * Callers must hold the relevant modesetting locks to ensure that concurrent
+ * threads can't disable the power well while the caller tries to read a few
+ * registers.
+ *
+ * Returns:
+ * True when the power domain is enabled, false otherwise.
+ */
+bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
+				    enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains;
+	bool ret;
+
+	power_domains = &dev_priv->power_domains;
+
+	mutex_lock(&power_domains->lock);
+	ret = __intel_display_power_is_enabled(dev_priv, domain);
+	mutex_unlock(&power_domains->lock);
+
+	return ret;
+}
+
+/*
+ * Starting with Haswell, we have a "Power Down Well" that can be turned off
+ * when not needed anymore. We have 4 registers that can request the power well
+ * to be enabled, and it will only be disabled if none of the registers is
+ * requesting it to be enabled.
+ */
+static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv,
+				       u8 irq_pipe_mask, bool has_vga)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+
+	/*
+	 * After we re-enable the power well, if we touch VGA register 0x3d5
+	 * we'll get unclaimed register interrupts. This stops after we write
+	 * anything to the VGA MSR register. The vgacon module uses this
+	 * register all the time, so if we unbind our driver and, as a
+	 * consequence, bind vgacon, we'll get stuck in an infinite loop at
+	 * console_unlock(). So make here we touch the VGA MSR register, making
+	 * sure vgacon can keep working normally without triggering interrupts
+	 * and error messages.
+	 */
+	if (has_vga) {
+		vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
+		outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
+		vga_put(pdev, VGA_RSRC_LEGACY_IO);
+	}
+
+	if (irq_pipe_mask)
+		gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask);
+}
+
+static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv,
+				       u8 irq_pipe_mask)
+{
+	if (irq_pipe_mask)
+		gen8_irq_power_well_pre_disable(dev_priv, irq_pipe_mask);
+}
+
+static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+
+	/* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. */
+	WARN_ON(intel_wait_for_register(&dev_priv->uncore,
+					regs->driver,
+					HSW_PWR_WELL_CTL_STATE(pw_idx),
+					HSW_PWR_WELL_CTL_STATE(pw_idx),
+					1));
+}
+
+static u32 hsw_power_well_requesters(struct drm_i915_private *dev_priv,
+				     const struct i915_power_well_regs *regs,
+				     int pw_idx)
+{
+	u32 req_mask = HSW_PWR_WELL_CTL_REQ(pw_idx);
+	u32 ret;
+
+	ret = I915_READ(regs->bios) & req_mask ? 1 : 0;
+	ret |= I915_READ(regs->driver) & req_mask ? 2 : 0;
+	if (regs->kvmr.reg)
+		ret |= I915_READ(regs->kvmr) & req_mask ? 4 : 0;
+	ret |= I915_READ(regs->debug) & req_mask ? 8 : 0;
+
+	return ret;
+}
+
+static void hsw_wait_for_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	bool disabled;
+	u32 reqs;
+
+	/*
+	 * Bspec doesn't require waiting for PWs to get disabled, but still do
+	 * this for paranoia. The known cases where a PW will be forced on:
+	 * - a KVMR request on any power well via the KVMR request register
+	 * - a DMC request on PW1 and MISC_IO power wells via the BIOS and
+	 *   DEBUG request registers
+	 * Skip the wait in case any of the request bits are set and print a
+	 * diagnostic message.
+	 */
+	wait_for((disabled = !(I915_READ(regs->driver) &
+			       HSW_PWR_WELL_CTL_STATE(pw_idx))) ||
+		 (reqs = hsw_power_well_requesters(dev_priv, regs, pw_idx)), 1);
+	if (disabled)
+		return;
+
+	DRM_DEBUG_KMS("%s forced on (bios:%d driver:%d kvmr:%d debug:%d)\n",
+		      power_well->desc->name,
+		      !!(reqs & 1), !!(reqs & 2), !!(reqs & 4), !!(reqs & 8));
+}
+
+static void gen9_wait_for_power_well_fuses(struct drm_i915_private *dev_priv,
+					   enum skl_power_gate pg)
+{
+	/* Timeout 5us for PG#0, for other PGs 1us */
+	WARN_ON(intel_wait_for_register(&dev_priv->uncore, SKL_FUSE_STATUS,
+					SKL_FUSE_PG_DIST_STATUS(pg),
+					SKL_FUSE_PG_DIST_STATUS(pg), 1));
+}
+
+static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
+				  struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	bool wait_fuses = power_well->desc->hsw.has_fuses;
+	enum skl_power_gate uninitialized_var(pg);
+	u32 val;
+
+	if (wait_fuses) {
+		pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) :
+						 SKL_PW_CTL_IDX_TO_PG(pw_idx);
+		/*
+		 * For PW1 we have to wait both for the PW0/PG0 fuse state
+		 * before enabling the power well and PW1/PG1's own fuse
+		 * state after the enabling. For all other power wells with
+		 * fuses we only have to wait for that PW/PG's fuse state
+		 * after the enabling.
+		 */
+		if (pg == SKL_PG1)
+			gen9_wait_for_power_well_fuses(dev_priv, SKL_PG0);
+	}
+
+	val = I915_READ(regs->driver);
+	I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx));
+	hsw_wait_for_power_well_enable(dev_priv, power_well);
+
+	/* Display WA #1178: cnl */
+	if (IS_CANNONLAKE(dev_priv) &&
+	    pw_idx >= GLK_PW_CTL_IDX_AUX_B &&
+	    pw_idx <= CNL_PW_CTL_IDX_AUX_F) {
+		val = I915_READ(CNL_AUX_ANAOVRD1(pw_idx));
+		val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS;
+		I915_WRITE(CNL_AUX_ANAOVRD1(pw_idx), val);
+	}
+
+	if (wait_fuses)
+		gen9_wait_for_power_well_fuses(dev_priv, pg);
+
+	hsw_power_well_post_enable(dev_priv,
+				   power_well->desc->hsw.irq_pipe_mask,
+				   power_well->desc->hsw.has_vga);
+}
+
+static void hsw_power_well_disable(struct drm_i915_private *dev_priv,
+				   struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	u32 val;
+
+	hsw_power_well_pre_disable(dev_priv,
+				   power_well->desc->hsw.irq_pipe_mask);
+
+	val = I915_READ(regs->driver);
+	I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx));
+	hsw_wait_for_power_well_disable(dev_priv, power_well);
+}
+
+#define ICL_AUX_PW_TO_PORT(pw_idx)	((pw_idx) - ICL_PW_CTL_IDX_AUX_A)
+
+static void
+icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
+				    struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	enum port port = ICL_AUX_PW_TO_PORT(pw_idx);
+	u32 val;
+
+	val = I915_READ(regs->driver);
+	I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx));
+
+	val = I915_READ(ICL_PORT_CL_DW12(port));
+	I915_WRITE(ICL_PORT_CL_DW12(port), val | ICL_LANE_ENABLE_AUX);
+
+	hsw_wait_for_power_well_enable(dev_priv, power_well);
+
+	/* Display WA #1178: icl */
+	if (IS_ICELAKE(dev_priv) &&
+	    pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B &&
+	    !intel_bios_is_port_edp(dev_priv, port)) {
+		val = I915_READ(ICL_AUX_ANAOVRD1(pw_idx));
+		val |= ICL_AUX_ANAOVRD1_ENABLE | ICL_AUX_ANAOVRD1_LDO_BYPASS;
+		I915_WRITE(ICL_AUX_ANAOVRD1(pw_idx), val);
+	}
+}
+
+static void
+icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
+				     struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	enum port port = ICL_AUX_PW_TO_PORT(pw_idx);
+	u32 val;
+
+	val = I915_READ(ICL_PORT_CL_DW12(port));
+	I915_WRITE(ICL_PORT_CL_DW12(port), val & ~ICL_LANE_ENABLE_AUX);
+
+	val = I915_READ(regs->driver);
+	I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx));
+
+	hsw_wait_for_power_well_disable(dev_priv, power_well);
+}
+
+#define ICL_AUX_PW_TO_CH(pw_idx)	\
+	((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A)
+
+static void
+icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
+				 struct i915_power_well *power_well)
+{
+	enum aux_ch aux_ch = ICL_AUX_PW_TO_CH(power_well->desc->hsw.idx);
+	u32 val;
+
+	val = I915_READ(DP_AUX_CH_CTL(aux_ch));
+	val &= ~DP_AUX_CH_CTL_TBT_IO;
+	if (power_well->desc->hsw.is_tc_tbt)
+		val |= DP_AUX_CH_CTL_TBT_IO;
+	I915_WRITE(DP_AUX_CH_CTL(aux_ch), val);
+
+	hsw_power_well_enable(dev_priv, power_well);
+}
+
+/*
+ * We should only use the power well if we explicitly asked the hardware to
+ * enable it, so check if it's enabled and also check if we've requested it to
+ * be enabled.
+ */
+static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv,
+				   struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	enum i915_power_well_id id = power_well->desc->id;
+	int pw_idx = power_well->desc->hsw.idx;
+	u32 mask = HSW_PWR_WELL_CTL_REQ(pw_idx) |
+		   HSW_PWR_WELL_CTL_STATE(pw_idx);
+	u32 val;
+
+	val = I915_READ(regs->driver);
+
+	/*
+	 * On GEN9 big core due to a DMC bug the driver's request bits for PW1
+	 * and the MISC_IO PW will be not restored, so check instead for the
+	 * BIOS's own request bits, which are forced-on for these power wells
+	 * when exiting DC5/6.
+	 */
+	if (IS_GEN(dev_priv, 9) && !IS_GEN9_LP(dev_priv) &&
+	    (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO))
+		val |= I915_READ(regs->bios);
+
+	return (val & mask) == mask;
+}
+
+static void assert_can_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC9),
+		  "DC9 already programmed to be enabled.\n");
+	WARN_ONCE(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
+		  "DC5 still not disabled to enable DC9.\n");
+	WARN_ONCE(I915_READ(HSW_PWR_WELL_CTL2) &
+		  HSW_PWR_WELL_CTL_REQ(SKL_PW_CTL_IDX_PW_2),
+		  "Power well 2 on.\n");
+	WARN_ONCE(intel_irqs_enabled(dev_priv),
+		  "Interrupts not disabled yet.\n");
+
+	 /*
+	  * TODO: check for the following to verify the conditions to enter DC9
+	  * state are satisfied:
+	  * 1] Check relevant display engine registers to verify if mode set
+	  * disable sequence was followed.
+	  * 2] Check if display uninitialize sequence is initialized.
+	  */
+}
+
+static void assert_can_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE(intel_irqs_enabled(dev_priv),
+		  "Interrupts not disabled yet.\n");
+	WARN_ONCE(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
+		  "DC5 still not disabled.\n");
+
+	 /*
+	  * TODO: check for the following to verify DC9 state was indeed
+	  * entered before programming to disable it:
+	  * 1] Check relevant display engine registers to verify if mode
+	  *  set disable sequence was followed.
+	  * 2] Check if display uninitialize sequence is initialized.
+	  */
+}
+
+static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
+				u32 state)
+{
+	int rewrites = 0;
+	int rereads = 0;
+	u32 v;
+
+	I915_WRITE(DC_STATE_EN, state);
+
+	/* It has been observed that disabling the dc6 state sometimes
+	 * doesn't stick and dmc keeps returning old value. Make sure
+	 * the write really sticks enough times and also force rewrite until
+	 * we are confident that state is exactly what we want.
+	 */
+	do  {
+		v = I915_READ(DC_STATE_EN);
+
+		if (v != state) {
+			I915_WRITE(DC_STATE_EN, state);
+			rewrites++;
+			rereads = 0;
+		} else if (rereads++ > 5) {
+			break;
+		}
+
+	} while (rewrites < 100);
+
+	if (v != state)
+		DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
+			  state, v);
+
+	/* Most of the times we need one retry, avoid spam */
+	if (rewrites > 1)
+		DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
+			      state, rewrites);
+}
+
+static u32 gen9_dc_mask(struct drm_i915_private *dev_priv)
+{
+	u32 mask;
+
+	mask = DC_STATE_EN_UPTO_DC5;
+	if (INTEL_GEN(dev_priv) >= 11)
+		mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9;
+	else if (IS_GEN9_LP(dev_priv))
+		mask |= DC_STATE_EN_DC9;
+	else
+		mask |= DC_STATE_EN_UPTO_DC6;
+
+	return mask;
+}
+
+void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	val = I915_READ(DC_STATE_EN) & gen9_dc_mask(dev_priv);
+
+	DRM_DEBUG_KMS("Resetting DC state tracking from %02x to %02x\n",
+		      dev_priv->csr.dc_state, val);
+	dev_priv->csr.dc_state = val;
+}
+
+/**
+ * gen9_set_dc_state - set target display C power state
+ * @dev_priv: i915 device instance
+ * @state: target DC power state
+ * - DC_STATE_DISABLE
+ * - DC_STATE_EN_UPTO_DC5
+ * - DC_STATE_EN_UPTO_DC6
+ * - DC_STATE_EN_DC9
+ *
+ * Signal to DMC firmware/HW the target DC power state passed in @state.
+ * DMC/HW can turn off individual display clocks and power rails when entering
+ * a deeper DC power state (higher in number) and turns these back when exiting
+ * that state to a shallower power state (lower in number). The HW will decide
+ * when to actually enter a given state on an on-demand basis, for instance
+ * depending on the active state of display pipes. The state of display
+ * registers backed by affected power rails are saved/restored as needed.
+ *
+ * Based on the above enabling a deeper DC power state is asynchronous wrt.
+ * enabling it. Disabling a deeper power state is synchronous: for instance
+ * setting %DC_STATE_DISABLE won't complete until all HW resources are turned
+ * back on and register state is restored. This is guaranteed by the MMIO write
+ * to DC_STATE_EN blocking until the state is restored.
+ */
+static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
+{
+	u32 val;
+	u32 mask;
+
+	if (WARN_ON_ONCE(state & ~dev_priv->csr.allowed_dc_mask))
+		state &= dev_priv->csr.allowed_dc_mask;
+
+	val = I915_READ(DC_STATE_EN);
+	mask = gen9_dc_mask(dev_priv);
+	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
+		      val & mask, state);
+
+	/* Check if DMC is ignoring our DC state requests */
+	if ((val & mask) != dev_priv->csr.dc_state)
+		DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
+			  dev_priv->csr.dc_state, val & mask);
+
+	val &= ~mask;
+	val |= state;
+
+	gen9_write_dc_state(dev_priv, val);
+
+	dev_priv->csr.dc_state = val & mask;
+}
+
+void bxt_enable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_enable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC9\n");
+	/*
+	 * Power sequencer reset is not needed on
+	 * platforms with South Display Engine on PCH,
+	 * because PPS registers are always on.
+	 */
+	if (!HAS_PCH_SPLIT(dev_priv))
+		intel_power_sequencer_reset(dev_priv);
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_DC9);
+}
+
+void bxt_disable_dc9(struct drm_i915_private *dev_priv)
+{
+	assert_can_disable_dc9(dev_priv);
+
+	DRM_DEBUG_KMS("Disabling DC9\n");
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	intel_pps_unlock_regs_wa(dev_priv);
+}
+
+static void assert_csr_loaded(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
+		  "CSR program storage start is NULL\n");
+	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
+	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
+}
+
+static struct i915_power_well *
+lookup_power_well(struct drm_i915_private *dev_priv,
+		  enum i915_power_well_id power_well_id)
+{
+	struct i915_power_well *power_well;
+
+	for_each_power_well(dev_priv, power_well)
+		if (power_well->desc->id == power_well_id)
+			return power_well;
+
+	/*
+	 * It's not feasible to add error checking code to the callers since
+	 * this condition really shouldn't happen and it doesn't even make sense
+	 * to abort things like display initialization sequences. Just return
+	 * the first power well and hope the WARN gets reported so we can fix
+	 * our driver.
+	 */
+	WARN(1, "Power well %d not defined for this platform\n", power_well_id);
+	return &dev_priv->power_domains.power_wells[0];
+}
+
+static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
+{
+	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
+					SKL_DISP_PW_2);
+
+	WARN_ONCE(pg2_enabled, "PG2 not disabled to enable DC5.\n");
+
+	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5),
+		  "DC5 already programmed to be enabled.\n");
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
+
+	assert_csr_loaded(dev_priv);
+}
+
+void gen9_enable_dc5(struct drm_i915_private *dev_priv)
+{
+	assert_can_enable_dc5(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC5\n");
+
+	/* Wa Display #1183: skl,kbl,cfl */
+	if (IS_GEN9_BC(dev_priv))
+		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+			   SKL_SELECT_ALTERNATE_DC_EXIT);
+
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
+}
+
+static void assert_can_enable_dc6(struct drm_i915_private *dev_priv)
+{
+	WARN_ONCE(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
+		  "Backlight is not disabled.\n");
+	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6),
+		  "DC6 already programmed to be enabled.\n");
+
+	assert_csr_loaded(dev_priv);
+}
+
+void skl_enable_dc6(struct drm_i915_private *dev_priv)
+{
+	assert_can_enable_dc6(dev_priv);
+
+	DRM_DEBUG_KMS("Enabling DC6\n");
+
+	/* Wa Display #1183: skl,kbl,cfl */
+	if (IS_GEN9_BC(dev_priv))
+		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+			   SKL_SELECT_ALTERNATE_DC_EXIT);
+
+	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
+}
+
+static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv,
+				   struct i915_power_well *power_well)
+{
+	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
+	int pw_idx = power_well->desc->hsw.idx;
+	u32 mask = HSW_PWR_WELL_CTL_REQ(pw_idx);
+	u32 bios_req = I915_READ(regs->bios);
+
+	/* Take over the request bit if set by BIOS. */
+	if (bios_req & mask) {
+		u32 drv_req = I915_READ(regs->driver);
+
+		if (!(drv_req & mask))
+			I915_WRITE(regs->driver, drv_req | mask);
+		I915_WRITE(regs->bios, bios_req & ~mask);
+	}
+}
+
+static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	bxt_ddi_phy_init(dev_priv, power_well->desc->bxt.phy);
+}
+
+static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	bxt_ddi_phy_uninit(dev_priv, power_well->desc->bxt.phy);
+}
+
+static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	return bxt_ddi_phy_is_enabled(dev_priv, power_well->desc->bxt.phy);
+}
+
+static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *power_well;
+
+	power_well = lookup_power_well(dev_priv, BXT_DISP_PW_DPIO_CMN_A);
+	if (power_well->count > 0)
+		bxt_ddi_phy_verify_state(dev_priv, power_well->desc->bxt.phy);
+
+	power_well = lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
+	if (power_well->count > 0)
+		bxt_ddi_phy_verify_state(dev_priv, power_well->desc->bxt.phy);
+
+	if (IS_GEMINILAKE(dev_priv)) {
+		power_well = lookup_power_well(dev_priv,
+					       GLK_DISP_PW_DPIO_CMN_C);
+		if (power_well->count > 0)
+			bxt_ddi_phy_verify_state(dev_priv,
+						 power_well->desc->bxt.phy);
+	}
+}
+
+static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
+}
+
+static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
+{
+	u32 tmp = I915_READ(DBUF_CTL);
+
+	WARN((tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) !=
+	     (DBUF_POWER_STATE | DBUF_POWER_REQUEST),
+	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
+}
+
+static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	struct intel_cdclk_state cdclk_state = {};
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	dev_priv->display.get_cdclk(dev_priv, &cdclk_state);
+	/* Can't read out voltage_level so can't use intel_cdclk_changed() */
+	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
+
+	gen9_assert_dbuf_enabled(dev_priv);
+
+	if (IS_GEN9_LP(dev_priv))
+		bxt_verify_ddi_phy_power_wells(dev_priv);
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		/*
+		 * DMC retains HW context only for port A, the other combo
+		 * PHY's HW context for port B is lost after DC transitions,
+		 * so we need to restore it manually.
+		 */
+		intel_combo_phy_init(dev_priv);
+}
+
+static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	if (!dev_priv->csr.dmc_payload)
+		return;
+
+	if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6)
+		skl_enable_dc6(dev_priv);
+	else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)
+		gen9_enable_dc5(dev_priv);
+}
+
+static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv,
+					 struct i915_power_well *power_well)
+{
+}
+
+static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+}
+
+static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv,
+					     struct i915_power_well *power_well)
+{
+	return true;
+}
+
+static void i830_pipes_power_well_enable(struct drm_i915_private *dev_priv,
+					 struct i915_power_well *power_well)
+{
+	if ((I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE) == 0)
+		i830_enable_pipe(dev_priv, PIPE_A);
+	if ((I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE) == 0)
+		i830_enable_pipe(dev_priv, PIPE_B);
+}
+
+static void i830_pipes_power_well_disable(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	i830_disable_pipe(dev_priv, PIPE_B);
+	i830_disable_pipe(dev_priv, PIPE_A);
+}
+
+static bool i830_pipes_power_well_enabled(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	return I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE &&
+		I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE;
+}
+
+static void i830_pipes_power_well_sync_hw(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	if (power_well->count > 0)
+		i830_pipes_power_well_enable(dev_priv, power_well);
+	else
+		i830_pipes_power_well_disable(dev_priv, power_well);
+}
+
+static void vlv_set_power_well(struct drm_i915_private *dev_priv,
+			       struct i915_power_well *power_well, bool enable)
+{
+	int pw_idx = power_well->desc->vlv.idx;
+	u32 mask;
+	u32 state;
+	u32 ctrl;
+
+	mask = PUNIT_PWRGT_MASK(pw_idx);
+	state = enable ? PUNIT_PWRGT_PWR_ON(pw_idx) :
+			 PUNIT_PWRGT_PWR_GATE(pw_idx);
+
+	vlv_punit_get(dev_priv);
+
+#define COND \
+	((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
+
+	if (COND)
+		goto out;
+
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL);
+	ctrl &= ~mask;
+	ctrl |= state;
+	vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
+
+	if (wait_for(COND, 100))
+		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
+			  state,
+			  vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
+
+#undef COND
+
+out:
+	vlv_punit_put(dev_priv);
+}
+
+static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
+				  struct i915_power_well *power_well)
+{
+	vlv_set_power_well(dev_priv, power_well, true);
+}
+
+static void vlv_power_well_disable(struct drm_i915_private *dev_priv,
+				   struct i915_power_well *power_well)
+{
+	vlv_set_power_well(dev_priv, power_well, false);
+}
+
+static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
+				   struct i915_power_well *power_well)
+{
+	int pw_idx = power_well->desc->vlv.idx;
+	bool enabled = false;
+	u32 mask;
+	u32 state;
+	u32 ctrl;
+
+	mask = PUNIT_PWRGT_MASK(pw_idx);
+	ctrl = PUNIT_PWRGT_PWR_ON(pw_idx);
+
+	vlv_punit_get(dev_priv);
+
+	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
+	/*
+	 * We only ever set the power-on and power-gate states, anything
+	 * else is unexpected.
+	 */
+	WARN_ON(state != PUNIT_PWRGT_PWR_ON(pw_idx) &&
+		state != PUNIT_PWRGT_PWR_GATE(pw_idx));
+	if (state == ctrl)
+		enabled = true;
+
+	/*
+	 * A transient state at this point would mean some unexpected party
+	 * is poking at the power controls too.
+	 */
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
+	WARN_ON(ctrl != state);
+
+	vlv_punit_put(dev_priv);
+
+	return enabled;
+}
+
+static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	/*
+	 * On driver load, a pipe may be active and driving a DSI display.
+	 * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
+	 * (and never recovering) in this case. intel_dsi_post_disable() will
+	 * clear it when we turn off the display.
+	 */
+	val = I915_READ(DSPCLK_GATE_D);
+	val &= DPOUNIT_CLOCK_GATE_DISABLE;
+	val |= VRHUNIT_CLOCK_GATE_DISABLE;
+	I915_WRITE(DSPCLK_GATE_D, val);
+
+	/*
+	 * Disable trickle feed and enable pnd deadline calculation
+	 */
+	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
+	I915_WRITE(CBR1_VLV, 0);
+
+	WARN_ON(dev_priv->rawclk_freq == 0);
+
+	I915_WRITE(RAWCLK_FREQ_VLV,
+		   DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 1000));
+}
+
+static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
+{
+	struct intel_encoder *encoder;
+	enum pipe pipe;
+
+	/*
+	 * Enable the CRI clock source so we can get at the
+	 * display and the reference clock for VGA
+	 * hotplug / manual detection. Supposedly DSI also
+	 * needs the ref clock up and running.
+	 *
+	 * CHV DPLL B/C have some issues if VGA mode is enabled.
+	 */
+	for_each_pipe(dev_priv, pipe) {
+		u32 val = I915_READ(DPLL(pipe));
+
+		val |= DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS;
+		if (pipe != PIPE_A)
+			val |= DPLL_INTEGRATED_CRI_CLK_VLV;
+
+		I915_WRITE(DPLL(pipe), val);
+	}
+
+	vlv_init_display_clock_gating(dev_priv);
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	valleyview_enable_display_irqs(dev_priv);
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	/*
+	 * During driver initialization/resume we can avoid restoring the
+	 * part of the HW/SW state that will be inited anyway explicitly.
+	 */
+	if (dev_priv->power_domains.initializing)
+		return;
+
+	intel_hpd_init(dev_priv);
+
+	/* Re-enable the ADPA, if we have one */
+	for_each_intel_encoder(&dev_priv->drm, encoder) {
+		if (encoder->type == INTEL_OUTPUT_ANALOG)
+			intel_crt_reset(&encoder->base);
+	}
+
+	i915_redisable_vga_power_on(dev_priv);
+
+	intel_pps_unlock_regs_wa(dev_priv);
+}
+
+static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+	valleyview_disable_display_irqs(dev_priv);
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	/* make sure we're done processing display irqs */
+	synchronize_irq(dev_priv->drm.irq);
+
+	intel_power_sequencer_reset(dev_priv);
+
+	/* Prevent us from re-enabling polling on accident in late suspend */
+	if (!dev_priv->drm.dev->power.is_suspended)
+		intel_hpd_poll_init(dev_priv);
+}
+
+static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
+					  struct i915_power_well *power_well)
+{
+	vlv_set_power_well(dev_priv, power_well, true);
+
+	vlv_display_power_well_init(dev_priv);
+}
+
+static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	vlv_display_power_well_deinit(dev_priv);
+
+	vlv_set_power_well(dev_priv, power_well, false);
+}
+
+static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	/* since ref/cri clock was enabled */
+	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
+
+	vlv_set_power_well(dev_priv, power_well, true);
+
+	/*
+	 * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
+	 *  6.	De-assert cmn_reset/side_reset. Same as VLV X0.
+	 *   a.	GUnit 0x2110 bit[0] set to 1 (def 0)
+	 *   b.	The other bits such as sfr settings / modesel may all
+	 *	be set to 0.
+	 *
+	 * This should only be done on init and resume from S3 with
+	 * both PLLs disabled, or we risk losing DPIO and PLL
+	 * synchronization.
+	 */
+	I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
+}
+
+static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	enum pipe pipe;
+
+	for_each_pipe(dev_priv, pipe)
+		assert_pll_disabled(dev_priv, pipe);
+
+	/* Assert common reset */
+	I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) & ~DPIO_CMNRST);
+
+	vlv_set_power_well(dev_priv, power_well, false);
+}
+
+#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0))
+
+#define BITS_SET(val, bits) (((val) & (bits)) == (bits))
+
+static void assert_chv_phy_status(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *cmn_bc =
+		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
+	struct i915_power_well *cmn_d =
+		lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D);
+	u32 phy_control = dev_priv->chv_phy_control;
+	u32 phy_status = 0;
+	u32 phy_status_mask = 0xffffffff;
+
+	/*
+	 * The BIOS can leave the PHY is some weird state
+	 * where it doesn't fully power down some parts.
+	 * Disable the asserts until the PHY has been fully
+	 * reset (ie. the power well has been disabled at
+	 * least once).
+	 */
+	if (!dev_priv->chv_phy_assert[DPIO_PHY0])
+		phy_status_mask &= ~(PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH0) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 0) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 1) |
+				     PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH1) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 0) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1));
+
+	if (!dev_priv->chv_phy_assert[DPIO_PHY1])
+		phy_status_mask &= ~(PHY_STATUS_CMN_LDO(DPIO_PHY1, DPIO_CH0) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0) |
+				     PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1));
+
+	if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) {
+		phy_status |= PHY_POWERGOOD(DPIO_PHY0);
+
+		/* this assumes override is only used to enable lanes */
+		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0)) == 0)
+			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0);
+
+		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1)) == 0)
+			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1);
+
+		/* CL1 is on whenever anything is on in either channel */
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0) |
+			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1)))
+			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH0);
+
+		/*
+		 * The DPLLB check accounts for the pipe B + port A usage
+		 * with CL2 powered up but all the lanes in the second channel
+		 * powered down.
+		 */
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1)) &&
+		    (I915_READ(DPLL(PIPE_B)) & DPLL_VCO_ENABLE) == 0)
+			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH1);
+
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH0)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 0);
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH0)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 1);
+
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH1)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 0);
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH1)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1);
+	}
+
+	if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) {
+		phy_status |= PHY_POWERGOOD(DPIO_PHY1);
+
+		/* this assumes override is only used to enable lanes */
+		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0)) == 0)
+			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0);
+
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0)))
+			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY1, DPIO_CH0);
+
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY1, DPIO_CH0)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0);
+		if (BITS_SET(phy_control,
+			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY1, DPIO_CH0)))
+			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1);
+	}
+
+	phy_status &= phy_status_mask;
+
+	/*
+	 * The PHY may be busy with some initial calibration and whatnot,
+	 * so the power state can take a while to actually change.
+	 */
+	if (intel_wait_for_register(&dev_priv->uncore,
+				    DISPLAY_PHY_STATUS,
+				    phy_status_mask,
+				    phy_status,
+				    10))
+		DRM_ERROR("Unexpected PHY_STATUS 0x%08x, expected 0x%08x (PHY_CONTROL=0x%08x)\n",
+			  I915_READ(DISPLAY_PHY_STATUS) & phy_status_mask,
+			   phy_status, dev_priv->chv_phy_control);
+}
+
+#undef BITS_SET
+
+static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	enum dpio_phy phy;
+	enum pipe pipe;
+	u32 tmp;
+
+	WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC &&
+		     power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D);
+
+	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
+		pipe = PIPE_A;
+		phy = DPIO_PHY0;
+	} else {
+		pipe = PIPE_C;
+		phy = DPIO_PHY1;
+	}
+
+	/* since ref/cri clock was enabled */
+	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
+	vlv_set_power_well(dev_priv, power_well, true);
+
+	/* Poll for phypwrgood signal */
+	if (intel_wait_for_register(&dev_priv->uncore,
+				    DISPLAY_PHY_STATUS,
+				    PHY_POWERGOOD(phy),
+				    PHY_POWERGOOD(phy),
+				    1))
+		DRM_ERROR("Display PHY %d is not power up\n", phy);
+
+	vlv_dpio_get(dev_priv);
+
+	/* Enable dynamic power down */
+	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28);
+	tmp |= DPIO_DYNPWRDOWNEN_CH0 | DPIO_CL1POWERDOWNEN |
+		DPIO_SUS_CLK_CONFIG_GATE_CLKREQ;
+	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp);
+
+	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
+		tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1);
+		tmp |= DPIO_DYNPWRDOWNEN_CH1;
+		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp);
+	} else {
+		/*
+		 * Force the non-existing CL2 off. BXT does this
+		 * too, so maybe it saves some power even though
+		 * CL2 doesn't exist?
+		 */
+		tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
+		tmp |= DPIO_CL2_LDOFUSE_PWRENB;
+		vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp);
+	}
+
+	vlv_dpio_put(dev_priv);
+
+	dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy);
+	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
+
+	DRM_DEBUG_KMS("Enabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n",
+		      phy, dev_priv->chv_phy_control);
+
+	assert_chv_phy_status(dev_priv);
+}
+
+static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	enum dpio_phy phy;
+
+	WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC &&
+		     power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D);
+
+	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
+		phy = DPIO_PHY0;
+		assert_pll_disabled(dev_priv, PIPE_A);
+		assert_pll_disabled(dev_priv, PIPE_B);
+	} else {
+		phy = DPIO_PHY1;
+		assert_pll_disabled(dev_priv, PIPE_C);
+	}
+
+	dev_priv->chv_phy_control &= ~PHY_COM_LANE_RESET_DEASSERT(phy);
+	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
+
+	vlv_set_power_well(dev_priv, power_well, false);
+
+	DRM_DEBUG_KMS("Disabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n",
+		      phy, dev_priv->chv_phy_control);
+
+	/* PHY is fully reset now, so we can enable the PHY state asserts */
+	dev_priv->chv_phy_assert[phy] = true;
+
+	assert_chv_phy_status(dev_priv);
+}
+
+static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpio_phy phy,
+				     enum dpio_channel ch, bool override, unsigned int mask)
+{
+	enum pipe pipe = phy == DPIO_PHY0 ? PIPE_A : PIPE_C;
+	u32 reg, val, expected, actual;
+
+	/*
+	 * The BIOS can leave the PHY is some weird state
+	 * where it doesn't fully power down some parts.
+	 * Disable the asserts until the PHY has been fully
+	 * reset (ie. the power well has been disabled at
+	 * least once).
+	 */
+	if (!dev_priv->chv_phy_assert[phy])
+		return;
+
+	if (ch == DPIO_CH0)
+		reg = _CHV_CMN_DW0_CH0;
+	else
+		reg = _CHV_CMN_DW6_CH1;
+
+	vlv_dpio_get(dev_priv);
+	val = vlv_dpio_read(dev_priv, pipe, reg);
+	vlv_dpio_put(dev_priv);
+
+	/*
+	 * This assumes !override is only used when the port is disabled.
+	 * All lanes should power down even without the override when
+	 * the port is disabled.
+	 */
+	if (!override || mask == 0xf) {
+		expected = DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN;
+		/*
+		 * If CH1 common lane is not active anymore
+		 * (eg. for pipe B DPLL) the entire channel will
+		 * shut down, which causes the common lane registers
+		 * to read as 0. That means we can't actually check
+		 * the lane power down status bits, but as the entire
+		 * register reads as 0 it's a good indication that the
+		 * channel is indeed entirely powered down.
+		 */
+		if (ch == DPIO_CH1 && val == 0)
+			expected = 0;
+	} else if (mask != 0x0) {
+		expected = DPIO_ANYDL_POWERDOWN;
+	} else {
+		expected = 0;
+	}
+
+	if (ch == DPIO_CH0)
+		actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH0;
+	else
+		actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH1;
+	actual &= DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN;
+
+	WARN(actual != expected,
+	     "Unexpected DPIO lane power down: all %d, any %d. Expected: all %d, any %d. (0x%x = 0x%08x)\n",
+	     !!(actual & DPIO_ALLDL_POWERDOWN), !!(actual & DPIO_ANYDL_POWERDOWN),
+	     !!(expected & DPIO_ALLDL_POWERDOWN), !!(expected & DPIO_ANYDL_POWERDOWN),
+	     reg, val);
+}
+
+bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy,
+			  enum dpio_channel ch, bool override)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	bool was_override;
+
+	mutex_lock(&power_domains->lock);
+
+	was_override = dev_priv->chv_phy_control & PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
+
+	if (override == was_override)
+		goto out;
+
+	if (override)
+		dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
+	else
+		dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
+
+	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
+
+	DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d (DPIO_PHY_CONTROL=0x%08x)\n",
+		      phy, ch, dev_priv->chv_phy_control);
+
+	assert_chv_phy_status(dev_priv);
+
+out:
+	mutex_unlock(&power_domains->lock);
+
+	return was_override;
+}
+
+void chv_phy_powergate_lanes(struct intel_encoder *encoder,
+			     bool override, unsigned int mask)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base));
+	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
+
+	mutex_lock(&power_domains->lock);
+
+	dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD(0xf, phy, ch);
+	dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, phy, ch);
+
+	if (override)
+		dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
+	else
+		dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
+
+	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
+
+	DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d lanes 0x%x (PHY_CONTROL=0x%08x)\n",
+		      phy, ch, mask, dev_priv->chv_phy_control);
+
+	assert_chv_phy_status(dev_priv);
+
+	assert_chv_phy_powergate(dev_priv, phy, ch, override, mask);
+
+	mutex_unlock(&power_domains->lock);
+}
+
+static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
+					struct i915_power_well *power_well)
+{
+	enum pipe pipe = PIPE_A;
+	bool enabled;
+	u32 state, ctrl;
+
+	vlv_punit_get(dev_priv);
+
+	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe);
+	/*
+	 * We only ever set the power-on and power-gate states, anything
+	 * else is unexpected.
+	 */
+	WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe));
+	enabled = state == DP_SSS_PWR_ON(pipe);
+
+	/*
+	 * A transient state at this point would mean some unexpected party
+	 * is poking at the power controls too.
+	 */
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSC_MASK(pipe);
+	WARN_ON(ctrl << 16 != state);
+
+	vlv_punit_put(dev_priv);
+
+	return enabled;
+}
+
+static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
+				    struct i915_power_well *power_well,
+				    bool enable)
+{
+	enum pipe pipe = PIPE_A;
+	u32 state;
+	u32 ctrl;
+
+	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
+
+	vlv_punit_get(dev_priv);
+
+#define COND \
+	((vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state)
+
+	if (COND)
+		goto out;
+
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
+	ctrl &= ~DP_SSC_MASK(pipe);
+	ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe);
+	vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, ctrl);
+
+	if (wait_for(COND, 100))
+		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
+			  state,
+			  vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM));
+
+#undef COND
+
+out:
+	vlv_punit_put(dev_priv);
+}
+
+static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
+				       struct i915_power_well *power_well)
+{
+	chv_set_pipe_power_well(dev_priv, power_well, true);
+
+	vlv_display_power_well_init(dev_priv);
+}
+
+static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
+					struct i915_power_well *power_well)
+{
+	vlv_display_power_well_deinit(dev_priv);
+
+	chv_set_pipe_power_well(dev_priv, power_well, false);
+}
+
+static u64 __async_put_domains_mask(struct i915_power_domains *power_domains)
+{
+	return power_domains->async_put_domains[0] |
+	       power_domains->async_put_domains[1];
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+static bool
+assert_async_put_domain_masks_disjoint(struct i915_power_domains *power_domains)
+{
+	return !WARN_ON(power_domains->async_put_domains[0] &
+			power_domains->async_put_domains[1]);
+}
+
+static bool
+__async_put_domains_state_ok(struct i915_power_domains *power_domains)
+{
+	enum intel_display_power_domain domain;
+	bool err = false;
+
+	err |= !assert_async_put_domain_masks_disjoint(power_domains);
+	err |= WARN_ON(!!power_domains->async_put_wakeref !=
+		       !!__async_put_domains_mask(power_domains));
+
+	for_each_power_domain(domain, __async_put_domains_mask(power_domains))
+		err |= WARN_ON(power_domains->domain_use_count[domain] != 1);
+
+	return !err;
+}
+
+static void print_power_domains(struct i915_power_domains *power_domains,
+				const char *prefix, u64 mask)
+{
+	enum intel_display_power_domain domain;
+
+	DRM_DEBUG_DRIVER("%s (%lu):\n", prefix, hweight64(mask));
+	for_each_power_domain(domain, mask)
+		DRM_DEBUG_DRIVER("%s use_count %d\n",
+				 intel_display_power_domain_str(domain),
+				 power_domains->domain_use_count[domain]);
+}
+
+static void
+print_async_put_domains_state(struct i915_power_domains *power_domains)
+{
+	DRM_DEBUG_DRIVER("async_put_wakeref %u\n",
+			 power_domains->async_put_wakeref);
+
+	print_power_domains(power_domains, "async_put_domains[0]",
+			    power_domains->async_put_domains[0]);
+	print_power_domains(power_domains, "async_put_domains[1]",
+			    power_domains->async_put_domains[1]);
+}
+
+static void
+verify_async_put_domains_state(struct i915_power_domains *power_domains)
+{
+	if (!__async_put_domains_state_ok(power_domains))
+		print_async_put_domains_state(power_domains);
+}
+
+#else
+
+static void
+assert_async_put_domain_masks_disjoint(struct i915_power_domains *power_domains)
+{
+}
+
+static void
+verify_async_put_domains_state(struct i915_power_domains *power_domains)
+{
+}
+
+#endif /* CONFIG_DRM_I915_DEBUG_RUNTIME_PM */
+
+static u64 async_put_domains_mask(struct i915_power_domains *power_domains)
+{
+	assert_async_put_domain_masks_disjoint(power_domains);
+
+	return __async_put_domains_mask(power_domains);
+}
+
+static void
+async_put_domains_clear_domain(struct i915_power_domains *power_domains,
+			       enum intel_display_power_domain domain)
+{
+	assert_async_put_domain_masks_disjoint(power_domains);
+
+	power_domains->async_put_domains[0] &= ~BIT_ULL(domain);
+	power_domains->async_put_domains[1] &= ~BIT_ULL(domain);
+}
+
+static bool
+intel_display_power_grab_async_put_ref(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	bool ret = false;
+
+	if (!(async_put_domains_mask(power_domains) & BIT_ULL(domain)))
+		goto out_verify;
+
+	async_put_domains_clear_domain(power_domains, domain);
+
+	ret = true;
+
+	if (async_put_domains_mask(power_domains))
+		goto out_verify;
+
+	cancel_delayed_work(&power_domains->async_put_work);
+	intel_runtime_pm_put_raw(&dev_priv->runtime_pm,
+				 fetch_and_zero(&power_domains->async_put_wakeref));
+out_verify:
+	verify_async_put_domains_state(power_domains);
+
+	return ret;
+}
+
+static void
+__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
+				 enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+
+	if (intel_display_power_grab_async_put_ref(dev_priv, domain))
+		return;
+
+	for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain))
+		intel_power_well_get(dev_priv, power_well);
+
+	power_domains->domain_use_count[domain]++;
+}
+
+/**
+ * intel_display_power_get - grab a power domain reference
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain and ensures that the
+ * power domain and all its parents are powered up. Therefore users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	intel_wakeref_t wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+
+	mutex_lock(&power_domains->lock);
+	__intel_display_power_get_domain(dev_priv, domain);
+	mutex_unlock(&power_domains->lock);
+
+	return wakeref;
+}
+
+/**
+ * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain and ensures that the
+ * power domain and all its parents are powered up. Therefore users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	intel_wakeref_t wakeref;
+	bool is_enabled;
+
+	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
+	if (!wakeref)
+		return false;
+
+	mutex_lock(&power_domains->lock);
+
+	if (__intel_display_power_is_enabled(dev_priv, domain)) {
+		__intel_display_power_get_domain(dev_priv, domain);
+		is_enabled = true;
+	} else {
+		is_enabled = false;
+	}
+
+	mutex_unlock(&power_domains->lock);
+
+	if (!is_enabled) {
+		intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+		wakeref = 0;
+	}
+
+	return wakeref;
+}
+
+static void
+__intel_display_power_put_domain(struct drm_i915_private *dev_priv,
+				 enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains;
+	struct i915_power_well *power_well;
+	const char *name = intel_display_power_domain_str(domain);
+
+	power_domains = &dev_priv->power_domains;
+
+	WARN(!power_domains->domain_use_count[domain],
+	     "Use count on domain %s is already zero\n",
+	     name);
+	WARN(async_put_domains_mask(power_domains) & BIT_ULL(domain),
+	     "Async disabling of domain %s is pending\n",
+	     name);
+
+	power_domains->domain_use_count[domain]--;
+
+	for_each_power_domain_well_reverse(dev_priv, power_well, BIT_ULL(domain))
+		intel_power_well_put(dev_priv, power_well);
+}
+
+static void __intel_display_power_put(struct drm_i915_private *dev_priv,
+				      enum intel_display_power_domain domain)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+
+	mutex_lock(&power_domains->lock);
+	__intel_display_power_put_domain(dev_priv, domain);
+	mutex_unlock(&power_domains->lock);
+}
+
+/**
+ * intel_display_power_put_unchecked - release an unchecked power domain reference
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function drops the power domain reference obtained by
+ * intel_display_power_get() and might power down the corresponding hardware
+ * block right away if this is the last reference.
+ *
+ * This function exists only for historical reasons and should be avoided in
+ * new code, as the correctness of its use cannot be checked. Always use
+ * intel_display_power_put() instead.
+ */
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain)
+{
+	__intel_display_power_put(dev_priv, domain);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
+}
+
+static void
+queue_async_put_domains_work(struct i915_power_domains *power_domains,
+			     intel_wakeref_t wakeref)
+{
+	WARN_ON(power_domains->async_put_wakeref);
+	power_domains->async_put_wakeref = wakeref;
+	WARN_ON(!queue_delayed_work(system_unbound_wq,
+				    &power_domains->async_put_work,
+				    msecs_to_jiffies(100)));
+}
+
+static void
+release_async_put_domains(struct i915_power_domains *power_domains, u64 mask)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(power_domains, struct drm_i915_private,
+			     power_domains);
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+	enum intel_display_power_domain domain;
+	intel_wakeref_t wakeref;
+
+	/*
+	 * The caller must hold already raw wakeref, upgrade that to a proper
+	 * wakeref to make the state checker happy about the HW access during
+	 * power well disabling.
+	 */
+	assert_rpm_raw_wakeref_held(rpm);
+	wakeref = intel_runtime_pm_get(rpm);
+
+	for_each_power_domain(domain, mask) {
+		/* Clear before put, so put's sanity check is happy. */
+		async_put_domains_clear_domain(power_domains, domain);
+		__intel_display_power_put_domain(dev_priv, domain);
+	}
+
+	intel_runtime_pm_put(rpm, wakeref);
+}
+
+static void
+intel_display_power_put_async_work(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, struct drm_i915_private,
+			     power_domains.async_put_work.work);
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+	intel_wakeref_t new_work_wakeref = intel_runtime_pm_get_raw(rpm);
+	intel_wakeref_t old_work_wakeref = 0;
+
+	mutex_lock(&power_domains->lock);
+
+	/*
+	 * Bail out if all the domain refs pending to be released were grabbed
+	 * by subsequent gets or a flush_work.
+	 */
+	old_work_wakeref = fetch_and_zero(&power_domains->async_put_wakeref);
+	if (!old_work_wakeref)
+		goto out_verify;
+
+	release_async_put_domains(power_domains,
+				  power_domains->async_put_domains[0]);
+
+	/* Requeue the work if more domains were async put meanwhile. */
+	if (power_domains->async_put_domains[1]) {
+		power_domains->async_put_domains[0] =
+			fetch_and_zero(&power_domains->async_put_domains[1]);
+		queue_async_put_domains_work(power_domains,
+					     fetch_and_zero(&new_work_wakeref));
+	}
+
+out_verify:
+	verify_async_put_domains_state(power_domains);
+
+	mutex_unlock(&power_domains->lock);
+
+	if (old_work_wakeref)
+		intel_runtime_pm_put_raw(rpm, old_work_wakeref);
+	if (new_work_wakeref)
+		intel_runtime_pm_put_raw(rpm, new_work_wakeref);
+}
+
+/**
+ * intel_display_power_put_async - release a power domain reference asynchronously
+ * @i915: i915 device instance
+ * @domain: power domain to reference
+ * @wakeref: wakeref acquired for the reference that is being released
+ *
+ * This function drops the power domain reference obtained by
+ * intel_display_power_get*() and schedules a work to power down the
+ * corresponding hardware block if this is the last reference.
+ */
+void __intel_display_power_put_async(struct drm_i915_private *i915,
+				     enum intel_display_power_domain domain,
+				     intel_wakeref_t wakeref)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
+	intel_wakeref_t work_wakeref = intel_runtime_pm_get_raw(rpm);
+
+	mutex_lock(&power_domains->lock);
+
+	if (power_domains->domain_use_count[domain] > 1) {
+		__intel_display_power_put_domain(i915, domain);
+
+		goto out_verify;
+	}
+
+	WARN_ON(power_domains->domain_use_count[domain] != 1);
+
+	/* Let a pending work requeue itself or queue a new one. */
+	if (power_domains->async_put_wakeref) {
+		power_domains->async_put_domains[1] |= BIT_ULL(domain);
+	} else {
+		power_domains->async_put_domains[0] |= BIT_ULL(domain);
+		queue_async_put_domains_work(power_domains,
+					     fetch_and_zero(&work_wakeref));
+	}
+
+out_verify:
+	verify_async_put_domains_state(power_domains);
+
+	mutex_unlock(&power_domains->lock);
+
+	if (work_wakeref)
+		intel_runtime_pm_put_raw(rpm, work_wakeref);
+
+	intel_runtime_pm_put(rpm, wakeref);
+}
+
+/**
+ * intel_display_power_flush_work - flushes the async display power disabling work
+ * @i915: i915 device instance
+ *
+ * Flushes any pending work that was scheduled by a preceding
+ * intel_display_power_put_async() call, completing the disabling of the
+ * corresponding power domains.
+ *
+ * Note that the work handler function may still be running after this
+ * function returns; to ensure that the work handler isn't running use
+ * intel_display_power_flush_work_sync() instead.
+ */
+void intel_display_power_flush_work(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t work_wakeref;
+
+	mutex_lock(&power_domains->lock);
+
+	work_wakeref = fetch_and_zero(&power_domains->async_put_wakeref);
+	if (!work_wakeref)
+		goto out_verify;
+
+	release_async_put_domains(power_domains,
+				  async_put_domains_mask(power_domains));
+	cancel_delayed_work(&power_domains->async_put_work);
+
+out_verify:
+	verify_async_put_domains_state(power_domains);
+
+	mutex_unlock(&power_domains->lock);
+
+	if (work_wakeref)
+		intel_runtime_pm_put_raw(&i915->runtime_pm, work_wakeref);
+}
+
+/**
+ * intel_display_power_flush_work_sync - flushes and syncs the async display power disabling work
+ * @i915: i915 device instance
+ *
+ * Like intel_display_power_flush_work(), but also ensure that the work
+ * handler function is not running any more when this function returns.
+ */
+static void
+intel_display_power_flush_work_sync(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+
+	intel_display_power_flush_work(i915);
+	cancel_delayed_work_sync(&power_domains->async_put_work);
+
+	verify_async_put_domains_state(power_domains);
+
+	WARN_ON(power_domains->async_put_wakeref);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+/**
+ * intel_display_power_put - release a power domain reference
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ * @wakeref: wakeref acquired for the reference that is being released
+ *
+ * This function drops the power domain reference obtained by
+ * intel_display_power_get() and might power down the corresponding hardware
+ * block right away if this is the last reference.
+ */
+void intel_display_power_put(struct drm_i915_private *dev_priv,
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref)
+{
+	__intel_display_power_put(dev_priv, domain);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+}
+#endif
+
+#define I830_PIPES_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DISPLAY_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_DISPLAY_CORE) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DSI) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_CRT) |		\
+	BIT_ULL(POWER_DOMAIN_VGA) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_GMBUS) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_CMN_BC_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_CRT) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS (	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define CHV_DISPLAY_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_DISPLAY_CORE) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DSI) |		\
+	BIT_ULL(POWER_DOMAIN_VGA) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |		\
+	BIT_ULL(POWER_DOMAIN_GMBUS) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_BC_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_D_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define HSW_DISPLAY_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */	\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define BDW_DISPLAY_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */	\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
+	BIT_ULL(POWER_DOMAIN_MODESET) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
+	BIT_ULL(POWER_DOMAIN_MODESET) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_GMBUS) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define BXT_DPIO_CMN_A_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define BXT_DPIO_CMN_BC_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO))
+#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO))
+#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO))
+#define GLK_DPIO_CMN_A_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DPIO_CMN_B_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DPIO_CMN_C_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DISPLAY_AUX_A_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DISPLAY_AUX_B_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DISPLAY_AUX_C_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define GLK_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
+	BIT_ULL(POWER_DOMAIN_MODESET) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_GMBUS) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_VGA) |				\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_AUX_A_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_AUX_B_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_AUX_C_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_AUX_D_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_AUX_F_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS (		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+#define CNL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
+	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
+	BIT_ULL(POWER_DOMAIN_MODESET) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+/*
+ * ICL PW_0/PG_0 domains (HW/DMC control):
+ * - PCI
+ * - clocks except port PLL
+ * - central power except FBC
+ * - shared functions except pipe interrupts, pipe MBUS, DBUF registers
+ * ICL PW_1/PG_1 domains (HW/DMC control):
+ * - DBUF function
+ * - PIPE_A and its planes, except VGA
+ * - transcoder EDP + PSR
+ * - transcoder DSI
+ * - DDI_A
+ * - FBC
+ */
+#define ICL_PW_4_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_INIT))
+	/* VDSC/joining */
+#define ICL_PW_3_POWER_DOMAINS (			\
+	ICL_PW_4_POWER_DOMAINS |			\
+	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
+	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) |		\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) |	\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_E) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT1) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT2) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT3) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT4) |		\
+	BIT_ULL(POWER_DOMAIN_VGA) |			\
+	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+	/*
+	 * - transcoder WD
+	 * - KVMR (HW control)
+	 */
+#define ICL_PW_2_POWER_DOMAINS (			\
+	ICL_PW_3_POWER_DOMAINS |			\
+	BIT_ULL(POWER_DOMAIN_TRANSCODER_EDP_VDSC) |		\
+	BIT_ULL(POWER_DOMAIN_INIT))
+	/*
+	 * - KVMR (HW control)
+	 */
+#define ICL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
+	ICL_PW_2_POWER_DOMAINS |			\
+	BIT_ULL(POWER_DOMAIN_MODESET) |			\
+	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
+	BIT_ULL(POWER_DOMAIN_INIT))
+
+#define ICL_DDI_IO_A_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO))
+#define ICL_DDI_IO_B_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO))
+#define ICL_DDI_IO_C_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO))
+#define ICL_DDI_IO_D_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO))
+#define ICL_DDI_IO_E_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO))
+#define ICL_DDI_IO_F_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO))
+
+#define ICL_AUX_A_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
+	BIT_ULL(POWER_DOMAIN_AUX_A))
+#define ICL_AUX_B_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_B))
+#define ICL_AUX_C_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_C))
+#define ICL_AUX_D_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_D))
+#define ICL_AUX_E_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_E))
+#define ICL_AUX_F_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_F))
+#define ICL_AUX_TBT1_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT1))
+#define ICL_AUX_TBT2_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT2))
+#define ICL_AUX_TBT3_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT3))
+#define ICL_AUX_TBT4_IO_POWER_DOMAINS (			\
+	BIT_ULL(POWER_DOMAIN_AUX_TBT4))
+
+static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = i9xx_always_on_power_well_noop,
+	.disable = i9xx_always_on_power_well_noop,
+	.is_enabled = i9xx_always_on_power_well_enabled,
+};
+
+static const struct i915_power_well_ops chv_pipe_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = chv_pipe_power_well_enable,
+	.disable = chv_pipe_power_well_disable,
+	.is_enabled = chv_pipe_power_well_enabled,
+};
+
+static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = chv_dpio_cmn_power_well_enable,
+	.disable = chv_dpio_cmn_power_well_disable,
+	.is_enabled = vlv_power_well_enabled,
+};
+
+static const struct i915_power_well_desc i9xx_always_on_power_well[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+};
+
+static const struct i915_power_well_ops i830_pipes_power_well_ops = {
+	.sync_hw = i830_pipes_power_well_sync_hw,
+	.enable = i830_pipes_power_well_enable,
+	.disable = i830_pipes_power_well_disable,
+	.is_enabled = i830_pipes_power_well_enabled,
+};
+
+static const struct i915_power_well_desc i830_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "pipes",
+		.domains = I830_PIPES_POWER_DOMAINS,
+		.ops = &i830_pipes_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+};
+
+static const struct i915_power_well_ops hsw_power_well_ops = {
+	.sync_hw = hsw_power_well_sync_hw,
+	.enable = hsw_power_well_enable,
+	.disable = hsw_power_well_disable,
+	.is_enabled = hsw_power_well_enabled,
+};
+
+static const struct i915_power_well_ops gen9_dc_off_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = gen9_dc_off_power_well_enable,
+	.disable = gen9_dc_off_power_well_disable,
+	.is_enabled = gen9_dc_off_power_well_enabled,
+};
+
+static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = bxt_dpio_cmn_power_well_enable,
+	.disable = bxt_dpio_cmn_power_well_disable,
+	.is_enabled = bxt_dpio_cmn_power_well_enabled,
+};
+
+static const struct i915_power_well_regs hsw_power_well_regs = {
+	.bios	= HSW_PWR_WELL_CTL1,
+	.driver	= HSW_PWR_WELL_CTL2,
+	.kvmr	= HSW_PWR_WELL_CTL3,
+	.debug	= HSW_PWR_WELL_CTL4,
+};
+
+static const struct i915_power_well_desc hsw_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "display",
+		.domains = HSW_DISPLAY_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = HSW_DISP_PW_GLOBAL,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = HSW_PW_CTL_IDX_GLOBAL,
+			.hsw.has_vga = true,
+		},
+	},
+};
+
+static const struct i915_power_well_desc bdw_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "display",
+		.domains = BDW_DISPLAY_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = HSW_DISP_PW_GLOBAL,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = HSW_PW_CTL_IDX_GLOBAL,
+			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
+			.hsw.has_vga = true,
+		},
+	},
+};
+
+static const struct i915_power_well_ops vlv_display_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = vlv_display_power_well_enable,
+	.disable = vlv_display_power_well_disable,
+	.is_enabled = vlv_power_well_enabled,
+};
+
+static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = vlv_dpio_cmn_power_well_enable,
+	.disable = vlv_dpio_cmn_power_well_disable,
+	.is_enabled = vlv_power_well_enabled,
+};
+
+static const struct i915_power_well_ops vlv_dpio_power_well_ops = {
+	.sync_hw = i9xx_power_well_sync_hw_noop,
+	.enable = vlv_power_well_enable,
+	.disable = vlv_power_well_disable,
+	.is_enabled = vlv_power_well_enabled,
+};
+
+static const struct i915_power_well_desc vlv_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "display",
+		.domains = VLV_DISPLAY_POWER_DOMAINS,
+		.ops = &vlv_display_power_well_ops,
+		.id = VLV_DISP_PW_DISP2D,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DISP2D,
+		},
+	},
+	{
+		.name = "dpio-tx-b-01",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_B_LANES_01,
+		},
+	},
+	{
+		.name = "dpio-tx-b-23",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_B_LANES_23,
+		},
+	},
+	{
+		.name = "dpio-tx-c-01",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_C_LANES_01,
+		},
+	},
+	{
+		.name = "dpio-tx-c-23",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_C_LANES_23,
+		},
+	},
+	{
+		.name = "dpio-common",
+		.domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
+		.ops = &vlv_dpio_cmn_power_well_ops,
+		.id = VLV_DISP_PW_DPIO_CMN_BC,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_BC,
+		},
+	},
+};
+
+static const struct i915_power_well_desc chv_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "display",
+		/*
+		 * Pipe A power well is the new disp2d well. Pipe B and C
+		 * power wells don't actually exist. Pipe A power well is
+		 * required for any pipe to work.
+		 */
+		.domains = CHV_DISPLAY_POWER_DOMAINS,
+		.ops = &chv_pipe_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "dpio-common-bc",
+		.domains = CHV_DPIO_CMN_BC_POWER_DOMAINS,
+		.ops = &chv_dpio_cmn_power_well_ops,
+		.id = VLV_DISP_PW_DPIO_CMN_BC,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_BC,
+		},
+	},
+	{
+		.name = "dpio-common-d",
+		.domains = CHV_DPIO_CMN_D_POWER_DOMAINS,
+		.ops = &chv_dpio_cmn_power_well_ops,
+		.id = CHV_DISP_PW_DPIO_CMN_D,
+		{
+			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_D,
+		},
+	},
+};
+
+bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
+					 enum i915_power_well_id power_well_id)
+{
+	struct i915_power_well *power_well;
+	bool ret;
+
+	power_well = lookup_power_well(dev_priv, power_well_id);
+	ret = power_well->desc->ops->is_enabled(dev_priv, power_well);
+
+	return ret;
+}
+
+static const struct i915_power_well_desc skl_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "MISC IO power well",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_MISC_IO,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_MISC_IO,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 2",
+		.domains = SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
+			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DDI A/E IO power well",
+		.domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_A_E,
+		},
+	},
+	{
+		.name = "DDI B IO power well",
+		.domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
+		},
+	},
+	{
+		.name = "DDI C IO power well",
+		.domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
+		},
+	},
+	{
+		.name = "DDI D IO power well",
+		.domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_D,
+		},
+	},
+};
+
+static const struct i915_power_well_desc bxt_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 2",
+		.domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
+			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "dpio-common-a",
+		.domains = BXT_DPIO_CMN_A_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.id = BXT_DISP_PW_DPIO_CMN_A,
+		{
+			.bxt.phy = DPIO_PHY1,
+		},
+	},
+	{
+		.name = "dpio-common-bc",
+		.domains = BXT_DPIO_CMN_BC_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.id = VLV_DISP_PW_DPIO_CMN_BC,
+		{
+			.bxt.phy = DPIO_PHY0,
+		},
+	},
+};
+
+static const struct i915_power_well_desc glk_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 2",
+		.domains = GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
+			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "dpio-common-a",
+		.domains = GLK_DPIO_CMN_A_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.id = BXT_DISP_PW_DPIO_CMN_A,
+		{
+			.bxt.phy = DPIO_PHY1,
+		},
+	},
+	{
+		.name = "dpio-common-b",
+		.domains = GLK_DPIO_CMN_B_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.id = VLV_DISP_PW_DPIO_CMN_BC,
+		{
+			.bxt.phy = DPIO_PHY0,
+		},
+	},
+	{
+		.name = "dpio-common-c",
+		.domains = GLK_DPIO_CMN_C_POWER_DOMAINS,
+		.ops = &bxt_dpio_cmn_power_well_ops,
+		.id = GLK_DISP_PW_DPIO_CMN_C,
+		{
+			.bxt.phy = DPIO_PHY2,
+		},
+	},
+	{
+		.name = "AUX A",
+		.domains = GLK_DISPLAY_AUX_A_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_A,
+		},
+	},
+	{
+		.name = "AUX B",
+		.domains = GLK_DISPLAY_AUX_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_B,
+		},
+	},
+	{
+		.name = "AUX C",
+		.domains = GLK_DISPLAY_AUX_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_C,
+		},
+	},
+	{
+		.name = "DDI A IO power well",
+		.domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_DDI_A,
+		},
+	},
+	{
+		.name = "DDI B IO power well",
+		.domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
+		},
+	},
+	{
+		.name = "DDI C IO power well",
+		.domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
+		},
+	},
+};
+
+static const struct i915_power_well_desc cnl_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "AUX A",
+		.domains = CNL_DISPLAY_AUX_A_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_A,
+		},
+	},
+	{
+		.name = "AUX B",
+		.domains = CNL_DISPLAY_AUX_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_B,
+		},
+	},
+	{
+		.name = "AUX C",
+		.domains = CNL_DISPLAY_AUX_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_AUX_C,
+		},
+	},
+	{
+		.name = "AUX D",
+		.domains = CNL_DISPLAY_AUX_D_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = CNL_PW_CTL_IDX_AUX_D,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 2",
+		.domains = CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
+			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DDI A IO power well",
+		.domains = CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = GLK_PW_CTL_IDX_DDI_A,
+		},
+	},
+	{
+		.name = "DDI B IO power well",
+		.domains = CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
+		},
+	},
+	{
+		.name = "DDI C IO power well",
+		.domains = CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
+		},
+	},
+	{
+		.name = "DDI D IO power well",
+		.domains = CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = SKL_PW_CTL_IDX_DDI_D,
+		},
+	},
+	{
+		.name = "DDI F IO power well",
+		.domains = CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = CNL_PW_CTL_IDX_DDI_F,
+		},
+	},
+	{
+		.name = "AUX F",
+		.domains = CNL_DISPLAY_AUX_F_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = CNL_PW_CTL_IDX_AUX_F,
+		},
+	},
+};
+
+static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = {
+	.sync_hw = hsw_power_well_sync_hw,
+	.enable = icl_combo_phy_aux_power_well_enable,
+	.disable = icl_combo_phy_aux_power_well_disable,
+	.is_enabled = hsw_power_well_enabled,
+};
+
+static const struct i915_power_well_ops icl_tc_phy_aux_power_well_ops = {
+	.sync_hw = hsw_power_well_sync_hw,
+	.enable = icl_tc_phy_aux_power_well_enable,
+	.disable = hsw_power_well_disable,
+	.is_enabled = hsw_power_well_enabled,
+};
+
+static const struct i915_power_well_regs icl_aux_power_well_regs = {
+	.bios	= ICL_PWR_WELL_CTL_AUX1,
+	.driver	= ICL_PWR_WELL_CTL_AUX2,
+	.debug	= ICL_PWR_WELL_CTL_AUX4,
+};
+
+static const struct i915_power_well_regs icl_ddi_power_well_regs = {
+	.bios	= ICL_PWR_WELL_CTL_DDI1,
+	.driver	= ICL_PWR_WELL_CTL_DDI2,
+	.debug	= ICL_PWR_WELL_CTL_DDI4,
+};
+
+static const struct i915_power_well_desc icl_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = true,
+		.domains = POWER_DOMAIN_MASK,
+		.ops = &i9xx_always_on_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 1",
+		/* Handled by the DMC firmware */
+		.always_on = true,
+		.domains = 0,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_1,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_1,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DC off",
+		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
+	{
+		.name = "power well 2",
+		.domains = ICL_PW_2_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = SKL_DISP_PW_2,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_2,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "power well 3",
+		.domains = ICL_PW_3_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_3,
+			.hsw.irq_pipe_mask = BIT(PIPE_B),
+			.hsw.has_vga = true,
+			.hsw.has_fuses = true,
+		},
+	},
+	{
+		.name = "DDI A IO",
+		.domains = ICL_DDI_IO_A_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_A,
+		},
+	},
+	{
+		.name = "DDI B IO",
+		.domains = ICL_DDI_IO_B_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_B,
+		},
+	},
+	{
+		.name = "DDI C IO",
+		.domains = ICL_DDI_IO_C_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_C,
+		},
+	},
+	{
+		.name = "DDI D IO",
+		.domains = ICL_DDI_IO_D_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_D,
+		},
+	},
+	{
+		.name = "DDI E IO",
+		.domains = ICL_DDI_IO_E_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_E,
+		},
+	},
+	{
+		.name = "DDI F IO",
+		.domains = ICL_DDI_IO_F_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_ddi_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_DDI_F,
+		},
+	},
+	{
+		.name = "AUX A",
+		.domains = ICL_AUX_A_IO_POWER_DOMAINS,
+		.ops = &icl_combo_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_A,
+		},
+	},
+	{
+		.name = "AUX B",
+		.domains = ICL_AUX_B_IO_POWER_DOMAINS,
+		.ops = &icl_combo_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_B,
+		},
+	},
+	{
+		.name = "AUX C",
+		.domains = ICL_AUX_C_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_C,
+			.hsw.is_tc_tbt = false,
+		},
+	},
+	{
+		.name = "AUX D",
+		.domains = ICL_AUX_D_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_D,
+			.hsw.is_tc_tbt = false,
+		},
+	},
+	{
+		.name = "AUX E",
+		.domains = ICL_AUX_E_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_E,
+			.hsw.is_tc_tbt = false,
+		},
+	},
+	{
+		.name = "AUX F",
+		.domains = ICL_AUX_F_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_F,
+			.hsw.is_tc_tbt = false,
+		},
+	},
+	{
+		.name = "AUX TBT1",
+		.domains = ICL_AUX_TBT1_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT1,
+			.hsw.is_tc_tbt = true,
+		},
+	},
+	{
+		.name = "AUX TBT2",
+		.domains = ICL_AUX_TBT2_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT2,
+			.hsw.is_tc_tbt = true,
+		},
+	},
+	{
+		.name = "AUX TBT3",
+		.domains = ICL_AUX_TBT3_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT3,
+			.hsw.is_tc_tbt = true,
+		},
+	},
+	{
+		.name = "AUX TBT4",
+		.domains = ICL_AUX_TBT4_IO_POWER_DOMAINS,
+		.ops = &icl_tc_phy_aux_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &icl_aux_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT4,
+			.hsw.is_tc_tbt = true,
+		},
+	},
+	{
+		.name = "power well 4",
+		.domains = ICL_PW_4_POWER_DOMAINS,
+		.ops = &hsw_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+		{
+			.hsw.regs = &hsw_power_well_regs,
+			.hsw.idx = ICL_PW_CTL_IDX_PW_4,
+			.hsw.has_fuses = true,
+			.hsw.irq_pipe_mask = BIT(PIPE_C),
+		},
+	},
+};
+
+static int
+sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv,
+				   int disable_power_well)
+{
+	if (disable_power_well >= 0)
+		return !!disable_power_well;
+
+	return 1;
+}
+
+static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
+			       int enable_dc)
+{
+	u32 mask;
+	int requested_dc;
+	int max_dc;
+
+	if (INTEL_GEN(dev_priv) >= 11) {
+		max_dc = 2;
+		/*
+		 * DC9 has a separate HW flow from the rest of the DC states,
+		 * not depending on the DMC firmware. It's needed by system
+		 * suspend/resume, so allow it unconditionally.
+		 */
+		mask = DC_STATE_EN_DC9;
+	} else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) {
+		max_dc = 2;
+		mask = 0;
+	} else if (IS_GEN9_LP(dev_priv)) {
+		max_dc = 1;
+		mask = DC_STATE_EN_DC9;
+	} else {
+		max_dc = 0;
+		mask = 0;
+	}
+
+	if (!i915_modparams.disable_power_well)
+		max_dc = 0;
+
+	if (enable_dc >= 0 && enable_dc <= max_dc) {
+		requested_dc = enable_dc;
+	} else if (enable_dc == -1) {
+		requested_dc = max_dc;
+	} else if (enable_dc > max_dc && enable_dc <= 2) {
+		DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n",
+			      enable_dc, max_dc);
+		requested_dc = max_dc;
+	} else {
+		DRM_ERROR("Unexpected value for enable_dc (%d)\n", enable_dc);
+		requested_dc = max_dc;
+	}
+
+	if (requested_dc > 1)
+		mask |= DC_STATE_EN_UPTO_DC6;
+	if (requested_dc > 0)
+		mask |= DC_STATE_EN_UPTO_DC5;
+
+	DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask);
+
+	return mask;
+}
+
+static int
+__set_power_wells(struct i915_power_domains *power_domains,
+		  const struct i915_power_well_desc *power_well_descs,
+		  int power_well_count)
+{
+	u64 power_well_ids = 0;
+	int i;
+
+	power_domains->power_well_count = power_well_count;
+	power_domains->power_wells =
+				kcalloc(power_well_count,
+					sizeof(*power_domains->power_wells),
+					GFP_KERNEL);
+	if (!power_domains->power_wells)
+		return -ENOMEM;
+
+	for (i = 0; i < power_well_count; i++) {
+		enum i915_power_well_id id = power_well_descs[i].id;
+
+		power_domains->power_wells[i].desc = &power_well_descs[i];
+
+		if (id == DISP_PW_ID_NONE)
+			continue;
+
+		WARN_ON(id >= sizeof(power_well_ids) * 8);
+		WARN_ON(power_well_ids & BIT_ULL(id));
+		power_well_ids |= BIT_ULL(id);
+	}
+
+	return 0;
+}
+
+#define set_power_wells(power_domains, __power_well_descs) \
+	__set_power_wells(power_domains, __power_well_descs, \
+			  ARRAY_SIZE(__power_well_descs))
+
+/**
+ * intel_power_domains_init - initializes the power domain structures
+ * @dev_priv: i915 device instance
+ *
+ * Initializes the power domain structures for @dev_priv depending upon the
+ * supported platform.
+ */
+int intel_power_domains_init(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	int err;
+
+	i915_modparams.disable_power_well =
+		sanitize_disable_power_well_option(dev_priv,
+						   i915_modparams.disable_power_well);
+	dev_priv->csr.allowed_dc_mask =
+		get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc);
+
+	BUILD_BUG_ON(POWER_DOMAIN_NUM > 64);
+
+	mutex_init(&power_domains->lock);
+
+	INIT_DELAYED_WORK(&power_domains->async_put_work,
+			  intel_display_power_put_async_work);
+
+	/*
+	 * The enabling order will be from lower to higher indexed wells,
+	 * the disabling order is reversed.
+	 */
+	if (IS_GEN(dev_priv, 11)) {
+		err = set_power_wells(power_domains, icl_power_wells);
+	} else if (IS_CANNONLAKE(dev_priv)) {
+		err = set_power_wells(power_domains, cnl_power_wells);
+
+		/*
+		 * DDI and Aux IO are getting enabled for all ports
+		 * regardless the presence or use. So, in order to avoid
+		 * timeouts, lets remove them from the list
+		 * for the SKUs without port F.
+		 */
+		if (!IS_CNL_WITH_PORT_F(dev_priv))
+			power_domains->power_well_count -= 2;
+	} else if (IS_GEMINILAKE(dev_priv)) {
+		err = set_power_wells(power_domains, glk_power_wells);
+	} else if (IS_BROXTON(dev_priv)) {
+		err = set_power_wells(power_domains, bxt_power_wells);
+	} else if (IS_GEN9_BC(dev_priv)) {
+		err = set_power_wells(power_domains, skl_power_wells);
+	} else if (IS_CHERRYVIEW(dev_priv)) {
+		err = set_power_wells(power_domains, chv_power_wells);
+	} else if (IS_BROADWELL(dev_priv)) {
+		err = set_power_wells(power_domains, bdw_power_wells);
+	} else if (IS_HASWELL(dev_priv)) {
+		err = set_power_wells(power_domains, hsw_power_wells);
+	} else if (IS_VALLEYVIEW(dev_priv)) {
+		err = set_power_wells(power_domains, vlv_power_wells);
+	} else if (IS_I830(dev_priv)) {
+		err = set_power_wells(power_domains, i830_power_wells);
+	} else {
+		err = set_power_wells(power_domains, i9xx_always_on_power_well);
+	}
+
+	return err;
+}
+
+/**
+ * intel_power_domains_cleanup - clean up power domains resources
+ * @dev_priv: i915 device instance
+ *
+ * Release any resources acquired by intel_power_domains_init()
+ */
+void intel_power_domains_cleanup(struct drm_i915_private *dev_priv)
+{
+	kfree(dev_priv->power_domains.power_wells);
+}
+
+static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+
+	mutex_lock(&power_domains->lock);
+	for_each_power_well(dev_priv, power_well) {
+		power_well->desc->ops->sync_hw(dev_priv, power_well);
+		power_well->hw_enabled =
+			power_well->desc->ops->is_enabled(dev_priv, power_well);
+	}
+	mutex_unlock(&power_domains->lock);
+}
+
+static inline
+bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
+			  i915_reg_t reg, bool enable)
+{
+	u32 val, status;
+
+	val = I915_READ(reg);
+	val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST);
+	I915_WRITE(reg, val);
+	POSTING_READ(reg);
+	udelay(10);
+
+	status = I915_READ(reg) & DBUF_POWER_STATE;
+	if ((enable && !status) || (!enable && status)) {
+		DRM_ERROR("DBus power %s timeout!\n",
+			  enable ? "enable" : "disable");
+		return false;
+	}
+	return true;
+}
+
+static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
+{
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, true);
+}
+
+static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
+{
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
+}
+
+static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) < 11)
+		return 1;
+	return 2;
+}
+
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices)
+{
+	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	bool ret;
+
+	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
+		DRM_ERROR("Invalid number of dbuf slices requested\n");
+		return;
+	}
+
+	if (req_slices == hw_enabled_slices || req_slices == 0)
+		return;
+
+	if (req_slices > hw_enabled_slices)
+		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
+	else
+		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
+
+	if (ret)
+		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
+}
+
+static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
+	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL_S2);
+
+	udelay(10);
+
+	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
+	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power enable timeout\n");
+	else
+		/*
+		 * FIXME: for now pretend that we only have 1 slice, see
+		 * intel_enabled_dbuf_slices_num().
+		 */
+		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
+}
+
+static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
+{
+	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
+	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
+	POSTING_READ(DBUF_CTL_S2);
+
+	udelay(10);
+
+	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
+	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
+		DRM_ERROR("DBuf power disable timeout!\n");
+	else
+		/*
+		 * FIXME: for now pretend that the first slice is always
+		 * enabled, see intel_enabled_dbuf_slices_num().
+		 */
+		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
+}
+
+static void icl_mbus_init(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	val = MBUS_ABOX_BT_CREDIT_POOL1(16) |
+	      MBUS_ABOX_BT_CREDIT_POOL2(16) |
+	      MBUS_ABOX_B_CREDIT(1) |
+	      MBUS_ABOX_BW_CREDIT(1);
+
+	I915_WRITE(MBUS_ABOX_CTL, val);
+}
+
+static void hsw_assert_cdclk(struct drm_i915_private *dev_priv)
+{
+	u32 val = I915_READ(LCPLL_CTL);
+
+	/*
+	 * The LCPLL register should be turned on by the BIOS. For now
+	 * let's just check its state and print errors in case
+	 * something is wrong.  Don't even try to turn it on.
+	 */
+
+	if (val & LCPLL_CD_SOURCE_FCLK)
+		DRM_ERROR("CDCLK source is not LCPLL\n");
+
+	if (val & LCPLL_PLL_DISABLE)
+		DRM_ERROR("LCPLL is disabled\n");
+
+	if ((val & LCPLL_REF_MASK) != LCPLL_REF_NON_SSC)
+		DRM_ERROR("LCPLL not using non-SSC reference\n");
+}
+
+static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = &dev_priv->drm;
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(dev, crtc)
+		I915_STATE_WARN(crtc->active, "CRTC for pipe %c enabled\n",
+				pipe_name(crtc->pipe));
+
+	I915_STATE_WARN(I915_READ(HSW_PWR_WELL_CTL2),
+			"Display power well on\n");
+	I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE,
+			"SPLL enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE,
+			"WRPLL1 enabled\n");
+	I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE,
+			"WRPLL2 enabled\n");
+	I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON,
+			"Panel power on\n");
+	I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
+			"CPU PWM1 enabled\n");
+	if (IS_HASWELL(dev_priv))
+		I915_STATE_WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
+				"CPU PWM2 enabled\n");
+	I915_STATE_WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
+			"PCH PWM1 enabled\n");
+	I915_STATE_WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
+			"Utility pin enabled\n");
+	I915_STATE_WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE,
+			"PCH GTC enabled\n");
+
+	/*
+	 * In theory we can still leave IRQs enabled, as long as only the HPD
+	 * interrupts remain enabled. We used to check for that, but since it's
+	 * gen-specific and since we only disable LCPLL after we fully disable
+	 * the interrupts, the check below should be enough.
+	 */
+	I915_STATE_WARN(intel_irqs_enabled(dev_priv), "IRQs enabled\n");
+}
+
+static u32 hsw_read_dcomp(struct drm_i915_private *dev_priv)
+{
+	if (IS_HASWELL(dev_priv))
+		return I915_READ(D_COMP_HSW);
+	else
+		return I915_READ(D_COMP_BDW);
+}
+
+static void hsw_write_dcomp(struct drm_i915_private *dev_priv, u32 val)
+{
+	if (IS_HASWELL(dev_priv)) {
+		if (sandybridge_pcode_write(dev_priv,
+					    GEN6_PCODE_WRITE_D_COMP, val))
+			DRM_DEBUG_KMS("Failed to write to D_COMP\n");
+	} else {
+		I915_WRITE(D_COMP_BDW, val);
+		POSTING_READ(D_COMP_BDW);
+	}
+}
+
+/*
+ * This function implements pieces of two sequences from BSpec:
+ * - Sequence for display software to disable LCPLL
+ * - Sequence for display software to allow package C8+
+ * The steps implemented here are just the steps that actually touch the LCPLL
+ * register. Callers should take care of disabling all the display engine
+ * functions, doing the mode unset, fixing interrupts, etc.
+ */
+static void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
+			      bool switch_to_fclk, bool allow_power_down)
+{
+	u32 val;
+
+	assert_can_disable_lcpll(dev_priv);
+
+	val = I915_READ(LCPLL_CTL);
+
+	if (switch_to_fclk) {
+		val |= LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_us(I915_READ(LCPLL_CTL) &
+				LCPLL_CD_SOURCE_FCLK_DONE, 1))
+			DRM_ERROR("Switching to FCLK failed\n");
+
+		val = I915_READ(LCPLL_CTL);
+	}
+
+	val |= LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+	POSTING_READ(LCPLL_CTL);
+
+	if (intel_wait_for_register(&dev_priv->uncore, LCPLL_CTL,
+				    LCPLL_PLL_LOCK, 0, 1))
+		DRM_ERROR("LCPLL still locked\n");
+
+	val = hsw_read_dcomp(dev_priv);
+	val |= D_COMP_COMP_DISABLE;
+	hsw_write_dcomp(dev_priv, val);
+	ndelay(100);
+
+	if (wait_for((hsw_read_dcomp(dev_priv) &
+		      D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
+		DRM_ERROR("D_COMP RCOMP still in progress\n");
+
+	if (allow_power_down) {
+		val = I915_READ(LCPLL_CTL);
+		val |= LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+}
+
+/*
+ * Fully restores LCPLL, disallowing power down and switching back to LCPLL
+ * source.
+ */
+static void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	val = I915_READ(LCPLL_CTL);
+
+	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
+		    LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
+		return;
+
+	/*
+	 * Make sure we're not on PC8 state before disabling PC8, otherwise
+	 * we'll hang the machine. To prevent PC8 state, just enable force_wake.
+	 */
+	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
+
+	if (val & LCPLL_POWER_DOWN_ALLOW) {
+		val &= ~LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+
+	val = hsw_read_dcomp(dev_priv);
+	val |= D_COMP_COMP_FORCE;
+	val &= ~D_COMP_COMP_DISABLE;
+	hsw_write_dcomp(dev_priv, val);
+
+	val = I915_READ(LCPLL_CTL);
+	val &= ~LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+
+	if (intel_wait_for_register(&dev_priv->uncore, LCPLL_CTL,
+				    LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, 5))
+		DRM_ERROR("LCPLL not locked yet\n");
+
+	if (val & LCPLL_CD_SOURCE_FCLK) {
+		val = I915_READ(LCPLL_CTL);
+		val &= ~LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_us((I915_READ(LCPLL_CTL) &
+				 LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+			DRM_ERROR("Switching back to LCPLL failed\n");
+	}
+
+	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
+
+	intel_update_cdclk(dev_priv);
+	intel_dump_cdclk_state(&dev_priv->cdclk.hw, "Current CDCLK");
+}
+
+/*
+ * Package states C8 and deeper are really deep PC states that can only be
+ * reached when all the devices on the system allow it, so even if the graphics
+ * device allows PC8+, it doesn't mean the system will actually get to these
+ * states. Our driver only allows PC8+ when going into runtime PM.
+ *
+ * The requirements for PC8+ are that all the outputs are disabled, the power
+ * well is disabled and most interrupts are disabled, and these are also
+ * requirements for runtime PM. When these conditions are met, we manually do
+ * the other conditions: disable the interrupts, clocks and switch LCPLL refclk
+ * to Fclk. If we're in PC8+ and we get an non-hotplug interrupt, we can hard
+ * hang the machine.
+ *
+ * When we really reach PC8 or deeper states (not just when we allow it) we lose
+ * the state of some registers, so when we come back from PC8+ we need to
+ * restore this state. We don't get into PC8+ if we're not in RC6, so we don't
+ * need to take care of the registers kept by RC6. Notice that this happens even
+ * if we don't put the device in PCI D3 state (which is what currently happens
+ * because of the runtime PM support).
+ *
+ * For more, read "Display Sequences for Package C8" on the hardware
+ * documentation.
+ */
+void hsw_enable_pc8(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("Enabling package C8+\n");
+
+	if (HAS_PCH_LPT_LP(dev_priv)) {
+		val = I915_READ(SOUTH_DSPCLK_GATE_D);
+		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
+		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+	}
+
+	lpt_disable_clkout_dp(dev_priv);
+	hsw_disable_lcpll(dev_priv, true, true);
+}
+
+void hsw_disable_pc8(struct drm_i915_private *dev_priv)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("Disabling package C8+\n");
+
+	hsw_restore_lcpll(dev_priv);
+	intel_init_pch_refclk(dev_priv);
+
+	if (HAS_PCH_LPT_LP(dev_priv)) {
+		val = I915_READ(SOUTH_DSPCLK_GATE_D);
+		val |= PCH_LP_PARTITION_LEVEL_DISABLE;
+		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+	}
+}
+
+static void intel_pch_reset_handshake(struct drm_i915_private *dev_priv,
+				      bool enable)
+{
+	i915_reg_t reg;
+	u32 reset_bits, val;
+
+	if (IS_IVYBRIDGE(dev_priv)) {
+		reg = GEN7_MSG_CTL;
+		reset_bits = WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK;
+	} else {
+		reg = HSW_NDE_RSTWRN_OPT;
+		reset_bits = RESET_PCH_HANDSHAKE_ENABLE;
+	}
+
+	val = I915_READ(reg);
+
+	if (enable)
+		val |= reset_bits;
+	else
+		val &= ~reset_bits;
+
+	I915_WRITE(reg, val);
+}
+
+static void skl_display_core_init(struct drm_i915_private *dev_priv,
+				  bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* enable PCH reset handshake */
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
+
+	/* enable PG1 and Misc I/O */
+	mutex_lock(&power_domains->lock);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
+	intel_power_well_enable(dev_priv, well);
+
+	mutex_unlock(&power_domains->lock);
+
+	intel_cdclk_init(dev_priv);
+
+	gen9_dbuf_enable(dev_priv);
+
+	if (resume && dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+static void skl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	gen9_dbuf_disable(dev_priv);
+
+	intel_cdclk_uninit(dev_priv);
+
+	/* The spec doesn't call for removing the reset handshake flag */
+	/* disable PG1 and Misc I/O */
+
+	mutex_lock(&power_domains->lock);
+
+	/*
+	 * BSpec says to keep the MISC IO power well enabled here, only
+	 * remove our request for power well 1.
+	 * Note that even though the driver's request is removed power well 1
+	 * may stay enabled after this due to DMC's own request on it.
+	 */
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+
+	mutex_unlock(&power_domains->lock);
+
+	usleep_range(10, 30);		/* 10 us delay per Bspec */
+}
+
+void bxt_display_core_init(struct drm_i915_private *dev_priv,
+			   bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/*
+	 * NDE_RSTWRN_OPT RST PCH Handshake En must always be 0b on BXT
+	 * or else the reset will hang because there is no PCH to respond.
+	 * Move the handshake programming to initialization sequence.
+	 * Previously was left up to BIOS.
+	 */
+	intel_pch_reset_handshake(dev_priv, false);
+
+	/* Enable PG1 */
+	mutex_lock(&power_domains->lock);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+
+	mutex_unlock(&power_domains->lock);
+
+	intel_cdclk_init(dev_priv);
+
+	gen9_dbuf_enable(dev_priv);
+
+	if (resume && dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+void bxt_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	gen9_dbuf_disable(dev_priv);
+
+	intel_cdclk_uninit(dev_priv);
+
+	/* The spec doesn't call for removing the reset handshake flag */
+
+	/*
+	 * Disable PW1 (PG1).
+	 * Note that even though the driver's request is removed power well 1
+	 * may stay enabled after this due to DMC's own request on it.
+	 */
+	mutex_lock(&power_domains->lock);
+
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+
+	mutex_unlock(&power_domains->lock);
+
+	usleep_range(10, 30);		/* 10 us delay per Bspec */
+}
+
+static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Enable PCH Reset Handshake */
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
+
+	/* 2-3. */
+	intel_combo_phy_init(dev_priv);
+
+	/*
+	 * 4. Enable Power Well 1 (PG1).
+	 *    The AUX IO power wells will be enabled on demand.
+	 */
+	mutex_lock(&power_domains->lock);
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+	mutex_unlock(&power_domains->lock);
+
+	/* 5. Enable CD clock */
+	intel_cdclk_init(dev_priv);
+
+	/* 6. Enable DBUF */
+	gen9_dbuf_enable(dev_priv);
+
+	if (resume && dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+static void cnl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Disable all display engine functions -> aready done */
+
+	/* 2. Disable DBUF */
+	gen9_dbuf_disable(dev_priv);
+
+	/* 3. Disable CD clock */
+	intel_cdclk_uninit(dev_priv);
+
+	/*
+	 * 4. Disable Power Well 1 (PG1).
+	 *    The AUX IO power wells are toggled on demand, so they are already
+	 *    disabled at this point.
+	 */
+	mutex_lock(&power_domains->lock);
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+	mutex_unlock(&power_domains->lock);
+
+	usleep_range(10, 30);		/* 10 us delay per Bspec */
+
+	/* 5. */
+	intel_combo_phy_uninit(dev_priv);
+}
+
+void icl_display_core_init(struct drm_i915_private *dev_priv,
+			   bool resume)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Enable PCH reset handshake. */
+	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
+
+	/* 2. Initialize all combo phys */
+	intel_combo_phy_init(dev_priv);
+
+	/*
+	 * 3. Enable Power Well 1 (PG1).
+	 *    The AUX IO power wells will be enabled on demand.
+	 */
+	mutex_lock(&power_domains->lock);
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_enable(dev_priv, well);
+	mutex_unlock(&power_domains->lock);
+
+	/* 4. Enable CDCLK. */
+	intel_cdclk_init(dev_priv);
+
+	/* 5. Enable DBUF. */
+	icl_dbuf_enable(dev_priv);
+
+	/* 6. Setup MBUS. */
+	icl_mbus_init(dev_priv);
+
+	if (resume && dev_priv->csr.dmc_payload)
+		intel_csr_load_program(dev_priv);
+}
+
+void icl_display_core_uninit(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *well;
+
+	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
+
+	/* 1. Disable all display engine functions -> aready done */
+
+	/* 2. Disable DBUF */
+	icl_dbuf_disable(dev_priv);
+
+	/* 3. Disable CD clock */
+	intel_cdclk_uninit(dev_priv);
+
+	/*
+	 * 4. Disable Power Well 1 (PG1).
+	 *    The AUX IO power wells are toggled on demand, so they are already
+	 *    disabled at this point.
+	 */
+	mutex_lock(&power_domains->lock);
+	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
+	intel_power_well_disable(dev_priv, well);
+	mutex_unlock(&power_domains->lock);
+
+	/* 5. */
+	intel_combo_phy_uninit(dev_priv);
+}
+
+static void chv_phy_control_init(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *cmn_bc =
+		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
+	struct i915_power_well *cmn_d =
+		lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D);
+
+	/*
+	 * DISPLAY_PHY_CONTROL can get corrupted if read. As a
+	 * workaround never ever read DISPLAY_PHY_CONTROL, and
+	 * instead maintain a shadow copy ourselves. Use the actual
+	 * power well state and lane status to reconstruct the
+	 * expected initial value.
+	 */
+	dev_priv->chv_phy_control =
+		PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) |
+		PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) |
+		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH0) |
+		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH1) |
+		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY1, DPIO_CH0);
+
+	/*
+	 * If all lanes are disabled we leave the override disabled
+	 * with all power down bits cleared to match the state we
+	 * would use after disabling the port. Otherwise enable the
+	 * override and set the lane powerdown bits accding to the
+	 * current lane status.
+	 */
+	if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) {
+		u32 status = I915_READ(DPLL(PIPE_A));
+		unsigned int mask;
+
+		mask = status & DPLL_PORTB_READY_MASK;
+		if (mask == 0xf)
+			mask = 0x0;
+		else
+			dev_priv->chv_phy_control |=
+				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0);
+
+		dev_priv->chv_phy_control |=
+			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH0);
+
+		mask = (status & DPLL_PORTC_READY_MASK) >> 4;
+		if (mask == 0xf)
+			mask = 0x0;
+		else
+			dev_priv->chv_phy_control |=
+				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1);
+
+		dev_priv->chv_phy_control |=
+			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH1);
+
+		dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY0);
+
+		dev_priv->chv_phy_assert[DPIO_PHY0] = false;
+	} else {
+		dev_priv->chv_phy_assert[DPIO_PHY0] = true;
+	}
+
+	if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) {
+		u32 status = I915_READ(DPIO_PHY_STATUS);
+		unsigned int mask;
+
+		mask = status & DPLL_PORTD_READY_MASK;
+
+		if (mask == 0xf)
+			mask = 0x0;
+		else
+			dev_priv->chv_phy_control |=
+				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0);
+
+		dev_priv->chv_phy_control |=
+			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY1, DPIO_CH0);
+
+		dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY1);
+
+		dev_priv->chv_phy_assert[DPIO_PHY1] = false;
+	} else {
+		dev_priv->chv_phy_assert[DPIO_PHY1] = true;
+	}
+
+	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
+
+	DRM_DEBUG_KMS("Initial PHY_CONTROL=0x%08x\n",
+		      dev_priv->chv_phy_control);
+}
+
+static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_well *cmn =
+		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
+	struct i915_power_well *disp2d =
+		lookup_power_well(dev_priv, VLV_DISP_PW_DISP2D);
+
+	/* If the display might be already active skip this */
+	if (cmn->desc->ops->is_enabled(dev_priv, cmn) &&
+	    disp2d->desc->ops->is_enabled(dev_priv, disp2d) &&
+	    I915_READ(DPIO_CTL) & DPIO_CMNRST)
+		return;
+
+	DRM_DEBUG_KMS("toggling display PHY side reset\n");
+
+	/* cmnlane needs DPLL registers */
+	disp2d->desc->ops->enable(dev_priv, disp2d);
+
+	/*
+	 * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx:
+	 * Need to assert and de-assert PHY SB reset by gating the
+	 * common lane power, then un-gating it.
+	 * Simply ungating isn't enough to reset the PHY enough to get
+	 * ports and lanes running.
+	 */
+	cmn->desc->ops->disable(dev_priv, cmn);
+}
+
+static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0)
+{
+	bool ret;
+
+	vlv_punit_get(dev_priv);
+	ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE;
+	vlv_punit_put(dev_priv);
+
+	return ret;
+}
+
+static void assert_ved_power_gated(struct drm_i915_private *dev_priv)
+{
+	WARN(!vlv_punit_is_power_gated(dev_priv, PUNIT_REG_VEDSSPM0),
+	     "VED not power gated\n");
+}
+
+static void assert_isp_power_gated(struct drm_i915_private *dev_priv)
+{
+	static const struct pci_device_id isp_ids[] = {
+		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0f38)},
+		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x22b8)},
+		{}
+	};
+
+	WARN(!pci_dev_present(isp_ids) &&
+	     !vlv_punit_is_power_gated(dev_priv, PUNIT_REG_ISPSSPM0),
+	     "ISP not power gated\n");
+}
+
+static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
+
+/**
+ * intel_power_domains_init_hw - initialize hardware power domain state
+ * @i915: i915 device instance
+ * @resume: Called from resume code paths or not
+ *
+ * This function initializes the hardware power domain state and enables all
+ * power wells belonging to the INIT power domain. Power wells in other
+ * domains (and not in the INIT domain) are referenced or disabled by
+ * intel_modeset_readout_hw_state(). After that the reference count of each
+ * power well must match its HW enabled state, see
+ * intel_power_domains_verify_state().
+ *
+ * It will return with power domains disabled (to be enabled later by
+ * intel_power_domains_enable()) and must be paired with
+ * intel_power_domains_fini_hw().
+ */
+void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+
+	power_domains->initializing = true;
+
+	if (INTEL_GEN(i915) >= 11) {
+		icl_display_core_init(i915, resume);
+	} else if (IS_CANNONLAKE(i915)) {
+		cnl_display_core_init(i915, resume);
+	} else if (IS_GEN9_BC(i915)) {
+		skl_display_core_init(i915, resume);
+	} else if (IS_GEN9_LP(i915)) {
+		bxt_display_core_init(i915, resume);
+	} else if (IS_CHERRYVIEW(i915)) {
+		mutex_lock(&power_domains->lock);
+		chv_phy_control_init(i915);
+		mutex_unlock(&power_domains->lock);
+		assert_isp_power_gated(i915);
+	} else if (IS_VALLEYVIEW(i915)) {
+		mutex_lock(&power_domains->lock);
+		vlv_cmnlane_wa(i915);
+		mutex_unlock(&power_domains->lock);
+		assert_ved_power_gated(i915);
+		assert_isp_power_gated(i915);
+	} else if (IS_BROADWELL(i915) || IS_HASWELL(i915)) {
+		hsw_assert_cdclk(i915);
+		intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915));
+	} else if (IS_IVYBRIDGE(i915)) {
+		intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915));
+	}
+
+	/*
+	 * Keep all power wells enabled for any dependent HW access during
+	 * initialization and to make sure we keep BIOS enabled display HW
+	 * resources powered until display HW readout is complete. We drop
+	 * this reference in intel_power_domains_enable().
+	 */
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+
+	/* Disable power support if the user asked so. */
+	if (!i915_modparams.disable_power_well)
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+	intel_power_domains_sync_hw(i915);
+
+	power_domains->initializing = false;
+}
+
+/**
+ * intel_power_domains_fini_hw - deinitialize hw power domain state
+ * @i915: i915 device instance
+ *
+ * De-initializes the display power domain HW state. It also ensures that the
+ * device stays powered up so that the driver can be reloaded.
+ *
+ * It must be called with power domains already disabled (after a call to
+ * intel_power_domains_disable()) and must be paired with
+ * intel_power_domains_init_hw().
+ */
+void intel_power_domains_fini_hw(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
+
+	/* Remove the refcount we took to keep power well support disabled. */
+	if (!i915_modparams.disable_power_well)
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+
+	intel_display_power_flush_work_sync(i915);
+
+	intel_power_domains_verify_state(i915);
+
+	/* Keep the power well enabled, but cancel its rpm wakeref. */
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+/**
+ * intel_power_domains_enable - enable toggling of display power wells
+ * @i915: i915 device instance
+ *
+ * Enable the ondemand enabling/disabling of the display power wells. Note that
+ * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled
+ * only at specific points of the display modeset sequence, thus they are not
+ * affected by the intel_power_domains_enable()/disable() calls. The purpose
+ * of these function is to keep the rest of power wells enabled until the end
+ * of display HW readout (which will acquire the power references reflecting
+ * the current HW state).
+ */
+void intel_power_domains_enable(struct drm_i915_private *i915)
+{
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&i915->power_domains.wakeref);
+
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
+	intel_power_domains_verify_state(i915);
+}
+
+/**
+ * intel_power_domains_disable - disable toggling of display power wells
+ * @i915: i915 device instance
+ *
+ * Disable the ondemand enabling/disabling of the display power wells. See
+ * intel_power_domains_enable() for which power wells this call controls.
+ */
+void intel_power_domains_disable(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+
+	WARN_ON(power_domains->wakeref);
+	power_domains->wakeref =
+		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+
+	intel_power_domains_verify_state(i915);
+}
+
+/**
+ * intel_power_domains_suspend - suspend power domain state
+ * @i915: i915 device instance
+ * @suspend_mode: specifies the target suspend state (idle, mem, hibernation)
+ *
+ * This function prepares the hardware power domain state before entering
+ * system suspend.
+ *
+ * It must be called with power domains already disabled (after a call to
+ * intel_power_domains_disable()) and paired with intel_power_domains_resume().
+ */
+void intel_power_domains_suspend(struct drm_i915_private *i915,
+				 enum i915_drm_suspend_mode suspend_mode)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	intel_wakeref_t wakeref __maybe_unused =
+		fetch_and_zero(&power_domains->wakeref);
+
+	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
+
+	/*
+	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
+	 * support don't manually deinit the power domains. This also means the
+	 * CSR/DMC firmware will stay active, it will power down any HW
+	 * resources as required and also enable deeper system power states
+	 * that would be blocked if the firmware was inactive.
+	 */
+	if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
+	    suspend_mode == I915_DRM_SUSPEND_IDLE &&
+	    i915->csr.dmc_payload) {
+		intel_display_power_flush_work(i915);
+		intel_power_domains_verify_state(i915);
+		return;
+	}
+
+	/*
+	 * Even if power well support was disabled we still want to disable
+	 * power wells if power domains must be deinitialized for suspend.
+	 */
+	if (!i915_modparams.disable_power_well)
+		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
+
+	intel_display_power_flush_work(i915);
+	intel_power_domains_verify_state(i915);
+
+	if (INTEL_GEN(i915) >= 11)
+		icl_display_core_uninit(i915);
+	else if (IS_CANNONLAKE(i915))
+		cnl_display_core_uninit(i915);
+	else if (IS_GEN9_BC(i915))
+		skl_display_core_uninit(i915);
+	else if (IS_GEN9_LP(i915))
+		bxt_display_core_uninit(i915);
+
+	power_domains->display_core_suspended = true;
+}
+
+/**
+ * intel_power_domains_resume - resume power domain state
+ * @i915: i915 device instance
+ *
+ * This function resume the hardware power domain state during system resume.
+ *
+ * It will return with power domain support disabled (to be enabled later by
+ * intel_power_domains_enable()) and must be paired with
+ * intel_power_domains_suspend().
+ */
+void intel_power_domains_resume(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+
+	if (power_domains->display_core_suspended) {
+		intel_power_domains_init_hw(i915, true);
+		power_domains->display_core_suspended = false;
+	} else {
+		WARN_ON(power_domains->wakeref);
+		power_domains->wakeref =
+			intel_display_power_get(i915, POWER_DOMAIN_INIT);
+	}
+
+	intel_power_domains_verify_state(i915);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+
+static void intel_power_domains_dump_info(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	struct i915_power_well *power_well;
+
+	for_each_power_well(i915, power_well) {
+		enum intel_display_power_domain domain;
+
+		DRM_DEBUG_DRIVER("%-25s %d\n",
+				 power_well->desc->name, power_well->count);
+
+		for_each_power_domain(domain, power_well->desc->domains)
+			DRM_DEBUG_DRIVER("  %-23s %d\n",
+					 intel_display_power_domain_str(domain),
+					 power_domains->domain_use_count[domain]);
+	}
+}
+
+/**
+ * intel_power_domains_verify_state - verify the HW/SW state for all power wells
+ * @i915: i915 device instance
+ *
+ * Verify if the reference count of each power well matches its HW enabled
+ * state and the total refcount of the domains it belongs to. This must be
+ * called after modeset HW state sanitization, which is responsible for
+ * acquiring reference counts for any power wells in use and disabling the
+ * ones left on by BIOS but not required by any active output.
+ */
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
+{
+	struct i915_power_domains *power_domains = &i915->power_domains;
+	struct i915_power_well *power_well;
+	bool dump_domain_info;
+
+	mutex_lock(&power_domains->lock);
+
+	verify_async_put_domains_state(power_domains);
+
+	dump_domain_info = false;
+	for_each_power_well(i915, power_well) {
+		enum intel_display_power_domain domain;
+		int domains_count;
+		bool enabled;
+
+		enabled = power_well->desc->ops->is_enabled(i915, power_well);
+		if ((power_well->count || power_well->desc->always_on) !=
+		    enabled)
+			DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)",
+				  power_well->desc->name,
+				  power_well->count, enabled);
+
+		domains_count = 0;
+		for_each_power_domain(domain, power_well->desc->domains)
+			domains_count += power_domains->domain_use_count[domain];
+
+		if (power_well->count != domains_count) {
+			DRM_ERROR("power well %s refcount/domain refcount mismatch "
+				  "(refcount %d/domains refcount %d)\n",
+				  power_well->desc->name, power_well->count,
+				  domains_count);
+			dump_domain_info = true;
+		}
+	}
+
+	if (dump_domain_info) {
+		static bool dumped;
+
+		if (!dumped) {
+			intel_power_domains_dump_info(i915);
+			dumped = true;
+		}
+	}
+
+	mutex_unlock(&power_domains->lock);
+}
+
+#else
+
+static void intel_power_domains_verify_state(struct drm_i915_private *i915)
+{
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
new file mode 100644
index 000000000000..ff57b0a7fe59
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DISPLAY_POWER_H__
+#define __INTEL_DISPLAY_POWER_H__
+
+#include "intel_display.h"
+#include "intel_runtime_pm.h"
+#include "i915_reg.h"
+
+struct drm_i915_private;
+struct intel_encoder;
+
+enum intel_display_power_domain {
+	POWER_DOMAIN_DISPLAY_CORE,
+	POWER_DOMAIN_PIPE_A,
+	POWER_DOMAIN_PIPE_B,
+	POWER_DOMAIN_PIPE_C,
+	POWER_DOMAIN_PIPE_A_PANEL_FITTER,
+	POWER_DOMAIN_PIPE_B_PANEL_FITTER,
+	POWER_DOMAIN_PIPE_C_PANEL_FITTER,
+	POWER_DOMAIN_TRANSCODER_A,
+	POWER_DOMAIN_TRANSCODER_B,
+	POWER_DOMAIN_TRANSCODER_C,
+	POWER_DOMAIN_TRANSCODER_EDP,
+	POWER_DOMAIN_TRANSCODER_EDP_VDSC,
+	POWER_DOMAIN_TRANSCODER_DSI_A,
+	POWER_DOMAIN_TRANSCODER_DSI_C,
+	POWER_DOMAIN_PORT_DDI_A_LANES,
+	POWER_DOMAIN_PORT_DDI_B_LANES,
+	POWER_DOMAIN_PORT_DDI_C_LANES,
+	POWER_DOMAIN_PORT_DDI_D_LANES,
+	POWER_DOMAIN_PORT_DDI_E_LANES,
+	POWER_DOMAIN_PORT_DDI_F_LANES,
+	POWER_DOMAIN_PORT_DDI_A_IO,
+	POWER_DOMAIN_PORT_DDI_B_IO,
+	POWER_DOMAIN_PORT_DDI_C_IO,
+	POWER_DOMAIN_PORT_DDI_D_IO,
+	POWER_DOMAIN_PORT_DDI_E_IO,
+	POWER_DOMAIN_PORT_DDI_F_IO,
+	POWER_DOMAIN_PORT_DSI,
+	POWER_DOMAIN_PORT_CRT,
+	POWER_DOMAIN_PORT_OTHER,
+	POWER_DOMAIN_VGA,
+	POWER_DOMAIN_AUDIO,
+	POWER_DOMAIN_AUX_A,
+	POWER_DOMAIN_AUX_B,
+	POWER_DOMAIN_AUX_C,
+	POWER_DOMAIN_AUX_D,
+	POWER_DOMAIN_AUX_E,
+	POWER_DOMAIN_AUX_F,
+	POWER_DOMAIN_AUX_IO_A,
+	POWER_DOMAIN_AUX_TBT1,
+	POWER_DOMAIN_AUX_TBT2,
+	POWER_DOMAIN_AUX_TBT3,
+	POWER_DOMAIN_AUX_TBT4,
+	POWER_DOMAIN_GMBUS,
+	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_GT_IRQ,
+	POWER_DOMAIN_INIT,
+
+	POWER_DOMAIN_NUM,
+};
+
+#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A)
+#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \
+		((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER)
+#define POWER_DOMAIN_TRANSCODER(tran) \
+	((tran) == TRANSCODER_EDP ? POWER_DOMAIN_TRANSCODER_EDP : \
+	 (tran) + POWER_DOMAIN_TRANSCODER_A)
+
+struct i915_power_well;
+
+struct i915_power_well_ops {
+	/*
+	 * Synchronize the well's hw state to match the current sw state, for
+	 * example enable/disable it based on the current refcount. Called
+	 * during driver init and resume time, possibly after first calling
+	 * the enable/disable handlers.
+	 */
+	void (*sync_hw)(struct drm_i915_private *dev_priv,
+			struct i915_power_well *power_well);
+	/*
+	 * Enable the well and resources that depend on it (for example
+	 * interrupts located on the well). Called after the 0->1 refcount
+	 * transition.
+	 */
+	void (*enable)(struct drm_i915_private *dev_priv,
+		       struct i915_power_well *power_well);
+	/*
+	 * Disable the well and resources that depend on it. Called after
+	 * the 1->0 refcount transition.
+	 */
+	void (*disable)(struct drm_i915_private *dev_priv,
+			struct i915_power_well *power_well);
+	/* Returns the hw enabled state. */
+	bool (*is_enabled)(struct drm_i915_private *dev_priv,
+			   struct i915_power_well *power_well);
+};
+
+struct i915_power_well_regs {
+	i915_reg_t bios;
+	i915_reg_t driver;
+	i915_reg_t kvmr;
+	i915_reg_t debug;
+};
+
+/* Power well structure for haswell */
+struct i915_power_well_desc {
+	const char *name;
+	bool always_on;
+	u64 domains;
+	/* unique identifier for this power well */
+	enum i915_power_well_id id;
+	/*
+	 * Arbitraty data associated with this power well. Platform and power
+	 * well specific.
+	 */
+	union {
+		struct {
+			/*
+			 * request/status flag index in the PUNIT power well
+			 * control/status registers.
+			 */
+			u8 idx;
+		} vlv;
+		struct {
+			enum dpio_phy phy;
+		} bxt;
+		struct {
+			const struct i915_power_well_regs *regs;
+			/*
+			 * request/status flag index in the power well
+			 * constrol/status registers.
+			 */
+			u8 idx;
+			/* Mask of pipes whose IRQ logic is backed by the pw */
+			u8 irq_pipe_mask;
+			/* The pw is backing the VGA functionality */
+			bool has_vga:1;
+			bool has_fuses:1;
+			/*
+			 * The pw is for an ICL+ TypeC PHY port in
+			 * Thunderbolt mode.
+			 */
+			bool is_tc_tbt:1;
+		} hsw;
+	};
+	const struct i915_power_well_ops *ops;
+};
+
+struct i915_power_well {
+	const struct i915_power_well_desc *desc;
+	/* power well enable/disable usage count */
+	int count;
+	/* cached hw enabled state */
+	bool hw_enabled;
+};
+
+struct i915_power_domains {
+	/*
+	 * Power wells needed for initialization at driver init and suspend
+	 * time are on. They are kept on until after the first modeset.
+	 */
+	bool initializing;
+	bool display_core_suspended;
+	int power_well_count;
+
+	intel_wakeref_t wakeref;
+
+	struct mutex lock;
+	int domain_use_count[POWER_DOMAIN_NUM];
+
+	struct delayed_work async_put_work;
+	intel_wakeref_t async_put_wakeref;
+	u64 async_put_domains[2];
+
+	struct i915_power_well *power_wells;
+};
+
+#define for_each_power_domain(domain, mask)				\
+	for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++)	\
+		for_each_if(BIT_ULL(domain) & (mask))
+
+#define for_each_power_well(__dev_priv, __power_well)				\
+	for ((__power_well) = (__dev_priv)->power_domains.power_wells;	\
+	     (__power_well) - (__dev_priv)->power_domains.power_wells <	\
+		(__dev_priv)->power_domains.power_well_count;		\
+	     (__power_well)++)
+
+#define for_each_power_well_reverse(__dev_priv, __power_well)			\
+	for ((__power_well) = (__dev_priv)->power_domains.power_wells +		\
+			      (__dev_priv)->power_domains.power_well_count - 1;	\
+	     (__power_well) - (__dev_priv)->power_domains.power_wells >= 0;	\
+	     (__power_well)--)
+
+#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask)	\
+	for_each_power_well(__dev_priv, __power_well)				\
+		for_each_if((__power_well)->desc->domains & (__domain_mask))
+
+#define for_each_power_domain_well_reverse(__dev_priv, __power_well, __domain_mask) \
+	for_each_power_well_reverse(__dev_priv, __power_well)		        \
+		for_each_if((__power_well)->desc->domains & (__domain_mask))
+
+void skl_enable_dc6(struct drm_i915_private *dev_priv);
+void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv);
+void bxt_enable_dc9(struct drm_i915_private *dev_priv);
+void bxt_disable_dc9(struct drm_i915_private *dev_priv);
+void gen9_enable_dc5(struct drm_i915_private *dev_priv);
+
+int intel_power_domains_init(struct drm_i915_private *dev_priv);
+void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
+void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
+void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv);
+void icl_display_core_init(struct drm_i915_private *dev_priv, bool resume);
+void icl_display_core_uninit(struct drm_i915_private *dev_priv);
+void intel_power_domains_enable(struct drm_i915_private *dev_priv);
+void intel_power_domains_disable(struct drm_i915_private *dev_priv);
+void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
+				 enum i915_drm_suspend_mode);
+void intel_power_domains_resume(struct drm_i915_private *dev_priv);
+void hsw_enable_pc8(struct drm_i915_private *dev_priv);
+void hsw_disable_pc8(struct drm_i915_private *dev_priv);
+void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
+void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
+
+const char *
+intel_display_power_domain_str(enum intel_display_power_domain domain);
+
+bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
+				    enum intel_display_power_domain domain);
+bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
+				      enum intel_display_power_domain domain);
+intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
+					enum intel_display_power_domain domain);
+intel_wakeref_t
+intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+				   enum intel_display_power_domain domain);
+void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
+				       enum intel_display_power_domain domain);
+void __intel_display_power_put_async(struct drm_i915_private *i915,
+				     enum intel_display_power_domain domain,
+				     intel_wakeref_t wakeref);
+void intel_display_power_flush_work(struct drm_i915_private *i915);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_display_power_put(struct drm_i915_private *dev_priv,
+			     enum intel_display_power_domain domain,
+			     intel_wakeref_t wakeref);
+static inline void
+intel_display_power_put_async(struct drm_i915_private *i915,
+			      enum intel_display_power_domain domain,
+			      intel_wakeref_t wakeref)
+{
+	__intel_display_power_put_async(i915, domain, wakeref);
+}
+#else
+static inline void
+intel_display_power_put(struct drm_i915_private *i915,
+			enum intel_display_power_domain domain,
+			intel_wakeref_t wakeref)
+{
+	intel_display_power_put_unchecked(i915, domain);
+}
+
+static inline void
+intel_display_power_put_async(struct drm_i915_private *i915,
+			      enum intel_display_power_domain domain,
+			      intel_wakeref_t wakeref)
+{
+	__intel_display_power_put_async(i915, domain, -1);
+}
+#endif
+
+#define with_intel_display_power(i915, domain, wf) \
+	for ((wf) = intel_display_power_get((i915), (domain)); (wf); \
+	     intel_display_power_put_async((i915), (domain), (wf)), (wf) = 0)
+
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices);
+
+void chv_phy_powergate_lanes(struct intel_encoder *encoder,
+			     bool override, unsigned int mask);
+bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy,
+			  enum dpio_channel ch, bool override);
+
+#endif /* __INTEL_DISPLAY_POWER_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 560274d1c50b..4336df46fe78 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -31,6 +31,7 @@
 #include <linux/reboot.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+
 #include <asm/byteorder.h>
 
 #include <drm/drm_atomic_helper.h>
@@ -41,18 +42,27 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
 #include "intel_dp.h"
+#include "intel_dp_link_training.h"
+#include "intel_dp_mst.h"
+#include "intel_dpio_phy.h"
 #include "intel_drv.h"
+#include "intel_fifo_underrun.h"
 #include "intel_hdcp.h"
 #include "intel_hdmi.h"
+#include "intel_hotplug.h"
 #include "intel_lspcon.h"
 #include "intel_lvds.h"
 #include "intel_panel.h"
 #include "intel_psr.h"
+#include "intel_sideband.h"
+#include "intel_vdsc.h"
 
 #define DP_DPRX_ESI_LEN 14
 
@@ -206,14 +216,17 @@ static int intel_dp_get_fia_supported_lane_count(struct intel_dp *intel_dp)
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
 	enum tc_port tc_port = intel_port_to_tc(dev_priv, dig_port->base.port);
+	intel_wakeref_t wakeref;
 	u32 lane_info;
 
 	if (tc_port == PORT_TC_NONE || dig_port->tc_type != TC_PORT_TYPEC)
 		return 4;
 
-	lane_info = (I915_READ(PORT_TX_DFLEXDPSP) &
-		     DP_LANE_ASSIGNMENT_MASK(tc_port)) >>
-		    DP_LANE_ASSIGNMENT_SHIFT(tc_port);
+	lane_info = 0;
+	with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref)
+		lane_info = (I915_READ(PORT_TX_DFLEXDPSP) &
+			     DP_LANE_ASSIGNMENT_MASK(tc_port)) >>
+				DP_LANE_ASSIGNMENT_SHIFT(tc_port);
 
 	switch (lane_info) {
 	default:
@@ -319,6 +332,7 @@ static int icl_max_source_rate(struct intel_dp *intel_dp)
 	enum port port = dig_port->base.port;
 
 	if (intel_port_is_combophy(dev_priv, port) &&
+	    !IS_ELKHARTLAKE(dev_priv) &&
 	    !intel_dp_is_edp(intel_dp))
 		return 540000;
 
@@ -1068,13 +1082,13 @@ intel_dp_check_edp(struct intel_dp *intel_dp)
 static u32
 intel_dp_aux_wait_done(struct intel_dp *intel_dp)
 {
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg(intel_dp);
 	u32 status;
 	bool done;
 
-#define C (((status = I915_READ_NOTRACE(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0)
-	done = wait_event_timeout(dev_priv->gmbus_wait_queue, C,
+#define C (((status = intel_uncore_read_notrace(&i915->uncore, ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0)
+	done = wait_event_timeout(i915->gmbus_wait_queue, C,
 				  msecs_to_jiffies_timeout(10));
 
 	/* just trace the final value */
@@ -1207,11 +1221,15 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 		  u32 aux_send_ctl_flags)
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-	struct drm_i915_private *dev_priv =
+	struct drm_i915_private *i915 =
 			to_i915(intel_dig_port->base.base.dev);
+	struct intel_uncore *uncore = &i915->uncore;
 	i915_reg_t ch_ctl, ch_data[5];
 	u32 aux_clock_divider;
-	intel_wakeref_t wakeref;
+	enum intel_display_power_domain aux_domain =
+		intel_aux_power_domain(intel_dig_port);
+	intel_wakeref_t aux_wakeref;
+	intel_wakeref_t pps_wakeref;
 	int i, ret, recv_bytes;
 	int try, clock = 0;
 	u32 status;
@@ -1221,7 +1239,8 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	for (i = 0; i < ARRAY_SIZE(ch_data); i++)
 		ch_data[i] = intel_dp->aux_ch_data_reg(intel_dp, i);
 
-	wakeref = pps_lock(intel_dp);
+	aux_wakeref = intel_display_power_get(i915, aux_domain);
+	pps_wakeref = pps_lock(intel_dp);
 
 	/*
 	 * We will be called with VDD already enabled for dpcd/edid/oui reads.
@@ -1235,13 +1254,13 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 	 * lowest possible wakeup latency and so prevent the cpu from going into
 	 * deep sleep states.
 	 */
-	pm_qos_update_request(&dev_priv->pm_qos, 0);
+	pm_qos_update_request(&i915->pm_qos, 0);
 
 	intel_dp_check_edp(intel_dp);
 
 	/* Try to wait for any previous AUX channel activity */
 	for (try = 0; try < 3; try++) {
-		status = I915_READ_NOTRACE(ch_ctl);
+		status = intel_uncore_read_notrace(uncore, ch_ctl);
 		if ((status & DP_AUX_CH_CTL_SEND_BUSY) == 0)
 			break;
 		msleep(1);
@@ -1251,7 +1270,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 
 	if (try == 3) {
 		static u32 last_status = -1;
-		const u32 status = I915_READ(ch_ctl);
+		const u32 status = intel_uncore_read(uncore, ch_ctl);
 
 		if (status != last_status) {
 			WARN(1, "dp_aux_ch not started status 0x%08x\n",
@@ -1280,21 +1299,23 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp,
 		for (try = 0; try < 5; try++) {
 			/* Load the send data into the aux channel data registers */
 			for (i = 0; i < send_bytes; i += 4)
-				I915_WRITE(ch_data[i >> 2],
-					   intel_dp_pack_aux(send + i,
-							     send_bytes - i));
+				intel_uncore_write(uncore,
+						   ch_data[i >> 2],
+						   intel_dp_pack_aux(send + i,
+								     send_bytes - i));
 
 			/* Send the command and wait for it to complete */
-			I915_WRITE(ch_ctl, send_ctl);
+			intel_uncore_write(uncore, ch_ctl, send_ctl);
 
 			status = intel_dp_aux_wait_done(intel_dp);
 
 			/* Clear done status and any errors */
-			I915_WRITE(ch_ctl,
-				   status |
-				   DP_AUX_CH_CTL_DONE |
-				   DP_AUX_CH_CTL_TIME_OUT_ERROR |
-				   DP_AUX_CH_CTL_RECEIVE_ERROR);
+			intel_uncore_write(uncore,
+					   ch_ctl,
+					   status |
+					   DP_AUX_CH_CTL_DONE |
+					   DP_AUX_CH_CTL_TIME_OUT_ERROR |
+					   DP_AUX_CH_CTL_RECEIVE_ERROR);
 
 			/* DP CTS 1.2 Core Rev 1.1, 4.2.1.1 & 4.2.1.2
 			 *   400us delay required for errors and timeouts
@@ -1357,17 +1378,18 @@ done:
 		recv_bytes = recv_size;
 
 	for (i = 0; i < recv_bytes; i += 4)
-		intel_dp_unpack_aux(I915_READ(ch_data[i >> 2]),
+		intel_dp_unpack_aux(intel_uncore_read(uncore, ch_data[i >> 2]),
 				    recv + i, recv_bytes - i);
 
 	ret = recv_bytes;
 out:
-	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
+	pm_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE);
 
 	if (vdd)
 		edp_panel_vdd_off(intel_dp, false);
 
-	pps_unlock(intel_dp, wakeref);
+	pps_unlock(intel_dp, pps_wakeref);
+	intel_display_power_put_async(i915, aux_domain, aux_wakeref);
 
 	return ret;
 }
@@ -1832,6 +1854,19 @@ intel_dp_adjust_compliance_config(struct intel_dp *intel_dp,
 	}
 }
 
+static int intel_dp_output_bpp(const struct intel_crtc_state *crtc_state, int bpp)
+{
+	/*
+	 * bpp value was assumed to RGB format. And YCbCr 4:2:0 output
+	 * format of the number of bytes per pixel will be half the number
+	 * of bytes of RGB pixel.
+	 */
+	if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420)
+		bpp /= 2;
+
+	return bpp;
+}
+
 /* Optimize link config in order: max bpp, min clock, min lanes */
 static int
 intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
@@ -2075,6 +2110,36 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 	return 0;
 }
 
+static int
+intel_dp_ycbcr420_config(struct intel_dp *intel_dp,
+			 struct drm_connector *connector,
+			 struct intel_crtc_state *crtc_state)
+{
+	const struct drm_display_info *info = &connector->display_info;
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->base.adjusted_mode;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	int ret;
+
+	if (!drm_mode_is_420_only(info, adjusted_mode) ||
+	    !intel_dp_get_colorimetry_status(intel_dp) ||
+	    !connector->ycbcr_420_allowed)
+		return 0;
+
+	crtc_state->output_format = INTEL_OUTPUT_FORMAT_YCBCR420;
+
+	/* YCBCR 420 output conversion needs a scaler */
+	ret = skl_update_scaler_crtc(crtc_state);
+	if (ret) {
+		DRM_DEBUG_KMS("Scaler allocation for output failed\n");
+		return ret;
+	}
+
+	intel_pch_panel_fitting(crtc, crtc_state, DRM_MODE_SCALE_FULLSCREEN);
+
+	return 0;
+}
+
 bool intel_dp_limited_color_range(const struct intel_crtc_state *crtc_state,
 				  const struct drm_connector_state *conn_state)
 {
@@ -2114,7 +2179,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		to_intel_digital_connector_state(conn_state);
 	bool constant_n = drm_dp_has_quirk(&intel_dp->desc,
 					   DP_DPCD_QUIRK_CONSTANT_N);
-	int ret, output_bpp;
+	int ret = 0, output_bpp;
 
 	if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A)
 		pipe_config->has_pch_encoder = true;
@@ -2122,6 +2187,12 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	if (lspcon->active)
 		lspcon_ycbcr420_config(&intel_connector->base, pipe_config);
+	else
+		ret = intel_dp_ycbcr420_config(intel_dp, &intel_connector->base,
+					       pipe_config);
+
+	if (ret)
+		return ret;
 
 	pipe_config->has_drrs = false;
 	if (IS_G4X(dev_priv) || port == PORT_A)
@@ -2169,7 +2240,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	if (pipe_config->dsc_params.compression_enable)
 		output_bpp = pipe_config->dsc_params.compressed_bpp;
 	else
-		output_bpp = pipe_config->pipe_bpp;
+		output_bpp = intel_dp_output_bpp(pipe_config, pipe_config->pipe_bpp);
 
 	intel_link_compute_m_n(output_bpp,
 			       pipe_config->lane_count,
@@ -3148,12 +3219,12 @@ static void chv_post_disable_dp(struct intel_encoder *encoder,
 
 	intel_dp_link_down(encoder, old_crtc_state);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void
@@ -3927,9 +3998,6 @@ intel_dp_link_down(struct intel_encoder *encoder,
 	enum port port = encoder->port;
 	u32 DP = intel_dp->DP;
 
-	if (WARN_ON(HAS_DDI(dev_priv)))
-		return;
-
 	if (WARN_ON((I915_READ(intel_dp->output_reg) & DP_PORT_EN) == 0))
 		return;
 
@@ -4041,6 +4109,16 @@ intel_dp_read_dpcd(struct intel_dp *intel_dp)
 	return intel_dp->dpcd[DP_DPCD_REV] != 0;
 }
 
+bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
+{
+	u8 dprx = 0;
+
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
+			      &dprx) != 1)
+		return false;
+	return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
+}
+
 static void intel_dp_get_dsc_sink_cap(struct intel_dp *intel_dp)
 {
 	/*
@@ -4351,6 +4429,96 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp,
 	return 0;
 }
 
+static void
+intel_pixel_encoding_setup_vsc(struct intel_dp *intel_dp,
+			       const struct intel_crtc_state *crtc_state)
+{
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct dp_sdp vsc_sdp = {};
+
+	/* Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119 */
+	vsc_sdp.sdp_header.HB0 = 0;
+	vsc_sdp.sdp_header.HB1 = 0x7;
+
+	/*
+	 * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/
+	 * Colorimetry Format indication.
+	 */
+	vsc_sdp.sdp_header.HB2 = 0x5;
+
+	/*
+	 * VSC SDP supporting 3D stereo, + PSR2, + Pixel Encoding/
+	 * Colorimetry Format indication (HB2 = 05h).
+	 */
+	vsc_sdp.sdp_header.HB3 = 0x13;
+
+	/*
+	 * YCbCr 420 = 3h DB16[7:4] ITU-R BT.601 = 0h, ITU-R BT.709 = 1h
+	 * DB16[3:0] DP 1.4a spec, Table 2-120
+	 */
+	vsc_sdp.db[16] = 0x3 << 4; /* 0x3 << 4 , YCbCr 420*/
+	/* RGB->YCBCR color conversion uses the BT.709 color space. */
+	vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */
+
+	/*
+	 * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only,
+	 * the following Component Bit Depth values are defined:
+	 * 001b = 8bpc.
+	 * 010b = 10bpc.
+	 * 011b = 12bpc.
+	 * 100b = 16bpc.
+	 */
+	switch (crtc_state->pipe_bpp) {
+	case 24: /* 8bpc */
+		vsc_sdp.db[17] = 0x1;
+		break;
+	case 30: /* 10bpc */
+		vsc_sdp.db[17] = 0x2;
+		break;
+	case 36: /* 12bpc */
+		vsc_sdp.db[17] = 0x3;
+		break;
+	case 48: /* 16bpc */
+		vsc_sdp.db[17] = 0x4;
+		break;
+	default:
+		MISSING_CASE(crtc_state->pipe_bpp);
+		break;
+	}
+
+	/*
+	 * Dynamic Range (Bit 7)
+	 * 0 = VESA range, 1 = CTA range.
+	 * all YCbCr are always limited range
+	 */
+	vsc_sdp.db[17] |= 0x80;
+
+	/*
+	 * Content Type (Bits 2:0)
+	 * 000b = Not defined.
+	 * 001b = Graphics.
+	 * 010b = Photo.
+	 * 011b = Video.
+	 * 100b = Game
+	 * All other values are RESERVED.
+	 * Note: See CTA-861-G for the definition and expected
+	 * processing by a stream sink for the above contect types.
+	 */
+	vsc_sdp.db[18] = 0;
+
+	intel_dig_port->write_infoframe(&intel_dig_port->base,
+			crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp));
+}
+
+void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
+			       const struct intel_crtc_state *crtc_state)
+{
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_YCBCR420)
+		return;
+
+	intel_pixel_encoding_setup_vsc(intel_dp, crtc_state);
+}
+
 static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp)
 {
 	int status = 0;
@@ -4829,15 +4997,15 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp)
 	u8 *dpcd = intel_dp->dpcd;
 	u8 type;
 
+	if (WARN_ON(intel_dp_is_edp(intel_dp)))
+		return connector_status_connected;
+
 	if (lspcon->active)
 		lspcon_resume(lspcon);
 
 	if (!intel_dp_get_dpcd(intel_dp))
 		return connector_status_disconnected;
 
-	if (intel_dp_is_edp(intel_dp))
-		return connector_status_connected;
-
 	/* if there's no downstream port, we're done */
 	if (!drm_dp_is_branch(dpcd))
 		return connector_status_connected;
@@ -5219,12 +5387,15 @@ static bool icl_tc_port_connected(struct drm_i915_private *dev_priv,
 	u32 dpsp;
 
 	/*
-	 * WARN if we got a legacy port HPD, but VBT didn't mark the port as
+	 * Complain if we got a legacy port HPD, but VBT didn't mark the port as
 	 * legacy. Treat the port as legacy from now on.
 	 */
-	if (WARN_ON(!intel_dig_port->tc_legacy_port &&
-		    I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)))
+	if (!intel_dig_port->tc_legacy_port &&
+	    I915_READ(SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) {
+		DRM_ERROR("VBT incorrectly claims port %c is not TypeC legacy\n",
+			  port_name(port));
 		intel_dig_port->tc_legacy_port = true;
+	}
 	is_legacy = intel_dig_port->tc_legacy_port;
 
 	/*
@@ -5276,7 +5447,7 @@ static bool icl_digital_port_connected(struct intel_encoder *encoder)
  *
  * Return %true if port is connected, %false otherwise.
  */
-bool intel_digital_port_connected(struct intel_encoder *encoder)
+static bool __intel_digital_port_connected(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 
@@ -5306,6 +5477,18 @@ bool intel_digital_port_connected(struct intel_encoder *encoder)
 	return false;
 }
 
+bool intel_digital_port_connected(struct intel_encoder *encoder)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	bool is_connected = false;
+	intel_wakeref_t wakeref;
+
+	with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref)
+		is_connected = __intel_digital_port_connected(encoder);
+
+	return is_connected;
+}
+
 static struct edid *
 intel_dp_get_edid(struct intel_dp *intel_dp)
 {
@@ -5359,16 +5542,11 @@ intel_dp_detect(struct drm_connector *connector,
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
 	struct intel_encoder *encoder = &dig_port->base;
 	enum drm_connector_status status;
-	enum intel_display_power_domain aux_domain =
-		intel_aux_power_domain(dig_port);
-	intel_wakeref_t wakeref;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
 	WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex));
 
-	wakeref = intel_display_power_get(dev_priv, aux_domain);
-
 	/* Can't disconnect eDP */
 	if (intel_dp_is_edp(intel_dp))
 		status = edp_detect(intel_dp);
@@ -5432,10 +5610,8 @@ intel_dp_detect(struct drm_connector *connector,
 		int ret;
 
 		ret = intel_dp_retrain_link(encoder, ctx);
-		if (ret) {
-			intel_display_power_put(dev_priv, aux_domain, wakeref);
+		if (ret)
 			return ret;
-		}
 	}
 
 	/*
@@ -5457,7 +5633,6 @@ out:
 	if (status != connector_status_connected && !intel_dp->is_mst)
 		intel_dp_unset_edid(intel_dp);
 
-	intel_display_power_put(dev_priv, aux_domain, wakeref);
 	return status;
 }
 
@@ -6235,6 +6410,10 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 
 	intel_dp->reset_link_params = true;
 
+	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv) &&
+	    !intel_dp_is_edp(intel_dp))
+		return;
+
 	with_pps_lock(intel_dp, wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 			intel_dp->active_pipe = vlv_active_pipe(intel_dp);
@@ -6278,9 +6457,6 @@ enum irqreturn
 intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 {
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
-	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	enum irqreturn ret = IRQ_NONE;
-	intel_wakeref_t wakeref;
 
 	if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) {
 		/*
@@ -6303,9 +6479,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		return IRQ_NONE;
 	}
 
-	wakeref = intel_display_power_get(dev_priv,
-					  intel_aux_power_domain(intel_dig_port));
-
 	if (intel_dp->is_mst) {
 		if (intel_dp_check_mst_status(intel_dp) == -EINVAL) {
 			/*
@@ -6317,7 +6490,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 			intel_dp->is_mst = false;
 			drm_dp_mst_topology_mgr_set_mst(&intel_dp->mst_mgr,
 							intel_dp->is_mst);
-			goto put_power;
+
+			return IRQ_NONE;
 		}
 	}
 
@@ -6327,17 +6501,10 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 		handled = intel_dp_short_pulse(intel_dp);
 
 		if (!handled)
-			goto put_power;
+			return IRQ_NONE;
 	}
 
-	ret = IRQ_HANDLED;
-
-put_power:
-	intel_display_power_put(dev_priv,
-				intel_aux_power_domain(intel_dig_port),
-				wakeref);
-
-	return ret;
+	return IRQ_HANDLED;
 }
 
 /* check the VBT to see whether the eDP is on another port */
@@ -7182,18 +7349,20 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	intel_dp->pps_pipe = INVALID_PIPE;
 	intel_dp->active_pipe = INVALID_PIPE;
 
-	/* intel_dp vfuncs */
-	if (HAS_DDI(dev_priv))
-		intel_dp->prepare_link_retrain = intel_ddi_prepare_link_retrain;
-
 	/* Preserve the current hw state. */
 	intel_dp->DP = I915_READ(intel_dp->output_reg);
 	intel_dp->attached_connector = intel_connector;
 
-	if (intel_dp_is_port_edp(dev_priv, port))
+	if (intel_dp_is_port_edp(dev_priv, port)) {
+		/*
+		 * Currently we don't support eDP on TypeC ports, although in
+		 * theory it could work on TypeC legacy ports.
+		 */
+		WARN_ON(intel_port_is_tc(dev_priv, port));
 		type = DRM_MODE_CONNECTOR_eDP;
-	else
+	} else {
 		type = DRM_MODE_CONNECTOR_DisplayPort;
+	}
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		intel_dp->active_pipe = vlv_active_pipe(intel_dp);
@@ -7223,6 +7392,9 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		connector->interlace_allowed = true;
 	connector->doublescan_allowed = 0;
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		connector->ycbcr_420_allowed = true;
+
 	intel_encoder->hpd_pin = intel_hpd_pin_default(dev_priv, port);
 
 	intel_dp_aux_init(intel_dp);
diff --git a/drivers/gpu/drm/i915/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h
index 5e9e8d13de6e..da70b1a41c83 100644
--- a/drivers/gpu/drm/i915/intel_dp.h
+++ b/drivers/gpu/drm/i915/display/intel_dp.h
@@ -108,6 +108,7 @@ u8 intel_dp_dsc_get_slice_count(struct intel_dp *intel_dp, int mode_clock,
 				int mode_hdisplay);
 
 bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
+bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
 bool intel_digital_port_connected(struct intel_encoder *encoder);
diff --git a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
index 357136f17f85..7ded95a334db 100644
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "intel_dp_aux_backlight.h"
 #include "intel_drv.h"
 
 static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.h b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.h
new file mode 100644
index 000000000000..ed60c2858967
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DP_AUX_BACKLIGHT_H__
+#define __INTEL_DP_AUX_BACKLIGHT_H__
+
+struct intel_connector;
+
+int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector);
+
+#endif /* __INTEL_DP_AUX_BACKLIGHT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 54b069333e2f..9b1fccea966b 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -22,6 +22,7 @@
  */
 
 #include "intel_dp.h"
+#include "intel_dp_link_training.h"
 #include "intel_drv.h"
 
 static void
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
new file mode 100644
index 000000000000..174566adcc92
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DP_LINK_TRAINING_H__
+#define __INTEL_DP_LINK_TRAINING_H__
+
+struct intel_dp;
+
+void intel_dp_start_link_train(struct intel_dp *intel_dp);
+void intel_dp_stop_link_train(struct intel_dp *intel_dp);
+
+#endif /* __INTEL_DP_LINK_TRAINING_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 8839eaea8371..60652ebbdf61 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -28,10 +28,13 @@
 #include <drm/drm_probe_helper.h>
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
 #include "intel_dp.h"
+#include "intel_dp_mst.h"
+#include "intel_dpio_phy.h"
 #include "intel_drv.h"
 
 static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
@@ -148,9 +151,10 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 
 static int
 intel_dp_mst_atomic_check(struct drm_connector *connector,
-			  struct drm_connector_state *new_conn_state)
+			  struct drm_atomic_state *state)
 {
-	struct drm_atomic_state *state = new_conn_state->state;
+	struct drm_connector_state *new_conn_state =
+		drm_atomic_get_new_connector_state(state, connector);
 	struct drm_connector_state *old_conn_state =
 		drm_atomic_get_old_connector_state(state, connector);
 	struct intel_connector *intel_connector =
@@ -160,7 +164,7 @@ intel_dp_mst_atomic_check(struct drm_connector *connector,
 	struct drm_dp_mst_topology_mgr *mgr;
 	int ret;
 
-	ret = intel_digital_connector_atomic_check(connector, new_conn_state);
+	ret = intel_digital_connector_atomic_check(connector, state);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.h b/drivers/gpu/drm/i915/display/intel_dp_mst.h
new file mode 100644
index 000000000000..1470c6e0514b
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DP_MST_H__
+#define __INTEL_DP_MST_H__
+
+struct intel_digital_port;
+
+int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
+void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
+
+#endif /* __INTEL_DP_MST_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index ab4ac7158b79..7ccf7f3974db 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -21,8 +21,11 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include "intel_dp.h"
+#include "display/intel_dp.h"
+
+#include "intel_dpio_phy.h"
 #include "intel_drv.h"
+#include "intel_sideband.h"
 
 /**
  * DOC: DPIO
@@ -648,7 +651,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 	u32 val;
 	int i;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Clear calc init */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
@@ -729,8 +732,7 @@ void chv_set_phy_signal_level(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW10(ch), val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
-
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_data_lane_soft_reset(struct intel_encoder *encoder,
@@ -800,7 +802,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 
 	chv_phy_powergate_lanes(encoder, true, lane_mask);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, true);
@@ -855,7 +857,7 @@ void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
 		val |= CHV_CMN_USEDCLKCHANNEL;
 	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -870,7 +872,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	int data, i, stagger;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* allow hardware to manage TX FIFO reset source */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -935,7 +937,7 @@ void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	/* Deassert data lane reset */
 	chv_data_lane_soft_reset(encoder, crtc_state, false);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void chv_phy_release_cl2_override(struct intel_encoder *encoder)
@@ -956,7 +958,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 	enum pipe pipe = to_intel_crtc(old_crtc_state->base.crtc)->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* disable left/right clock distribution */
 	if (pipe != PIPE_B) {
@@ -969,7 +971,7 @@ void chv_phy_post_pll_disable(struct intel_encoder *encoder,
 		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
 	}
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 
 	/*
 	 * Leave the power down bit cleared for at least one
@@ -993,7 +995,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = intel_crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
@@ -1006,7 +1009,8 @@ void vlv_set_phy_signal_level(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
@@ -1019,7 +1023,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 
 	/* Program Tx lane resets to default */
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
+
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
@@ -1033,7 +1038,8 @@ void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
 	vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
-	mutex_unlock(&dev_priv->sb_lock);
+
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
@@ -1047,7 +1053,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	enum pipe pipe = crtc->pipe;
 	u32 val;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Enable clock channels for this port */
 	val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
@@ -1063,7 +1069,7 @@ void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 void vlv_phy_reset_lanes(struct intel_encoder *encoder,
@@ -1075,8 +1081,8 @@ void vlv_phy_reset_lanes(struct intel_encoder *encoder,
 	enum dpio_channel port = vlv_dport_to_channel(dport);
 	enum pipe pipe = crtc->pipe;
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
 	vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.h b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
new file mode 100644
index 000000000000..f418aab90b7e
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DPIO_PHY_H__
+#define __INTEL_DPIO_PHY_H__
+
+#include <linux/types.h>
+
+enum dpio_channel;
+enum dpio_phy;
+enum port;
+struct drm_i915_private;
+struct intel_crtc_state;
+struct intel_encoder;
+
+void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
+			     enum dpio_phy *phy, enum dpio_channel *ch);
+void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv,
+				  enum port port, u32 margin, u32 scale,
+				  u32 enable, u32 deemphasis);
+void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy);
+void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy);
+bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
+			    enum dpio_phy phy);
+bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
+			      enum dpio_phy phy);
+u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count);
+void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder,
+				     u8 lane_lat_optim_mask);
+u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder);
+
+void chv_set_phy_signal_level(struct intel_encoder *encoder,
+			      u32 deemph_reg_value, u32 margin_reg_value,
+			      bool uniq_trans_scale);
+void chv_data_lane_soft_reset(struct intel_encoder *encoder,
+			      const struct intel_crtc_state *crtc_state,
+			      bool reset);
+void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
+			    const struct intel_crtc_state *crtc_state);
+void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
+				const struct intel_crtc_state *crtc_state);
+void chv_phy_release_cl2_override(struct intel_encoder *encoder);
+void chv_phy_post_pll_disable(struct intel_encoder *encoder,
+			      const struct intel_crtc_state *old_crtc_state);
+
+void vlv_set_phy_signal_level(struct intel_encoder *encoder,
+			      u32 demph_reg_value, u32 preemph_reg_value,
+			      u32 uniqtranscale_reg_value, u32 tx3_demph);
+void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
+			    const struct intel_crtc_state *crtc_state);
+void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
+				const struct intel_crtc_state *crtc_state);
+void vlv_phy_reset_lanes(struct intel_encoder *encoder,
+			 const struct intel_crtc_state *old_crtc_state);
+
+#endif /* __INTEL_DPIO_PHY_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index e01c057ce50b..2d4e7b9a7b9d 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -21,6 +21,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "intel_dpio_phy.h"
+#include "intel_dpll_mgr.h"
 #include "intel_drv.h"
 
 /**
@@ -349,7 +351,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 	u32 val;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -358,7 +360,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
 	hw_state->fp0 = I915_READ(PCH_FP0(id));
 	hw_state->fp1 = I915_READ(PCH_FP1(id));
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return val & DPLL_VCO_ENABLE;
 }
@@ -452,7 +454,7 @@ ibx_get_dpll(struct intel_crtc_state *crtc_state,
 }
 
 static void ibx_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
 		      "fp0: 0x%x, fp1: 0x%x\n",
@@ -517,14 +519,14 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
 	u32 val;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
 	val = I915_READ(WRPLL_CTL(id));
 	hw_state->wrpll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return val & WRPLL_PLL_ENABLE;
 }
@@ -537,14 +539,14 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
 	u32 val;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
 	val = I915_READ(SPLL_CTL);
 	hw_state->spll = val;
 
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return val & SPLL_PLL_ENABLE;
 }
@@ -773,7 +775,7 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(struct intel_crtc_state *
 
 	hsw_ddi_calculate_wrpll(crtc_state->port_clock * 1000, &r2, &n2, &p);
 
-	val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL |
+	val = WRPLL_PLL_ENABLE | WRPLL_REF_LCPLL |
 	      WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
 	      WRPLL_DIVIDER_POST(p);
 
@@ -837,7 +839,7 @@ hsw_get_dpll(struct intel_crtc_state *crtc_state,
 			return NULL;
 
 		crtc_state->dpll_hw_state.spll =
-			SPLL_PLL_ENABLE | SPLL_PLL_FREQ_1350MHz | SPLL_PLL_SSC;
+			SPLL_PLL_ENABLE | SPLL_FREQ_1350MHz | SPLL_REF_MUXED_SSC;
 
 		pll = intel_find_shared_dpll(crtc_state,
 					     DPLL_ID_SPLL, DPLL_ID_SPLL);
@@ -854,7 +856,7 @@ hsw_get_dpll(struct intel_crtc_state *crtc_state,
 }
 
 static void hsw_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: wrpll: 0x%x spll: 0x%x\n",
 		      hw_state->wrpll, hw_state->spll);
@@ -1002,7 +1004,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	bool ret;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -1023,7 +1025,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return ret;
 }
@@ -1039,7 +1041,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 	bool ret;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -1056,7 +1058,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return ret;
 }
@@ -1423,7 +1425,7 @@ skl_get_dpll(struct intel_crtc_state *crtc_state,
 }
 
 static void skl_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: "
 		      "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
@@ -1600,7 +1602,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	bxt_port_to_phy_channel(dev_priv, port, &phy, &ch);
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -1658,7 +1660,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return ret;
 }
@@ -1855,7 +1857,7 @@ bxt_get_dpll(struct intel_crtc_state *crtc_state,
 }
 
 static void bxt_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: ebb0: 0x%x, ebb4: 0x%x,"
 		      "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
@@ -1879,27 +1881,6 @@ static const struct intel_shared_dpll_funcs bxt_ddi_pll_funcs = {
 	.get_hw_state = bxt_ddi_pll_get_hw_state,
 };
 
-static void intel_ddi_pll_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	if (INTEL_GEN(dev_priv) < 9) {
-		u32 val = I915_READ(LCPLL_CTL);
-
-		/*
-		 * The LCPLL register should be turned on by the BIOS. For now
-		 * let's just check its state and print errors in case
-		 * something is wrong.  Don't even try to turn it on.
-		 */
-
-		if (val & LCPLL_CD_SOURCE_FCLK)
-			DRM_ERROR("CDCLK source is not LCPLL\n");
-
-		if (val & LCPLL_PLL_DISABLE)
-			DRM_ERROR("LCPLL is disabled\n");
-	}
-}
-
 struct intel_dpll_mgr {
 	const struct dpll_info *dpll_info;
 
@@ -1907,7 +1888,7 @@ struct intel_dpll_mgr {
 					      struct intel_encoder *encoder);
 
 	void (*dump_hw_state)(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state);
+			      const struct intel_dpll_hw_state *hw_state);
 };
 
 static const struct dpll_info pch_plls[] = {
@@ -2106,7 +2087,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	bool ret;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -2126,7 +2107,7 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	ret = true;
 
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 
 	return ret;
 }
@@ -2390,7 +2371,7 @@ cnl_get_dpll(struct intel_crtc_state *crtc_state,
 }
 
 static void cnl_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: "
 		      "cfgcr0: 0x%x, cfgcr1: 0x%x\n",
@@ -2741,11 +2722,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state)
 	}
 
 	if (use_ssc) {
-		tmp = (u64)dco_khz * 47 * 32;
+		tmp = mul_u32_u32(dco_khz, 47 * 32);
 		do_div(tmp, refclk_khz * m1div * 10000);
 		ssc_stepsize = tmp;
 
-		tmp = (u64)dco_khz * 1000;
+		tmp = mul_u32_u32(dco_khz, 1000);
 		ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32);
 	} else {
 		ssc_stepsize = 0;
@@ -2881,7 +2862,7 @@ static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	u32 val;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -2928,7 +2909,7 @@ static bool mg_pll_get_hw_state(struct drm_i915_private *dev_priv,
 
 	ret = true;
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 	return ret;
 }
 
@@ -2943,7 +2924,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 	u32 val;
 
 	wakeref = intel_display_power_get_if_enabled(dev_priv,
-						     POWER_DOMAIN_PLLS);
+						     POWER_DOMAIN_DISPLAY_CORE);
 	if (!wakeref)
 		return false;
 
@@ -2956,7 +2937,7 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 
 	ret = true;
 out:
-	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS, wakeref);
+	intel_display_power_put(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref);
 	return ret;
 }
 
@@ -3190,7 +3171,7 @@ static void mg_pll_disable(struct drm_i915_private *dev_priv,
 }
 
 static void icl_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, "
 		      "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, "
@@ -3303,10 +3284,6 @@ void intel_shared_dpll_init(struct drm_device *dev)
 	mutex_init(&dev_priv->dpll_lock);
 
 	BUG_ON(dev_priv->num_shared_dpll > I915_NUM_PLLS);
-
-	/* FIXME: Move this to a more suitable place */
-	if (HAS_DDI(dev_priv))
-		intel_ddi_pll_init(dev);
 }
 
 /**
@@ -3364,7 +3341,7 @@ void intel_release_shared_dpll(struct intel_shared_dpll *dpll,
  * Write the relevant values in @hw_state to dmesg using DRM_DEBUG_KMS.
  */
 void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state)
+			      const struct intel_dpll_hw_state *hw_state)
 {
 	if (dev_priv->dpll_mgr) {
 		dev_priv->dpll_mgr->dump_hw_state(dev_priv, hw_state);
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
index bd8124cc81ed..d0570414f3d1 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
@@ -25,6 +25,10 @@
 #ifndef _INTEL_DPLL_MGR_H_
 #define _INTEL_DPLL_MGR_H_
 
+#include <linux/types.h>
+
+#include "intel_display.h"
+
 /*FIXME: Move this to a more appropriate place. */
 #define abs_diff(a, b) ({			\
 	typeof(a) __a = (a);			\
@@ -32,13 +36,13 @@
 	(void) (&__a == &__b);			\
 	__a > __b ? (__a - __b) : (__b - __a); })
 
+struct drm_atomic_state;
+struct drm_device;
 struct drm_i915_private;
 struct intel_crtc;
 struct intel_crtc_state;
 struct intel_encoder;
-
 struct intel_shared_dpll;
-struct intel_dpll_mgr;
 
 /**
  * enum intel_dpll_id - possible DPLL ids
@@ -289,7 +293,7 @@ struct intel_shared_dpll {
 	/**
 	 * @state:
 	 *
-	 * Store the state for the pll, including the its hw state
+	 * Store the state for the pll, including its hw state
 	 * and CRTCs using it.
 	 */
 	struct intel_shared_dpll_state state;
@@ -339,7 +343,7 @@ void intel_shared_dpll_swap_state(struct drm_atomic_state *state);
 void intel_shared_dpll_init(struct drm_device *dev);
 
 void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv,
-			      struct intel_dpll_hw_state *hw_state);
+			      const struct intel_dpll_hw_state *hw_state);
 int cnl_hdmi_pll_ref_clock(struct drm_i915_private *dev_priv);
 enum intel_dpll_id icl_tc_port_to_pll_id(enum tc_port tc_port);
 bool intel_dpll_is_combophy(enum intel_dpll_id id);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c
index 5fec02aceaed..5fec02aceaed 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi.c
diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h
index 705a609050c0..6d20434636cd 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -28,6 +28,9 @@
 #include <drm/drm_mipi_dsi.h>
 #include "intel_drv.h"
 
+#define INTEL_DSI_VIDEO_MODE	0
+#define INTEL_DSI_COMMAND_MODE	1
+
 /* Dual Link support */
 #define DSI_DUAL_LINK_NONE		0
 #define DSI_DUAL_LINK_FRONT_BACK	1
@@ -151,6 +154,9 @@ static inline u16 intel_dsi_encoder_ports(struct intel_encoder *encoder)
 	return enc_to_intel_dsi(&encoder->base)->ports;
 }
 
+/* icl_dsi.c */
+void icl_dsi_init(struct drm_i915_private *dev_priv);
+
 /* intel_dsi.c */
 int intel_dsi_bitrate(const struct intel_dsi *intel_dsi);
 int intel_dsi_tlpx_ns(const struct intel_dsi *intel_dsi);
@@ -166,6 +172,7 @@ enum drm_mode_status intel_dsi_mode_valid(struct drm_connector *connector,
 struct intel_dsi_host *intel_dsi_host_init(struct intel_dsi *intel_dsi,
 					   const struct mipi_dsi_host_ops *funcs,
 					   enum port port);
+void vlv_dsi_init(struct drm_i915_private *dev_priv);
 
 /* vlv_dsi_pll.c */
 int vlv_dsi_pll_compute(struct intel_encoder *encoder,
@@ -192,5 +199,6 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id);
 void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi,
 				 enum mipi_seq seq_id);
 void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec);
+void intel_dsi_log_params(struct intel_dsi *intel_dsi);
 
 #endif /* _INTEL_DSI_H */
diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
index 150a156f3b1e..8c33262cb0b2 100644
--- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c
@@ -23,11 +23,13 @@
  * Author: Deepak M <m.deepak at intel.com>
  */
 
+#include <drm/drm_mipi_dsi.h>
+#include <video/mipi_display.h>
+
+#include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
-#include "i915_drv.h"
-#include <video/mipi_display.h>
-#include <drm/drm_mipi_dsi.h>
+#include "intel_dsi_dcs_backlight.h"
 
 #define CONTROL_DISPLAY_BCTRL		(1 << 5)
 #define CONTROL_DISPLAY_DD		(1 << 3)
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.h b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.h
new file mode 100644
index 000000000000..eb01947843bf
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_DSI_DCS_BACKLIGHT_H__
+#define __INTEL_DSI_DCS_BACKLIGHT_H__
+
+struct intel_connector;
+
+int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector);
+
+#endif /* __INTEL_DSI_DCS_BACKLIGHT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 3074448446bc..e5b178660408 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -24,30 +24,28 @@
  *
  */
 
-#include <drm/drm_crtc.h>
-#include <drm/drm_edid.h>
-#include <drm/i915_drm.h>
 #include <linux/gpio/consumer.h>
 #include <linux/mfd/intel_soc_pmic.h>
 #include <linux/slab.h>
-#include <video/mipi_display.h>
+
 #include <asm/intel-mid.h>
 #include <asm/unaligned.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_edid.h>
+#include <drm/i915_drm.h>
+
+#include <video/mipi_display.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 #define MIPI_TRANSFER_MODE_SHIFT	0
 #define MIPI_VIRTUAL_CHANNEL_SHIFT	1
 #define MIPI_PORT_SHIFT			3
 
-#define PREPARE_CNT_MAX		0x3F
-#define EXIT_ZERO_CNT_MAX	0x3F
-#define CLK_ZERO_CNT_MAX	0xFF
-#define TRAIL_CNT_MAX		0x1F
-
-#define NS_KHZ_RATIO 1000000
-
 /* base offsets for gpio pads */
 #define VLV_GPIO_NC_0_HV_DDI0_HPD	0x4130
 #define VLV_GPIO_NC_1_HV_DDI0_DDC_SDA	0x4120
@@ -248,7 +246,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 	pconf0 = VLV_GPIO_PCONF0(map->base_offset);
 	padval = VLV_GPIO_PAD_VAL(map->base_offset);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	if (!map->init) {
 		/* FIXME: remove constant below */
 		vlv_iosf_sb_write(dev_priv, port, pconf0, 0x2000CC00);
@@ -257,7 +255,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv,
 
 	tmp = 0x4 | value;
 	vlv_iosf_sb_write(dev_priv, port, padval, tmp);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void chv_exec_gpio(struct drm_i915_private *dev_priv,
@@ -303,12 +301,12 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv,
 	cfg0 = CHV_GPIO_PAD_CFG0(family_num, gpio_index);
 	cfg1 = CHV_GPIO_PAD_CFG1(family_num, gpio_index);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_iosf_sb_get(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 	vlv_iosf_sb_write(dev_priv, port, cfg1, 0);
 	vlv_iosf_sb_write(dev_priv, port, cfg0,
 			  CHV_GPIO_GPIOEN | CHV_GPIO_GPIOCFG_GPO |
 			  CHV_GPIO_GPIOTXSTATE(value));
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO));
 }
 
 static void bxt_exec_gpio(struct drm_i915_private *dev_priv,
@@ -532,268 +530,42 @@ void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec)
 	msleep(msec);
 }
 
-#define ICL_PREPARE_CNT_MAX	0x7
-#define ICL_CLK_ZERO_CNT_MAX	0xf
-#define ICL_TRAIL_CNT_MAX	0x7
-#define ICL_TCLK_PRE_CNT_MAX	0x3
-#define ICL_TCLK_POST_CNT_MAX	0x7
-#define ICL_HS_ZERO_CNT_MAX	0xf
-#define ICL_EXIT_ZERO_CNT_MAX	0x7
-
-static void icl_dphy_param_init(struct intel_dsi *intel_dsi)
+void intel_dsi_log_params(struct intel_dsi *intel_dsi)
 {
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
-	u32 tlpx_ns;
-	u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt;
-	u32 ths_prepare_ns, tclk_trail_ns;
-	u32 hs_zero_cnt;
-	u32 tclk_pre_cnt, tclk_post_cnt;
-
-	tlpx_ns = intel_dsi_tlpx_ns(intel_dsi);
-
-	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
-	ths_prepare_ns = max(mipi_config->ths_prepare,
-			     mipi_config->tclk_prepare);
-
-	/*
-	 * prepare cnt in escape clocks
-	 * this field represents a hexadecimal value with a precision
-	 * of 1.2 – i.e. the most significant bit is the integer
-	 * and the least significant 2 bits are fraction bits.
-	 * so, the field can represent a range of 0.25 to 1.75
-	 */
-	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * 4, tlpx_ns);
-	if (prepare_cnt > ICL_PREPARE_CNT_MAX) {
-		DRM_DEBUG_KMS("prepare_cnt out of range (%d)\n", prepare_cnt);
-		prepare_cnt = ICL_PREPARE_CNT_MAX;
-	}
-
-	/* clk zero count in escape clocks */
-	clk_zero_cnt = DIV_ROUND_UP(mipi_config->tclk_prepare_clkzero -
-				    ths_prepare_ns, tlpx_ns);
-	if (clk_zero_cnt > ICL_CLK_ZERO_CNT_MAX) {
-		DRM_DEBUG_KMS("clk_zero_cnt out of range (%d)\n", clk_zero_cnt);
-		clk_zero_cnt = ICL_CLK_ZERO_CNT_MAX;
-	}
-
-	/* trail cnt in escape clocks*/
-	trail_cnt = DIV_ROUND_UP(tclk_trail_ns, tlpx_ns);
-	if (trail_cnt > ICL_TRAIL_CNT_MAX) {
-		DRM_DEBUG_KMS("trail_cnt out of range (%d)\n", trail_cnt);
-		trail_cnt = ICL_TRAIL_CNT_MAX;
-	}
-
-	/* tclk pre count in escape clocks */
-	tclk_pre_cnt = DIV_ROUND_UP(mipi_config->tclk_pre, tlpx_ns);
-	if (tclk_pre_cnt > ICL_TCLK_PRE_CNT_MAX) {
-		DRM_DEBUG_KMS("tclk_pre_cnt out of range (%d)\n", tclk_pre_cnt);
-		tclk_pre_cnt = ICL_TCLK_PRE_CNT_MAX;
-	}
-
-	/* tclk post count in escape clocks */
-	tclk_post_cnt = DIV_ROUND_UP(mipi_config->tclk_post, tlpx_ns);
-	if (tclk_post_cnt > ICL_TCLK_POST_CNT_MAX) {
-		DRM_DEBUG_KMS("tclk_post_cnt out of range (%d)\n", tclk_post_cnt);
-		tclk_post_cnt = ICL_TCLK_POST_CNT_MAX;
-	}
-
-	/* hs zero cnt in escape clocks */
-	hs_zero_cnt = DIV_ROUND_UP(mipi_config->ths_prepare_hszero -
-				   ths_prepare_ns, tlpx_ns);
-	if (hs_zero_cnt > ICL_HS_ZERO_CNT_MAX) {
-		DRM_DEBUG_KMS("hs_zero_cnt out of range (%d)\n", hs_zero_cnt);
-		hs_zero_cnt = ICL_HS_ZERO_CNT_MAX;
-	}
-
-	/* hs exit zero cnt in escape clocks */
-	exit_zero_cnt = DIV_ROUND_UP(mipi_config->ths_exit, tlpx_ns);
-	if (exit_zero_cnt > ICL_EXIT_ZERO_CNT_MAX) {
-		DRM_DEBUG_KMS("exit_zero_cnt out of range (%d)\n", exit_zero_cnt);
-		exit_zero_cnt = ICL_EXIT_ZERO_CNT_MAX;
-	}
-
-	/* clock lane dphy timings */
-	intel_dsi->dphy_reg = (CLK_PREPARE_OVERRIDE |
-			       CLK_PREPARE(prepare_cnt) |
-			       CLK_ZERO_OVERRIDE |
-			       CLK_ZERO(clk_zero_cnt) |
-			       CLK_PRE_OVERRIDE |
-			       CLK_PRE(tclk_pre_cnt) |
-			       CLK_POST_OVERRIDE |
-			       CLK_POST(tclk_post_cnt) |
-			       CLK_TRAIL_OVERRIDE |
-			       CLK_TRAIL(trail_cnt));
-
-	/* data lanes dphy timings */
-	intel_dsi->dphy_data_lane_reg = (HS_PREPARE_OVERRIDE |
-					 HS_PREPARE(prepare_cnt) |
-					 HS_ZERO_OVERRIDE |
-					 HS_ZERO(hs_zero_cnt) |
-					 HS_TRAIL_OVERRIDE |
-					 HS_TRAIL(trail_cnt) |
-					 HS_EXIT_OVERRIDE |
-					 HS_EXIT(exit_zero_cnt));
-}
-
-static void vlv_dphy_param_init(struct intel_dsi *intel_dsi)
-{
-	struct drm_device *dev = intel_dsi->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
-	u32 tlpx_ns, extra_byte_count, tlpx_ui;
-	u32 ui_num, ui_den;
-	u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt;
-	u32 ths_prepare_ns, tclk_trail_ns;
-	u32 tclk_prepare_clkzero, ths_prepare_hszero;
-	u32 lp_to_hs_switch, hs_to_lp_switch;
-	u32 mul;
-
-	tlpx_ns = intel_dsi_tlpx_ns(intel_dsi);
-
-	switch (intel_dsi->lane_count) {
-	case 1:
-	case 2:
-		extra_byte_count = 2;
-		break;
-	case 3:
-		extra_byte_count = 4;
-		break;
-	case 4:
-	default:
-		extra_byte_count = 3;
-		break;
-	}
-
-	/* in Kbps */
-	ui_num = NS_KHZ_RATIO;
-	ui_den = intel_dsi_bitrate(intel_dsi);
-
-	tclk_prepare_clkzero = mipi_config->tclk_prepare_clkzero;
-	ths_prepare_hszero = mipi_config->ths_prepare_hszero;
-
-	/*
-	 * B060
-	 * LP byte clock = TLPX/ (8UI)
-	 */
-	intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num);
-
-	/* DDR clock period = 2 * UI
-	 * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ)
-	 * UI(nsec) = 10^6 / bitrate
-	 * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate
-	 * DDR clock count  = ns_value / DDR clock period
-	 *
-	 * For GEMINILAKE dphy_param_reg will be programmed in terms of
-	 * HS byte clock count for other platform in HS ddr clock count
-	 */
-	mul = IS_GEMINILAKE(dev_priv) ? 8 : 2;
-	ths_prepare_ns = max(mipi_config->ths_prepare,
-			     mipi_config->tclk_prepare);
-
-	/* prepare count */
-	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul);
-
-	if (prepare_cnt > PREPARE_CNT_MAX) {
-		DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt);
-		prepare_cnt = PREPARE_CNT_MAX;
-	}
-
-	/* exit zero count */
-	exit_zero_cnt = DIV_ROUND_UP(
-				(ths_prepare_hszero - ths_prepare_ns) * ui_den,
-				ui_num * mul
-				);
-
-	/*
-	 * Exit zero is unified val ths_zero and ths_exit
-	 * minimum value for ths_exit = 110ns
-	 * min (exit_zero_cnt * 2) = 110/UI
-	 * exit_zero_cnt = 55/UI
-	 */
-	if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num)
-		exit_zero_cnt += 1;
-
-	if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) {
-		DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt);
-		exit_zero_cnt = EXIT_ZERO_CNT_MAX;
-	}
-
-	/* clk zero count */
-	clk_zero_cnt = DIV_ROUND_UP(
-				(tclk_prepare_clkzero -	ths_prepare_ns)
-				* ui_den, ui_num * mul);
-
-	if (clk_zero_cnt > CLK_ZERO_CNT_MAX) {
-		DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt);
-		clk_zero_cnt = CLK_ZERO_CNT_MAX;
-	}
-
-	/* trail count */
-	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
-	trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul);
-
-	if (trail_cnt > TRAIL_CNT_MAX) {
-		DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt);
-		trail_cnt = TRAIL_CNT_MAX;
-	}
-
-	/* B080 */
-	intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 |
-						clk_zero_cnt << 8 | prepare_cnt;
-
-	/*
-	 * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT *
-	 *					mul + 10UI + Extra Byte Count
-	 *
-	 * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count
-	 * Extra Byte Count is calculated according to number of lanes.
-	 * High Low Switch Count is the Max of LP to HS and
-	 * HS to LP switch count
-	 *
-	 */
-	tlpx_ui = DIV_ROUND_UP(tlpx_ns * ui_den, ui_num);
-
-	/* B044 */
-	/* FIXME:
-	 * The comment above does not match with the code */
-	lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
-						exit_zero_cnt * mul + 10, 8);
-
-	hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8);
-
-	intel_dsi->hs_to_lp_count = max(lp_to_hs_switch, hs_to_lp_switch);
-	intel_dsi->hs_to_lp_count += extra_byte_count;
-
-	/* B088 */
-	/* LP -> HS for clock lanes
-	 * LP clk sync + LP11 + LP01 + tclk_prepare + tclk_zero +
-	 *						extra byte count
-	 * 2TPLX + 1TLPX + 1 TPLX(in ns) + prepare_cnt * 2 + clk_zero_cnt *
-	 *					2(in UI) + extra byte count
-	 * In byteclks = (4TLPX + prepare_cnt * 2 + clk_zero_cnt *2 (in UI)) /
-	 *					8 + extra byte count
-	 */
-	intel_dsi->clk_lp_to_hs_count =
-		DIV_ROUND_UP(
-			4 * tlpx_ui + prepare_cnt * 2 +
-			clk_zero_cnt * 2,
-			8);
-
-	intel_dsi->clk_lp_to_hs_count += extra_byte_count;
-
-	/* HS->LP for Clock Lanes
-	 * Low Power clock synchronisations + 1Tx byteclk + tclk_trail +
-	 *						Extra byte count
-	 * 2TLPX + 8UI + (trail_count*2)(in UI) + Extra byte count
-	 * In byteclks = (2*TLpx(in UI) + trail_count*2 +8)(in UI)/8 +
-	 *						Extra byte count
-	 */
-	intel_dsi->clk_hs_to_lp_count =
-		DIV_ROUND_UP(2 * tlpx_ui + trail_cnt * 2 + 8,
-			8);
-	intel_dsi->clk_hs_to_lp_count += extra_byte_count;
+	DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk);
+	DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap);
+	DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count);
+	DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg);
+	DRM_DEBUG_KMS("Video mode format %s\n",
+		      intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ?
+		      "non-burst with sync pulse" :
+		      intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ?
+		      "non-burst with sync events" :
+		      intel_dsi->video_mode_format == VIDEO_MODE_BURST ?
+		      "burst" : "<unknown>");
+	DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio);
+	DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val);
+	DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt));
+	DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop));
+	DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video");
+	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK)
+		DRM_DEBUG_KMS("Dual link: DSI_DUAL_LINK_FRONT_BACK\n");
+	else if (intel_dsi->dual_link == DSI_DUAL_LINK_PIXEL_ALT)
+		DRM_DEBUG_KMS("Dual link: DSI_DUAL_LINK_PIXEL_ALT\n");
+	else
+		DRM_DEBUG_KMS("Dual link: NONE\n");
+	DRM_DEBUG_KMS("Pixel Format %d\n", intel_dsi->pixel_format);
+	DRM_DEBUG_KMS("TLPX %d\n", intel_dsi->escape_clk_div);
+	DRM_DEBUG_KMS("LP RX Timeout 0x%x\n", intel_dsi->lp_rx_timeout);
+	DRM_DEBUG_KMS("Turnaround Timeout 0x%x\n", intel_dsi->turn_arnd_val);
+	DRM_DEBUG_KMS("Init Count 0x%x\n", intel_dsi->init_count);
+	DRM_DEBUG_KMS("HS to LP Count 0x%x\n", intel_dsi->hs_to_lp_count);
+	DRM_DEBUG_KMS("LP Byte Clock %d\n", intel_dsi->lp_byte_clk);
+	DRM_DEBUG_KMS("DBI BW Timer 0x%x\n", intel_dsi->bw_timer);
+	DRM_DEBUG_KMS("LP to HS Clock Count 0x%x\n", intel_dsi->clk_lp_to_hs_count);
+	DRM_DEBUG_KMS("HS to LP Clock Count 0x%x\n", intel_dsi->clk_hs_to_lp_count);
+	DRM_DEBUG_KMS("BTA %s\n",
+			enableddisabled(!(intel_dsi->video_frmt_cfg_bits & DISABLE_VIDEO_BTA)));
 }
 
 bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
@@ -853,6 +625,17 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 		if (mipi_config->target_burst_mode_freq) {
 			u32 bitrate = intel_dsi_bitrate(intel_dsi);
 
+			/*
+			 * Sometimes the VBT contains a slightly lower clock,
+			 * then the bitrate we have calculated, in this case
+			 * just replace it with the calculated bitrate.
+			 */
+			if (mipi_config->target_burst_mode_freq < bitrate &&
+			    intel_fuzzy_clock_check(
+					mipi_config->target_burst_mode_freq,
+					bitrate))
+				mipi_config->target_burst_mode_freq = bitrate;
+
 			if (mipi_config->target_burst_mode_freq < bitrate) {
 				DRM_ERROR("Burst mode freq is less than computed\n");
 				return false;
@@ -872,46 +655,6 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 
 	intel_dsi->burst_mode_ratio = burst_mode_ratio;
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		icl_dphy_param_init(intel_dsi);
-	else
-		vlv_dphy_param_init(intel_dsi);
-
-	DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk);
-	DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap);
-	DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count);
-	DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg);
-	DRM_DEBUG_KMS("Video mode format %s\n",
-		      intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ?
-		      "non-burst with sync pulse" :
-		      intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ?
-		      "non-burst with sync events" :
-		      intel_dsi->video_mode_format == VIDEO_MODE_BURST ?
-		      "burst" : "<unknown>");
-	DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio);
-	DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val);
-	DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt));
-	DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop));
-	DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video");
-	if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK)
-		DRM_DEBUG_KMS("Dual link: DSI_DUAL_LINK_FRONT_BACK\n");
-	else if (intel_dsi->dual_link == DSI_DUAL_LINK_PIXEL_ALT)
-		DRM_DEBUG_KMS("Dual link: DSI_DUAL_LINK_PIXEL_ALT\n");
-	else
-		DRM_DEBUG_KMS("Dual link: NONE\n");
-	DRM_DEBUG_KMS("Pixel Format %d\n", intel_dsi->pixel_format);
-	DRM_DEBUG_KMS("TLPX %d\n", intel_dsi->escape_clk_div);
-	DRM_DEBUG_KMS("LP RX Timeout 0x%x\n", intel_dsi->lp_rx_timeout);
-	DRM_DEBUG_KMS("Turnaround Timeout 0x%x\n", intel_dsi->turn_arnd_val);
-	DRM_DEBUG_KMS("Init Count 0x%x\n", intel_dsi->init_count);
-	DRM_DEBUG_KMS("HS to LP Count 0x%x\n", intel_dsi->hs_to_lp_count);
-	DRM_DEBUG_KMS("LP Byte Clock %d\n", intel_dsi->lp_byte_clk);
-	DRM_DEBUG_KMS("DBI BW Timer 0x%x\n", intel_dsi->bw_timer);
-	DRM_DEBUG_KMS("LP to HS Clock Count 0x%x\n", intel_dsi->clk_lp_to_hs_count);
-	DRM_DEBUG_KMS("HS to LP Clock Count 0x%x\n", intel_dsi->clk_hs_to_lp_count);
-	DRM_DEBUG_KMS("BTA %s\n",
-			enableddisabled(!(intel_dsi->video_frmt_cfg_bits & DISABLE_VIDEO_BTA)));
-
 	/* delays in VBT are in unit of 100us, so need to convert
 	 * here in ms
 	 * Delay (100us) * 100 /1000 = Delay / 10 (ms) */
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index adef81c8cccb..22666d28f4aa 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -32,13 +32,19 @@
 #include <drm/drm_crtc.h>
 #include <drm/i915_drm.h>
 
-#include "dvo.h"
 #include "i915_drv.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
 #include "intel_dvo.h"
+#include "intel_dvo_dev.h"
+#include "intel_gmbus.h"
 #include "intel_panel.h"
 
+#define INTEL_DVO_CHIP_NONE	0
+#define INTEL_DVO_CHIP_LVDS	1
+#define INTEL_DVO_CHIP_TMDS	2
+#define INTEL_DVO_CHIP_TVOUT	4
+
 #define SIL164_ADDR	0x38
 #define CH7xxx_ADDR	0x76
 #define TFP410_ADDR	0x38
diff --git a/drivers/gpu/drm/i915/intel_dvo.h b/drivers/gpu/drm/i915/display/intel_dvo.h
index 3ed0fdf8efff..3ed0fdf8efff 100644
--- a/drivers/gpu/drm/i915/intel_dvo.h
+++ b/drivers/gpu/drm/i915/display/intel_dvo.h
diff --git a/drivers/gpu/drm/i915/dvo.h b/drivers/gpu/drm/i915/display/intel_dvo_dev.h
index 16e0345b711f..94a6ae1e0292 100644
--- a/drivers/gpu/drm/i915/dvo.h
+++ b/drivers/gpu/drm/i915/display/intel_dvo_dev.h
@@ -20,12 +20,14 @@
  * OF THIS SOFTWARE.
  */
 
-#ifndef _INTEL_DVO_H
-#define _INTEL_DVO_H
+#ifndef __INTEL_DVO_DEV_H__
+#define __INTEL_DVO_DEV_H__
 
 #include <linux/i2c.h>
+
 #include <drm/drm_crtc.h>
-#include "intel_drv.h"
+
+#include "i915_reg.h"
 
 struct intel_dvo_device {
 	const char *name;
@@ -135,4 +137,4 @@ extern const struct intel_dvo_dev_ops tfp410_ops;
 extern const struct intel_dvo_dev_ops ch7017_ops;
 extern const struct intel_dvo_dev_ops ns2501_ops;
 
-#endif /* _INTEL_DVO_H */
+#endif /* __INTEL_DVO_DEV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 5679f2fffb7c..d36cada2cc7d 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -344,6 +344,10 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv)
 			   HSW_FBCQ_DIS);
 	}
 
+	if (IS_GEN(dev_priv, 11))
+		/* Wa_1409120013:icl,ehl */
+		I915_WRITE(ILK_DPFC_CHICKEN, ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL);
+
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	intel_fbc_recompress(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_fbc.h b/drivers/gpu/drm/i915/display/intel_fbc.h
index 50272eda8d43..50272eda8d43 100644
--- a/drivers/gpu/drm/i915/intel_fbc.h
+++ b/drivers/gpu/drm/i915/display/intel_fbc.h
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 89db71996148..1edd44ee32b2 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -144,7 +144,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
 	if (size * 2 < dev_priv->stolen_usable_size)
 		obj = i915_gem_object_create_stolen(dev_priv, size);
 	if (obj == NULL)
-		obj = i915_gem_object_create(dev_priv, size);
+		obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj)) {
 		DRM_ERROR("failed to allocate framebuffer\n");
 		ret = PTR_ERR(obj);
@@ -213,7 +213,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	/* Pin the GGTT vma for our access via info->screen_base.
 	 * This also validates that any existing fb inherited from the
@@ -272,7 +272,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	ifbdev->vma = vma;
 	ifbdev->vma_flags = flags;
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	vga_switcheroo_client_fb_set(pdev, info);
 	return 0;
@@ -280,7 +280,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 out_unpin:
 	intel_unpin_fb_vma(vma, flags);
 out_unlock:
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h
index de7c84250eb5..de7c84250eb5 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.h
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.h
diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
index 74c8b0528294..8545ad32bb50 100644
--- a/drivers/gpu/drm/i915/intel_fifo_underrun.c
+++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.c
@@ -28,6 +28,7 @@
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
+#include "intel_fifo_underrun.h"
 
 /**
  * DOC: fifo underrun handling
diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.h b/drivers/gpu/drm/i915/display/intel_fifo_underrun.h
new file mode 100644
index 000000000000..e04f22ac1f49
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_FIFO_UNDERRUN_H__
+#define __INTEL_FIFO_UNDERRUN_H__
+
+#include <linux/types.h>
+
+#include "intel_display.h"
+
+struct drm_i915_private;
+
+bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
+					   enum pipe pipe, bool enable);
+bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
+					   enum pipe pch_transcoder,
+					   bool enable);
+void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
+					 enum pipe pipe);
+void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
+					 enum pipe pch_transcoder);
+void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv);
+void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
+
+#endif /* __INTEL_FIFO_UNDERRUN_H__ */
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index aa34e33b6087..44273c10cea5 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -53,16 +53,11 @@
  * busyness. There is no direct way to detect idleness. Instead an idle timer
  * work delayed work should be started from the flush and flip functions and
  * cancelled as soon as busyness is detected.
- *
- * Note that there's also an older frontbuffer activity tracking scheme which
- * just tracks general activity. This is done by the various mark_busy and
- * mark_idle functions. For display power management features using these
- * functions is deprecated and should be avoided.
  */
 
+#include "display/intel_dp.h"
 
 #include "i915_drv.h"
-#include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_fbc.h"
 #include "intel_frontbuffer.h"
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
index d5894666f658..5727320c8084 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
@@ -24,7 +24,7 @@
 #ifndef __INTEL_FRONTBUFFER_H__
 #define __INTEL_FRONTBUFFER_H__
 
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 struct drm_i915_private;
 struct drm_i915_gem_object;
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index 422685d120e9..4f6a9bd5af47 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -26,13 +26,17 @@
  *	Eric Anholt <eric@anholt.net>
  *	Chris Wilson <chris@chris-wilson.co.uk>
  */
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
+
 #include <linux/export.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/i2c.h>
+
 #include <drm/drm_hdcp.h>
-#include "intel_drv.h"
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_gmbus.h"
 
 struct gmbus_pin {
 	const char *name;
@@ -84,11 +88,19 @@ static const struct gmbus_pin gmbus_pins_icp[] = {
 	[GMBUS_PIN_12_TC4_ICP] = { "tc4", GPIOM },
 };
 
+static const struct gmbus_pin gmbus_pins_mcc[] = {
+	[GMBUS_PIN_1_BXT] = { "dpa", GPIOB },
+	[GMBUS_PIN_2_BXT] = { "dpb", GPIOC },
+	[GMBUS_PIN_9_TC1_ICP] = { "dpc", GPIOJ },
+};
+
 /* pin is expected to be valid */
 static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
 					     unsigned int pin)
 {
-	if (HAS_PCH_ICP(dev_priv))
+	if (HAS_PCH_MCC(dev_priv))
+		return &gmbus_pins_mcc[pin];
+	else if (HAS_PCH_ICP(dev_priv))
 		return &gmbus_pins_icp[pin];
 	else if (HAS_PCH_CNP(dev_priv))
 		return &gmbus_pins_cnp[pin];
@@ -107,7 +119,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
 {
 	unsigned int size;
 
-	if (HAS_PCH_ICP(dev_priv))
+	if (HAS_PCH_MCC(dev_priv))
+		size = ARRAY_SIZE(gmbus_pins_mcc);
+	else if (HAS_PCH_ICP(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_icp);
 	else if (HAS_PCH_CNP(dev_priv))
 		size = ARRAY_SIZE(gmbus_pins_cnp);
@@ -134,7 +148,7 @@ to_intel_gmbus(struct i2c_adapter *i2c)
 }
 
 void
-intel_i2c_reset(struct drm_i915_private *dev_priv)
+intel_gmbus_reset(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GMBUS0, 0);
 	I915_WRITE(GMBUS4, 0);
@@ -182,14 +196,15 @@ static void bxt_gmbus_clock_gating(struct drm_i915_private *dev_priv,
 
 static u32 get_reserved(struct intel_gmbus *bus)
 {
-	struct drm_i915_private *dev_priv = bus->dev_priv;
+	struct drm_i915_private *i915 = bus->dev_priv;
+	struct intel_uncore *uncore = &i915->uncore;
 	u32 reserved = 0;
 
 	/* On most chips, these bits must be preserved in software. */
-	if (!IS_I830(dev_priv) && !IS_I845G(dev_priv))
-		reserved = I915_READ_NOTRACE(bus->gpio_reg) &
-					     (GPIO_DATA_PULLUP_DISABLE |
-					      GPIO_CLOCK_PULLUP_DISABLE);
+	if (!IS_I830(i915) && !IS_I845G(i915))
+		reserved = intel_uncore_read_notrace(uncore, bus->gpio_reg) &
+			   (GPIO_DATA_PULLUP_DISABLE |
+			    GPIO_CLOCK_PULLUP_DISABLE);
 
 	return reserved;
 }
@@ -197,27 +212,37 @@ static u32 get_reserved(struct intel_gmbus *bus)
 static int get_clock(void *data)
 {
 	struct intel_gmbus *bus = data;
-	struct drm_i915_private *dev_priv = bus->dev_priv;
+	struct intel_uncore *uncore = &bus->dev_priv->uncore;
 	u32 reserved = get_reserved(bus);
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved | GPIO_CLOCK_DIR_MASK);
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved);
-	return (I915_READ_NOTRACE(bus->gpio_reg) & GPIO_CLOCK_VAL_IN) != 0;
+
+	intel_uncore_write_notrace(uncore,
+				   bus->gpio_reg,
+				   reserved | GPIO_CLOCK_DIR_MASK);
+	intel_uncore_write_notrace(uncore, bus->gpio_reg, reserved);
+
+	return (intel_uncore_read_notrace(uncore, bus->gpio_reg) &
+		GPIO_CLOCK_VAL_IN) != 0;
 }
 
 static int get_data(void *data)
 {
 	struct intel_gmbus *bus = data;
-	struct drm_i915_private *dev_priv = bus->dev_priv;
+	struct intel_uncore *uncore = &bus->dev_priv->uncore;
 	u32 reserved = get_reserved(bus);
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved | GPIO_DATA_DIR_MASK);
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved);
-	return (I915_READ_NOTRACE(bus->gpio_reg) & GPIO_DATA_VAL_IN) != 0;
+
+	intel_uncore_write_notrace(uncore,
+				   bus->gpio_reg,
+				   reserved | GPIO_DATA_DIR_MASK);
+	intel_uncore_write_notrace(uncore, bus->gpio_reg, reserved);
+
+	return (intel_uncore_read_notrace(uncore, bus->gpio_reg) &
+		GPIO_DATA_VAL_IN) != 0;
 }
 
 static void set_clock(void *data, int state_high)
 {
 	struct intel_gmbus *bus = data;
-	struct drm_i915_private *dev_priv = bus->dev_priv;
+	struct intel_uncore *uncore = &bus->dev_priv->uncore;
 	u32 reserved = get_reserved(bus);
 	u32 clock_bits;
 
@@ -225,16 +250,18 @@ static void set_clock(void *data, int state_high)
 		clock_bits = GPIO_CLOCK_DIR_IN | GPIO_CLOCK_DIR_MASK;
 	else
 		clock_bits = GPIO_CLOCK_DIR_OUT | GPIO_CLOCK_DIR_MASK |
-			GPIO_CLOCK_VAL_MASK;
+			     GPIO_CLOCK_VAL_MASK;
 
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved | clock_bits);
-	POSTING_READ(bus->gpio_reg);
+	intel_uncore_write_notrace(uncore,
+				   bus->gpio_reg,
+				   reserved | clock_bits);
+	intel_uncore_posting_read(uncore, bus->gpio_reg);
 }
 
 static void set_data(void *data, int state_high)
 {
 	struct intel_gmbus *bus = data;
-	struct drm_i915_private *dev_priv = bus->dev_priv;
+	struct intel_uncore *uncore = &bus->dev_priv->uncore;
 	u32 reserved = get_reserved(bus);
 	u32 data_bits;
 
@@ -244,8 +271,8 @@ static void set_data(void *data, int state_high)
 		data_bits = GPIO_DATA_DIR_OUT | GPIO_DATA_DIR_MASK |
 			GPIO_DATA_VAL_MASK;
 
-	I915_WRITE_NOTRACE(bus->gpio_reg, reserved | data_bits);
-	POSTING_READ(bus->gpio_reg);
+	intel_uncore_write_notrace(uncore, bus->gpio_reg, reserved | data_bits);
+	intel_uncore_posting_read(uncore, bus->gpio_reg);
 }
 
 static int
@@ -256,7 +283,7 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter)
 					       adapter);
 	struct drm_i915_private *dev_priv = bus->dev_priv;
 
-	intel_i2c_reset(dev_priv);
+	intel_gmbus_reset(dev_priv);
 
 	if (IS_PINEVIEW(dev_priv))
 		pnv_gmbus_clock_gating(dev_priv, false);
@@ -577,8 +604,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num,
 	/* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */
 	if (IS_GEN9_LP(dev_priv))
 		bxt_gmbus_clock_gating(dev_priv, false);
-	else if (HAS_PCH_SPT(dev_priv) ||
-		 HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv))
+	else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv))
 		pch_gmbus_clock_gating(dev_priv, false);
 
 retry:
@@ -687,8 +713,7 @@ out:
 	/* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */
 	if (IS_GEN9_LP(dev_priv))
 		bxt_gmbus_clock_gating(dev_priv, true);
-	else if (HAS_PCH_SPT(dev_priv) ||
-		 HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv))
+	else if (HAS_PCH_SPT(dev_priv) || HAS_PCH_CNP(dev_priv))
 		pch_gmbus_clock_gating(dev_priv, true);
 
 	return ret;
@@ -811,7 +836,7 @@ static const struct i2c_lock_operations gmbus_lock_ops = {
  * intel_gmbus_setup - instantiate all Intel i2c GMBuses
  * @dev_priv: i915 device private
  */
-int intel_setup_gmbus(struct drm_i915_private *dev_priv)
+int intel_gmbus_setup(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct intel_gmbus *bus;
@@ -872,7 +897,7 @@ int intel_setup_gmbus(struct drm_i915_private *dev_priv)
 			goto err;
 	}
 
-	intel_i2c_reset(dev_priv);
+	intel_gmbus_reset(dev_priv);
 
 	return 0;
 
@@ -918,7 +943,14 @@ void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit)
 	mutex_unlock(&dev_priv->gmbus_mutex);
 }
 
-void intel_teardown_gmbus(struct drm_i915_private *dev_priv)
+bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
+{
+	struct intel_gmbus *bus = to_intel_gmbus(adapter);
+
+	return bus->force_bit;
+}
+
+void intel_gmbus_teardown(struct drm_i915_private *dev_priv)
 {
 	struct intel_gmbus *bus;
 	unsigned int pin;
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.h b/drivers/gpu/drm/i915/display/intel_gmbus.h
new file mode 100644
index 000000000000..d989085b8d22
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_GMBUS_H__
+#define __INTEL_GMBUS_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct i2c_adapter;
+
+int intel_gmbus_setup(struct drm_i915_private *dev_priv);
+void intel_gmbus_teardown(struct drm_i915_private *dev_priv);
+bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
+			      unsigned int pin);
+int intel_gmbus_output_aksv(struct i2c_adapter *adapter);
+
+struct i2c_adapter *
+intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin);
+void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
+void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
+bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter);
+void intel_gmbus_reset(struct drm_i915_private *dev_priv);
+
+#endif /* __INTEL_GMBUS_H__ */
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index 99b007169c49..bc3a94d491c4 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -16,6 +16,7 @@
 #include "i915_reg.h"
 #include "intel_drv.h"
 #include "intel_hdcp.h"
+#include "intel_sideband.h"
 
 #define KEY_LOAD_TRIES	5
 #define ENCRYPT_STATUS_CHANGE_TIMEOUT_MS	50
@@ -78,7 +79,7 @@ bool intel_hdcp_capable(struct intel_connector *connector)
 }
 
 /* Is HDCP2.2 capable on Platform and Sink */
-static bool intel_hdcp2_capable(struct intel_connector *connector)
+bool intel_hdcp2_capable(struct intel_connector *connector)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
@@ -213,10 +214,8 @@ static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv)
 	 * from other platforms. So GEN9_BC uses the GT Driver Mailbox i/f.
 	 */
 	if (IS_GEN9_BC(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_write(dev_priv,
 					      SKL_PCODE_LOAD_HDCP_KEYS, 1);
-		mutex_unlock(&dev_priv->pcu_lock);
 		if (ret) {
 			DRM_ERROR("Failed to initiate HDCP key load (%d)\n",
 			          ret);
@@ -492,9 +491,11 @@ int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port,
 
 /* Implements Part 2 of the HDCP authorization procedure */
 static
-int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port,
-			       const struct intel_hdcp_shim *shim)
+int intel_hdcp_auth_downstream(struct intel_connector *connector)
 {
+	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
+	const struct intel_hdcp_shim *shim = connector->hdcp.shim;
+	struct drm_device *dev = connector->base.dev;
 	u8 bstatus[2], num_downstream, *ksv_fifo;
 	int ret, i, tries = 3;
 
@@ -533,6 +534,11 @@ int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port,
 	if (ret)
 		goto err;
 
+	if (drm_hdcp_check_ksvs_revoked(dev, ksv_fifo, num_downstream)) {
+		DRM_ERROR("Revoked Ksv(s) in ksv_fifo\n");
+		return -EPERM;
+	}
+
 	/*
 	 * When V prime mismatches, DP Spec mandates re-read of
 	 * V prime atleast twice.
@@ -559,9 +565,12 @@ err:
 }
 
 /* Implements Part 1 of the HDCP authorization procedure */
-static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port,
-			   const struct intel_hdcp_shim *shim)
+static int intel_hdcp_auth(struct intel_connector *connector)
 {
+	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
+	struct intel_hdcp *hdcp = &connector->hdcp;
+	struct drm_device *dev = connector->base.dev;
+	const struct intel_hdcp_shim *shim = hdcp->shim;
 	struct drm_i915_private *dev_priv;
 	enum port port;
 	unsigned long r0_prime_gen_start;
@@ -627,6 +636,11 @@ static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port,
 	if (ret < 0)
 		return ret;
 
+	if (drm_hdcp_check_ksvs_revoked(dev, bksv.shim, 1)) {
+		DRM_ERROR("BKSV is revoked\n");
+		return -EPERM;
+	}
+
 	I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]);
 	I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]);
 
@@ -700,7 +714,7 @@ static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port,
 	 */
 
 	if (repeater_present)
-		return intel_hdcp_auth_downstream(intel_dig_port, shim);
+		return intel_hdcp_auth_downstream(connector);
 
 	DRM_DEBUG_KMS("HDCP is enabled (no repeater present)\n");
 	return 0;
@@ -763,7 +777,7 @@ static int _intel_hdcp_enable(struct intel_connector *connector)
 
 	/* Incase of authentication failures, HDCP spec expects reauth. */
 	for (i = 0; i < tries; i++) {
-		ret = intel_hdcp_auth(conn_to_dig_port(connector), hdcp->shim);
+		ret = intel_hdcp_auth(connector);
 		if (!ret) {
 			hdcp->hdcp_encrypted = true;
 			return 0;
@@ -1162,6 +1176,7 @@ static int hdcp2_authentication_key_exchange(struct intel_connector *connector)
 {
 	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
 	struct intel_hdcp *hdcp = &connector->hdcp;
+	struct drm_device *dev = connector->base.dev;
 	union {
 		struct hdcp2_ake_init ake_init;
 		struct hdcp2_ake_send_cert send_cert;
@@ -1196,6 +1211,12 @@ static int hdcp2_authentication_key_exchange(struct intel_connector *connector)
 
 	hdcp->is_repeater = HDCP_2_2_RX_REPEATER(msgs.send_cert.rx_caps[2]);
 
+	if (drm_hdcp_check_ksvs_revoked(dev, msgs.send_cert.cert_rx.receiver_id,
+					1)) {
+		DRM_ERROR("Receiver ID is revoked\n");
+		return -EPERM;
+	}
+
 	/*
 	 * Here msgs.no_stored_km will hold msgs corresponding to the km
 	 * stored also.
@@ -1306,7 +1327,7 @@ int hdcp2_propagate_stream_management_info(struct intel_connector *connector)
 
 	/* Prepare RepeaterAuth_Stream_Manage msg */
 	msgs.stream_manage.msg_id = HDCP_2_2_REP_STREAM_MANAGE;
-	drm_hdcp2_u32_to_seq_num(msgs.stream_manage.seq_num_m, hdcp->seq_num_m);
+	drm_hdcp_cpu_to_be24(msgs.stream_manage.seq_num_m, hdcp->seq_num_m);
 
 	/* K no of streams is fixed as 1. Stored as big-endian. */
 	msgs.stream_manage.k = cpu_to_be16(1);
@@ -1348,13 +1369,14 @@ int hdcp2_authenticate_repeater_topology(struct intel_connector *connector)
 {
 	struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector);
 	struct intel_hdcp *hdcp = &connector->hdcp;
+	struct drm_device *dev = connector->base.dev;
 	union {
 		struct hdcp2_rep_send_receiverid_list recvid_list;
 		struct hdcp2_rep_send_ack rep_ack;
 	} msgs;
 	const struct intel_hdcp_shim *shim = hdcp->shim;
+	u32 seq_num_v, device_cnt;
 	u8 *rx_info;
-	u32 seq_num_v;
 	int ret;
 
 	ret = shim->read_2_2_msg(intel_dig_port, HDCP_2_2_REP_SEND_RECVID_LIST,
@@ -1371,7 +1393,8 @@ int hdcp2_authenticate_repeater_topology(struct intel_connector *connector)
 	}
 
 	/* Converting and Storing the seq_num_v to local variable as DWORD */
-	seq_num_v = drm_hdcp2_seq_num_to_u32(msgs.recvid_list.seq_num_v);
+	seq_num_v =
+		drm_hdcp_be24_to_cpu((const u8 *)msgs.recvid_list.seq_num_v);
 
 	if (seq_num_v < hdcp->seq_num_v) {
 		/* Roll over of the seq_num_v from repeater. Reauthenticate. */
@@ -1379,6 +1402,14 @@ int hdcp2_authenticate_repeater_topology(struct intel_connector *connector)
 		return -EINVAL;
 	}
 
+	device_cnt = (HDCP_2_2_DEV_COUNT_HI(rx_info[0]) << 4 |
+		      HDCP_2_2_DEV_COUNT_LO(rx_info[1]));
+	if (drm_hdcp_check_ksvs_revoked(dev, msgs.recvid_list.receiver_ids,
+					device_cnt)) {
+		DRM_ERROR("Revoked receiver ID(s) is in list\n");
+		return -EPERM;
+	}
+
 	ret = hdcp2_verify_rep_topology_prepare_ack(connector,
 						    &msgs.recvid_list,
 						    &msgs.rep_ack);
diff --git a/drivers/gpu/drm/i915/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index a75f25f09d39..be8da85c866a 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -25,6 +25,7 @@ int intel_hdcp_enable(struct intel_connector *connector);
 int intel_hdcp_disable(struct intel_connector *connector);
 bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
 bool intel_hdcp_capable(struct intel_connector *connector);
+bool intel_hdcp2_capable(struct intel_connector *connector);
 void intel_hdcp_component_init(struct drm_i915_private *dev_priv);
 void intel_hdcp_component_fini(struct drm_i915_private *dev_priv);
 void intel_hdcp_cleanup(struct intel_connector *connector);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 34be2cfd0ec8..0ebec69bbbfc 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -39,17 +39,24 @@
 #include <drm/i915_drm.h>
 #include <drm/intel_lpe_audio.h>
 
+#include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
 #include "intel_dp.h"
+#include "intel_dpio_phy.h"
 #include "intel_drv.h"
+#include "intel_fifo_underrun.h"
+#include "intel_gmbus.h"
 #include "intel_hdcp.h"
 #include "intel_hdmi.h"
+#include "intel_hotplug.h"
 #include "intel_lspcon.h"
 #include "intel_sdvo.h"
 #include "intel_panel.h"
+#include "intel_sideband.h"
 
 static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
 {
@@ -122,6 +129,8 @@ static u32 g4x_infoframe_enable(unsigned int type)
 		return VIDEO_DIP_ENABLE_SPD;
 	case HDMI_INFOFRAME_TYPE_VENDOR:
 		return VIDEO_DIP_ENABLE_VENDOR;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		return 0;
 	default:
 		MISSING_CASE(type);
 		return 0;
@@ -145,6 +154,8 @@ static u32 hsw_infoframe_enable(unsigned int type)
 		return VIDEO_DIP_ENABLE_SPD_HSW;
 	case HDMI_INFOFRAME_TYPE_VENDOR:
 		return VIDEO_DIP_ENABLE_VS_HSW;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		return VIDEO_DIP_ENABLE_DRM_GLK;
 	default:
 		MISSING_CASE(type);
 		return 0;
@@ -170,6 +181,8 @@ hsw_dip_data_reg(struct drm_i915_private *dev_priv,
 		return HSW_TVIDEO_DIP_SPD_DATA(cpu_transcoder, i);
 	case HDMI_INFOFRAME_TYPE_VENDOR:
 		return HSW_TVIDEO_DIP_VS_DATA(cpu_transcoder, i);
+	case HDMI_INFOFRAME_TYPE_DRM:
+		return GLK_TVIDEO_DIP_DRM_DATA(cpu_transcoder, i);
 	default:
 		MISSING_CASE(type);
 		return INVALID_MMIO_REG;
@@ -543,10 +556,16 @@ static u32 hsw_infoframes_enabled(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	u32 val = I915_READ(HSW_TVIDEO_DIP_CTL(pipe_config->cpu_transcoder));
+	u32 mask;
+
+	mask = (VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW |
+		VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW |
+		VIDEO_DIP_ENABLE_GMP_HSW | VIDEO_DIP_ENABLE_SPD_HSW);
+
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		mask |= VIDEO_DIP_ENABLE_DRM_GLK;
 
-	return val & (VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW |
-		      VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW |
-		      VIDEO_DIP_ENABLE_GMP_HSW | VIDEO_DIP_ENABLE_SPD_HSW);
+	return val & mask;
 }
 
 static const u8 infoframe_type_to_idx[] = {
@@ -556,6 +575,7 @@ static const u8 infoframe_type_to_idx[] = {
 	HDMI_INFOFRAME_TYPE_AVI,
 	HDMI_INFOFRAME_TYPE_SPD,
 	HDMI_INFOFRAME_TYPE_VENDOR,
+	HDMI_INFOFRAME_TYPE_DRM,
 };
 
 u32 intel_hdmi_infoframe_enable(unsigned int type)
@@ -778,6 +798,40 @@ intel_hdmi_compute_hdmi_infoframe(struct intel_encoder *encoder,
 	return true;
 }
 
+static bool
+intel_hdmi_compute_drm_infoframe(struct intel_encoder *encoder,
+				 struct intel_crtc_state *crtc_state,
+				 struct drm_connector_state *conn_state)
+{
+	struct hdmi_drm_infoframe *frame = &crtc_state->infoframes.drm.drm;
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	int ret;
+
+	if (!(INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)))
+		return true;
+
+	if (!crtc_state->has_infoframe)
+		return true;
+
+	if (!conn_state->hdr_output_metadata)
+		return true;
+
+	crtc_state->infoframes.enable |=
+		intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_DRM);
+
+	ret = drm_hdmi_infoframe_set_hdr_metadata(frame, conn_state);
+	if (ret < 0) {
+		DRM_DEBUG_KMS("couldn't set HDR metadata in infoframe\n");
+		return false;
+	}
+
+	ret = hdmi_drm_infoframe_check(frame);
+	if (WARN_ON(ret))
+		return false;
+
+	return true;
+}
+
 static void g4x_set_infoframes(struct intel_encoder *encoder,
 			       bool enable,
 			       const struct intel_crtc_state *crtc_state,
@@ -846,19 +900,6 @@ static void g4x_set_infoframes(struct intel_encoder *encoder,
 			      &crtc_state->infoframes.hdmi);
 }
 
-static bool hdmi_sink_is_deep_color(const struct drm_connector_state *conn_state)
-{
-	struct drm_connector *connector = conn_state->connector;
-
-	/*
-	 * HDMI cloning is only supported on g4x which doesn't
-	 * support deep color or GCP infoframes anyway so no
-	 * need to worry about multiple HDMI sinks here.
-	 */
-
-	return connector->display_info.bpc > 8;
-}
-
 /*
  * Determine if default_phase=1 can be indicated in the GCP infoframe.
  *
@@ -963,8 +1004,8 @@ static void intel_hdmi_compute_gcp_infoframe(struct intel_encoder *encoder,
 	crtc_state->infoframes.enable |=
 		intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GENERAL_CONTROL);
 
-	/* Indicate color depth whenever the sink supports deep color */
-	if (hdmi_sink_is_deep_color(conn_state))
+	/* Indicate color indication for deep color mode */
+	if (crtc_state->pipe_bpp > 24)
 		crtc_state->infoframes.gcp |= GCP_COLOR_INDICATION;
 
 	/* Enable default_phase whenever the display mode is suitably aligned */
@@ -1153,7 +1194,8 @@ static void hsw_set_infoframes(struct intel_encoder *encoder,
 
 	val &= ~(VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW |
 		 VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW |
-		 VIDEO_DIP_ENABLE_GMP_HSW | VIDEO_DIP_ENABLE_SPD_HSW);
+		 VIDEO_DIP_ENABLE_GMP_HSW | VIDEO_DIP_ENABLE_SPD_HSW |
+		 VIDEO_DIP_ENABLE_DRM_GLK);
 
 	if (!enable) {
 		I915_WRITE(reg, val);
@@ -1176,6 +1218,9 @@ static void hsw_set_infoframes(struct intel_encoder *encoder,
 	intel_write_infoframe(encoder, crtc_state,
 			      HDMI_INFOFRAME_TYPE_VENDOR,
 			      &crtc_state->infoframes.hdmi);
+	intel_write_infoframe(encoder, crtc_state,
+			      HDMI_INFOFRAME_TYPE_DRM,
+			      &crtc_state->infoframes.drm);
 }
 
 void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable)
@@ -1762,7 +1807,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 	if (pipe_config->infoframes.enable)
 		pipe_config->has_infoframe = true;
 
-	if (tmp & SDVO_AUDIO_ENABLE)
+	if (tmp & HDMI_AUDIO_ENABLE)
 		pipe_config->has_audio = true;
 
 	if (!HAS_PCH_SPLIT(dev_priv) &&
@@ -1821,7 +1866,7 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder,
 
 	temp |= SDVO_ENABLE;
 	if (pipe_config->has_audio)
-		temp |= SDVO_AUDIO_ENABLE;
+		temp |= HDMI_AUDIO_ENABLE;
 
 	I915_WRITE(intel_hdmi->hdmi_reg, temp);
 	POSTING_READ(intel_hdmi->hdmi_reg);
@@ -1843,7 +1888,7 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder,
 
 	temp |= SDVO_ENABLE;
 	if (pipe_config->has_audio)
-		temp |= SDVO_AUDIO_ENABLE;
+		temp |= HDMI_AUDIO_ENABLE;
 
 	/*
 	 * HW workaround, need to write this twice for issue
@@ -1895,7 +1940,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder,
 
 	temp |= SDVO_ENABLE;
 	if (pipe_config->has_audio)
-		temp |= SDVO_AUDIO_ENABLE;
+		temp |= HDMI_AUDIO_ENABLE;
 
 	/*
 	 * WaEnableHDMI8bpcBefore12bpc:snb,ivb
@@ -1955,7 +2000,7 @@ static void intel_disable_hdmi(struct intel_encoder *encoder,
 
 	temp = I915_READ(intel_hdmi->hdmi_reg);
 
-	temp &= ~(SDVO_ENABLE | SDVO_AUDIO_ENABLE);
+	temp &= ~(SDVO_ENABLE | HDMI_AUDIO_ENABLE);
 	I915_WRITE(intel_hdmi->hdmi_reg, temp);
 	POSTING_READ(intel_hdmi->hdmi_reg);
 
@@ -2162,7 +2207,7 @@ static bool hdmi_deep_color_possible(const struct intel_crtc_state *crtc_state,
 	if (bpc == 10 && INTEL_GEN(dev_priv) < 11)
 		return false;
 
-	if (crtc_state->pipe_bpp <= 8*3)
+	if (crtc_state->pipe_bpp < bpc * 3)
 		return false;
 
 	if (!crtc_state->has_hdmi_sink)
@@ -2382,6 +2427,11 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 		return -EINVAL;
 	}
 
+	if (!intel_hdmi_compute_drm_infoframe(encoder, pipe_config, conn_state)) {
+		DRM_DEBUG_KMS("bad DRM infoframe\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -2620,12 +2670,12 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder,
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_dpio_get(dev_priv);
 
 	/* Assert data lane reset */
 	chv_data_lane_soft_reset(encoder, old_crtc_state, true);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_dpio_put(dev_priv);
 }
 
 static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
@@ -2654,6 +2704,36 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder,
 	chv_phy_release_cl2_override(encoder);
 }
 
+static struct i2c_adapter *
+intel_hdmi_get_i2c_adapter(struct drm_connector *connector)
+{
+	struct drm_i915_private *dev_priv = to_i915(connector->dev);
+	struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
+
+	return intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
+}
+
+static void intel_hdmi_create_i2c_symlink(struct drm_connector *connector)
+{
+	struct i2c_adapter *adapter = intel_hdmi_get_i2c_adapter(connector);
+	struct kobject *i2c_kobj = &adapter->dev.kobj;
+	struct kobject *connector_kobj = &connector->kdev->kobj;
+	int ret;
+
+	ret = sysfs_create_link(connector_kobj, i2c_kobj, i2c_kobj->name);
+	if (ret)
+		DRM_ERROR("Failed to create i2c symlink (%d)\n", ret);
+}
+
+static void intel_hdmi_remove_i2c_symlink(struct drm_connector *connector)
+{
+	struct i2c_adapter *adapter = intel_hdmi_get_i2c_adapter(connector);
+	struct kobject *i2c_kobj = &adapter->dev.kobj;
+	struct kobject *connector_kobj = &connector->kdev->kobj;
+
+	sysfs_remove_link(connector_kobj, i2c_kobj->name);
+}
+
 static int
 intel_hdmi_connector_register(struct drm_connector *connector)
 {
@@ -2665,6 +2745,8 @@ intel_hdmi_connector_register(struct drm_connector *connector)
 
 	i915_debugfs_connector_add(connector);
 
+	intel_hdmi_create_i2c_symlink(connector);
+
 	return ret;
 }
 
@@ -2676,6 +2758,13 @@ static void intel_hdmi_destroy(struct drm_connector *connector)
 	intel_connector_destroy(connector);
 }
 
+static void intel_hdmi_connector_unregister(struct drm_connector *connector)
+{
+	intel_hdmi_remove_i2c_symlink(connector);
+
+	intel_connector_unregister(connector);
+}
+
 static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
 	.detect = intel_hdmi_detect,
 	.force = intel_hdmi_force,
@@ -2683,7 +2772,7 @@ static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
 	.atomic_get_property = intel_digital_connector_atomic_get_property,
 	.atomic_set_property = intel_digital_connector_atomic_set_property,
 	.late_register = intel_hdmi_connector_register,
-	.early_unregister = intel_connector_unregister,
+	.early_unregister = intel_hdmi_connector_unregister,
 	.destroy = intel_hdmi_destroy,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = intel_digital_connector_duplicate_state,
@@ -2721,6 +2810,10 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
 	drm_connector_attach_content_type_property(connector);
 	connector->state->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		drm_object_attach_property(&connector->base,
+			connector->dev->mode_config.hdr_output_metadata_property, 0);
+
 	if (!HAS_GMCH(dev_priv))
 		drm_connector_attach_max_bpc_property(connector, 8, 12);
 }
@@ -2866,6 +2959,28 @@ static u8 icl_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port)
 	return ddc_pin;
 }
 
+static u8 mcc_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port)
+{
+	u8 ddc_pin;
+
+	switch (port) {
+	case PORT_A:
+		ddc_pin = GMBUS_PIN_1_BXT;
+		break;
+	case PORT_B:
+		ddc_pin = GMBUS_PIN_2_BXT;
+		break;
+	case PORT_C:
+		ddc_pin = GMBUS_PIN_9_TC1_ICP;
+		break;
+	default:
+		MISSING_CASE(port);
+		ddc_pin = GMBUS_PIN_1_BXT;
+		break;
+	}
+	return ddc_pin;
+}
+
 static u8 g4x_port_to_ddc_pin(struct drm_i915_private *dev_priv,
 			      enum port port)
 {
@@ -2902,7 +3017,9 @@ static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
 		return info->alternate_ddc_pin;
 	}
 
-	if (HAS_PCH_ICP(dev_priv))
+	if (HAS_PCH_MCC(dev_priv))
+		ddc_pin = mcc_port_to_ddc_pin(dev_priv, port);
+	else if (HAS_PCH_ICP(dev_priv))
 		ddc_pin = icl_port_to_ddc_pin(dev_priv, port);
 	else if (HAS_PCH_CNP(dev_priv))
 		ddc_pin = cnp_port_to_ddc_pin(dev_priv, port);
diff --git a/drivers/gpu/drm/i915/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index 106c2e0bc3c9..106c2e0bc3c9 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index b8937c788f03..ea3de4acc850 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -27,6 +27,7 @@
 
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_hotplug.h"
 
 /**
  * DOC: Hotplug
@@ -229,7 +230,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 	intel_wakeref_t wakeref;
 	enum hpd_pin pin;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	for_each_hpd_pin(pin) {
@@ -262,7 +263,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		dev_priv->display.hpd_irq_setup(dev_priv);
 	spin_unlock_irq(&dev_priv->irq_lock);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 }
 
 bool intel_encoder_hotplug(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.h b/drivers/gpu/drm/i915/display/intel_hotplug.h
new file mode 100644
index 000000000000..805f897dbb7a
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_HOTPLUG_H__
+#define __INTEL_HOTPLUG_H__
+
+#include <linux/types.h>
+
+#include <drm/i915_drm.h>
+
+struct drm_i915_private;
+struct intel_connector;
+struct intel_encoder;
+
+void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
+bool intel_encoder_hotplug(struct intel_encoder *encoder,
+			   struct intel_connector *connector);
+void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
+			   u32 pin_mask, u32 long_mask);
+void intel_hpd_init(struct drm_i915_private *dev_priv);
+void intel_hpd_init_work(struct drm_i915_private *dev_priv);
+void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
+enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
+				   enum port port);
+bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
+void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
+
+#endif /* __INTEL_HOTPLUG_H__ */
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
index f8239bca3820..b19800b58442 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
@@ -61,16 +61,18 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/pci.h>
-#include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
-#include "i915_drv.h"
-#include <linux/delay.h>
 #include <drm/intel_lpe_audio.h>
 
+#include "i915_drv.h"
+#include "intel_lpe_audio.h"
+
 #define HAS_LPE_AUDIO(dev_priv) ((dev_priv)->lpe_audio.platdev != NULL)
 
 static struct platform_device *
diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.h b/drivers/gpu/drm/i915/display/intel_lpe_audio.h
new file mode 100644
index 000000000000..f848c5038714
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_LPE_AUDIO_H__
+#define __INTEL_LPE_AUDIO_H__
+
+#include <linux/types.h>
+
+enum pipe;
+enum port;
+struct drm_i915_private;
+
+int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
+			    enum pipe pipe, enum port port,
+			    const void *eld, int ls_clock, bool dp_output);
+
+#endif /* __INTEL_LPE_AUDIO_H__ */
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c
index 7028d0cf3bb1..7028d0cf3bb1 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.c
diff --git a/drivers/gpu/drm/i915/intel_lspcon.h b/drivers/gpu/drm/i915/display/intel_lspcon.h
index 37cfddf8a9c5..37cfddf8a9c5 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.h
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.h
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 51d1d59c1619..efefed62a7f8 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -40,8 +40,10 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
+#include "intel_gmbus.h"
 #include "intel_lvds.h"
 #include "intel_panel.h"
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.h b/drivers/gpu/drm/i915/display/intel_lvds.h
index bc9c8b84ba2f..bc9c8b84ba2f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.h
+++ b/drivers/gpu/drm/i915/display/intel_lvds.h
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index 8fa1159d097f..824881271351 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -32,10 +32,11 @@
 
 #include <drm/i915_drm.h>
 
+#include "display/intel_panel.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "intel_opregion.h"
-#include "intel_panel.h"
 
 #define OPREGION_HEADER_OFFSET 0
 #define OPREGION_ACPI_OFFSET   0x100
diff --git a/drivers/gpu/drm/i915/intel_opregion.h b/drivers/gpu/drm/i915/display/intel_opregion.h
index 4aa68ffbd30e..4aa68ffbd30e 100644
--- a/drivers/gpu/drm/i915/intel_opregion.h
+++ b/drivers/gpu/drm/i915/display/intel_opregion.h
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index eb317759b5d3..21339b7f6a3e 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -25,13 +25,17 @@
  *
  * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c
  */
-#include <drm/i915_drm.h>
+
 #include <drm/drm_fourcc.h>
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_pm.h"
 
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
+#include "intel_overlay.h"
 
 /* Limits for overlay size. According to intel doc, the real limits are:
  * Y width: 4095, UV width (planar): 2047, Y height: 2047,
@@ -235,10 +239,9 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
 
 static struct i915_request *alloc_request(struct intel_overlay *overlay)
 {
-	struct drm_i915_private *dev_priv = overlay->i915;
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
+	struct intel_engine_cs *engine = overlay->i915->engine[RCS0];
 
-	return i915_request_alloc(engine, dev_priv->kernel_context);
+	return i915_request_create(engine->kernel_context);
 }
 
 /* overlay needs to be disable in OCMD reg */
@@ -762,8 +765,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
+	i915_gem_object_lock(new_bo);
 	vma = i915_gem_object_pin_to_display_plane(new_bo,
 						   0, NULL, PIN_MAPPABLE);
+	i915_gem_object_unlock(new_bo);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
@@ -1302,15 +1307,20 @@ out_unlock:
 
 static int get_registers(struct intel_overlay *overlay, bool use_phys)
 {
+	struct drm_i915_private *i915 = overlay->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create_stolen(overlay->i915, PAGE_SIZE);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	obj = i915_gem_object_create_stolen(i915, PAGE_SIZE);
 	if (obj == NULL)
-		obj = i915_gem_object_create_internal(overlay->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_unlock;
+	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
@@ -1331,10 +1341,13 @@ static int get_registers(struct intel_overlay *overlay, bool use_phys)
 	}
 
 	overlay->reg_bo = obj;
+	mutex_unlock(&i915->drm.struct_mutex);
 	return 0;
 
 err_put_bo:
 	i915_gem_object_put(obj);
+err_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
@@ -1360,18 +1373,10 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 
 	INIT_ACTIVE_REQUEST(&overlay->last_flip);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-
 	ret = get_registers(overlay, OVERLAY_NEEDS_PHYSICAL(dev_priv));
 	if (ret)
 		goto out_free;
 
-	ret = i915_gem_object_set_to_gtt_domain(overlay->reg_bo, true);
-	if (ret)
-		goto out_reg_bo;
-
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	memset_io(overlay->regs, 0, sizeof(struct overlay_registers));
 	update_polyphase_filter(overlay->regs);
 	update_reg_attrs(overlay, overlay->regs);
@@ -1380,10 +1385,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
 	DRM_INFO("Initialized overlay support.\n");
 	return;
 
-out_reg_bo:
-	i915_gem_object_put(overlay->reg_bo);
 out_free:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 	kfree(overlay);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.h b/drivers/gpu/drm/i915/display/intel_overlay.h
new file mode 100644
index 000000000000..a167c28acd27
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_overlay.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_OVERLAY_H__
+#define __INTEL_OVERLAY_H__
+
+struct drm_device;
+struct drm_file;
+struct drm_i915_error_state_buf;
+struct drm_i915_private;
+struct intel_overlay;
+struct intel_overlay_error_state;
+
+void intel_overlay_setup(struct drm_i915_private *dev_priv);
+void intel_overlay_cleanup(struct drm_i915_private *dev_priv);
+int intel_overlay_switch_off(struct intel_overlay *overlay);
+int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv);
+int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+void intel_overlay_reset(struct drm_i915_private *dev_priv);
+struct intel_overlay_error_state *
+intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
+void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
+				     struct intel_overlay_error_state *error);
+
+#endif /* __INTEL_OVERLAY_H__ */
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index 4ab4ce6569e7..39d742094065 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -35,7 +35,9 @@
 #include <linux/pwm.h>
 
 #include "intel_connector.h"
+#include "intel_dp_aux_backlight.h"
 #include "intel_drv.h"
+#include "intel_dsi_dcs_backlight.h"
 #include "intel_panel.h"
 
 #define CRC_PMIC_PWM_PERIOD_NS	21333
@@ -1286,7 +1288,7 @@ static int intel_backlight_device_get_brightness(struct backlight_device *bd)
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		u32 hw_level;
 
 		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
diff --git a/drivers/gpu/drm/i915/intel_panel.h b/drivers/gpu/drm/i915/display/intel_panel.h
index cedeea443336..cedeea443336 100644
--- a/drivers/gpu/drm/i915/intel_panel.h
+++ b/drivers/gpu/drm/i915/display/intel_panel.h
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
index e7c7be4911c1..1e2c4307d05a 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
@@ -29,6 +29,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
+#include "intel_atomic.h"
 #include "intel_drv.h"
 #include "intel_pipe_crc.h"
 
@@ -313,17 +314,8 @@ retry:
 
 	if (IS_HASWELL(dev_priv) &&
 	    pipe_config->base.active && crtc->pipe == PIPE_A &&
-	    pipe_config->cpu_transcoder == TRANSCODER_EDP) {
-		bool old_need_power_well = pipe_config->pch_pfit.enabled ||
-			pipe_config->pch_pfit.force_thru;
-		bool new_need_power_well = pipe_config->pch_pfit.enabled ||
-			enable;
-
-		pipe_config->pch_pfit.force_thru = enable;
-
-		if (old_need_power_well != new_need_power_well)
-			pipe_config->base.connectors_changed = true;
-	}
+	    pipe_config->cpu_transcoder == TRANSCODER_EDP)
+		pipe_config->base.mode_changed = true;
 
 	ret = drm_atomic_commit(state);
 
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.h b/drivers/gpu/drm/i915/display/intel_pipe_crc.h
index 81eaf1854788..db258a756fc6 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.h
+++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.h
@@ -9,9 +9,11 @@
 #include <linux/types.h>
 
 struct drm_crtc;
+struct drm_i915_private;
 struct intel_crtc;
 
 #ifdef CONFIG_DEBUG_FS
+void intel_display_crc_init(struct drm_i915_private *dev_priv);
 int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name);
 int intel_crtc_verify_crc_source(struct drm_crtc *crtc,
 				 const char *source_name, size_t *values_cnt);
@@ -20,6 +22,7 @@ const char *const *intel_crtc_get_crc_sources(struct drm_crtc *crtc,
 void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc);
 void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc);
 #else
+static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
 #define intel_crtc_set_crc_source NULL
 #define intel_crtc_verify_crc_source NULL
 #define intel_crtc_get_crc_sources NULL
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 963663ba0edf..69d908e6a050 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -23,8 +23,9 @@
 
 #include <drm/drm_atomic_helper.h>
 
+#include "display/intel_dp.h"
+
 #include "i915_drv.h"
-#include "intel_dp.h"
 #include "intel_drv.h"
 #include "intel_psr.h"
 #include "intel_sprite.h"
@@ -229,16 +230,6 @@ void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
 	}
 }
 
-static bool intel_dp_get_colorimetry_status(struct intel_dp *intel_dp)
-{
-	u8 dprx = 0;
-
-	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_DPRX_FEATURE_ENUMERATION_LIST,
-			      &dprx) != 1)
-		return false;
-	return dprx & DP_VSC_SDP_EXT_FOR_COLORIMETRY_SUPPORTED;
-}
-
 static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp)
 {
 	u8 alpm_caps = 0;
@@ -352,7 +343,7 @@ static void intel_psr_setup_vsc(struct intel_dp *intel_dp,
 {
 	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
-	struct edp_vsc_psr psr_vsc;
+	struct dp_sdp psr_vsc;
 
 	if (dev_priv->psr.psr2_enabled) {
 		/* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */
@@ -872,16 +863,23 @@ void intel_psr_disable(struct intel_dp *intel_dp,
 
 static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv)
 {
-	/*
-	 * Display WA #0884: all
-	 * This documented WA for bxt can be safely applied
-	 * broadly so we can force HW tracking to exit PSR
-	 * instead of disabling and re-enabling.
-	 * Workaround tells us to write 0 to CUR_SURFLIVE_A,
-	 * but it makes more sense write to the current active
-	 * pipe.
-	 */
-	I915_WRITE(CURSURFLIVE(dev_priv->psr.pipe), 0);
+	if (INTEL_GEN(dev_priv) >= 9)
+		/*
+		 * Display WA #0884: skl+
+		 * This documented WA for bxt can be safely applied
+		 * broadly so we can force HW tracking to exit PSR
+		 * instead of disabling and re-enabling.
+		 * Workaround tells us to write 0 to CUR_SURFLIVE_A,
+		 * but it makes more sense write to the current active
+		 * pipe.
+		 */
+		I915_WRITE(CURSURFLIVE(dev_priv->psr.pipe), 0);
+	else
+		/*
+		 * A write to CURSURFLIVE do not cause HW tracking to exit PSR
+		 * on older gens so doing the manual exit instead.
+		 */
+		intel_psr_exit(dev_priv);
 }
 
 /**
@@ -912,6 +910,15 @@ void intel_psr_update(struct intel_dp *intel_dp,
 		/* Force a PSR exit when enabling CRC to avoid CRC timeouts */
 		if (crtc_state->crc_enabled && psr->enabled)
 			psr_force_hw_tracking_exit(dev_priv);
+		else if (INTEL_GEN(dev_priv) < 9 && psr->enabled) {
+			/*
+			 * Activate PSR again after a force exit when enabling
+			 * CRC in older gens
+			 */
+			if (!dev_priv->psr.active &&
+			    !dev_priv->psr.busy_frontbuffer_bits)
+				schedule_work(&dev_priv->psr.work);
+		}
 
 		goto unlock;
 	}
diff --git a/drivers/gpu/drm/i915/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index dc818826f36d..dc818826f36d 100644
--- a/drivers/gpu/drm/i915/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
diff --git a/drivers/gpu/drm/i915/intel_quirks.c b/drivers/gpu/drm/i915/display/intel_quirks.c
index ec2b0fc92b8b..0b749c28541f 100644
--- a/drivers/gpu/drm/i915/intel_quirks.c
+++ b/drivers/gpu/drm/i915/display/intel_quirks.c
@@ -6,6 +6,7 @@
 #include <linux/dmi.h>
 
 #include "intel_drv.h"
+#include "intel_quirks.h"
 
 /*
  * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason
diff --git a/drivers/gpu/drm/i915/display/intel_quirks.h b/drivers/gpu/drm/i915/display/intel_quirks.h
new file mode 100644
index 000000000000..b0fcff142a56
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_quirks.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_QUIRKS_H__
+#define __INTEL_QUIRKS_H__
+
+struct drm_i915_private;
+
+void intel_init_quirks(struct drm_i915_private *dev_priv);
+
+#endif /* __INTEL_QUIRKS_H__ */
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 0e3d91d9ef13..ceda03e5a3d4 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -37,9 +37,13 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
+#include "intel_fifo_underrun.h"
+#include "intel_gmbus.h"
 #include "intel_hdmi.h"
+#include "intel_hotplug.h"
 #include "intel_panel.h"
 #include "intel_sdvo.h"
 #include "intel_sdvo_regs.h"
@@ -518,6 +522,7 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo,
 #define BUF_LEN 256
 	char buffer[BUF_LEN];
 
+	buffer[0] = '\0';
 
 	/*
 	 * The documentation states that all commands will be
@@ -581,7 +586,8 @@ static bool intel_sdvo_read_response(struct intel_sdvo *intel_sdvo,
 	return true;
 
 log_fail:
-	DRM_DEBUG_KMS("%s: R: ... failed\n", SDVO_NAME(intel_sdvo));
+	DRM_DEBUG_KMS("%s: R: ... failed %s\n",
+		      SDVO_NAME(intel_sdvo), buffer);
 	return false;
 }
 
@@ -916,6 +922,13 @@ static bool intel_sdvo_set_colorimetry(struct intel_sdvo *intel_sdvo,
 	return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_COLORIMETRY, &mode, 1);
 }
 
+static bool intel_sdvo_set_audio_state(struct intel_sdvo *intel_sdvo,
+				       u8 audio_state)
+{
+	return intel_sdvo_set_value(intel_sdvo, SDVO_CMD_SET_AUDIO_STAT,
+				    &audio_state, 1);
+}
+
 #if 0
 static void intel_sdvo_dump_hdmi_buf(struct intel_sdvo *intel_sdvo)
 {
@@ -969,6 +982,9 @@ static bool intel_sdvo_write_infoframe(struct intel_sdvo *intel_sdvo,
 	DRM_DEBUG_KMS("writing sdvo hbuf: %i, hbuf_size %i, hbuf_size: %i\n",
 		      if_index, length, hbuf_size);
 
+	if (hbuf_size < length)
+		return false;
+
 	for (i = 0; i < hbuf_size; i += 8) {
 		memset(tmp, 0, 8);
 		if (i < length)
@@ -1001,6 +1017,11 @@ static ssize_t intel_sdvo_read_infoframe(struct intel_sdvo *intel_sdvo,
 	if (av_split < if_index)
 		return 0;
 
+	if (!intel_sdvo_set_value(intel_sdvo,
+				  SDVO_CMD_SET_HBUF_INDEX,
+				  set_buf_index, 2))
+		return -ENXIO;
+
 	if (!intel_sdvo_get_value(intel_sdvo,
 				  SDVO_CMD_GET_HBUF_TXRATE,
 				  &tx_rate, 1))
@@ -1009,11 +1030,6 @@ static ssize_t intel_sdvo_read_infoframe(struct intel_sdvo *intel_sdvo,
 	if (tx_rate == SDVO_HBUF_TX_DISABLED)
 		return 0;
 
-	if (!intel_sdvo_set_value(intel_sdvo,
-				  SDVO_CMD_SET_HBUF_INDEX,
-				  set_buf_index, 2))
-		return -ENXIO;
-
 	if (!intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_HBUF_INFO,
 				  &hbuf_size, 1))
 		return -ENXIO;
@@ -1092,7 +1108,7 @@ static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo,
 
 	return intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_AVI_IF,
 					  SDVO_HBUF_TX_VSYNC,
-					  sdvo_data, sizeof(sdvo_data));
+					  sdvo_data, len);
 }
 
 static void intel_sdvo_get_avi_infoframe(struct intel_sdvo *intel_sdvo,
@@ -1118,7 +1134,7 @@ static void intel_sdvo_get_avi_infoframe(struct intel_sdvo *intel_sdvo,
 	crtc_state->infoframes.enable |=
 		intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_AVI);
 
-	ret = hdmi_infoframe_unpack(frame, sdvo_data, sizeof(sdvo_data));
+	ret = hdmi_infoframe_unpack(frame, sdvo_data, len);
 	if (ret) {
 		DRM_DEBUG_KMS("Failed to unpack AVI infoframe\n");
 		return;
@@ -1487,11 +1503,6 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder,
 	else
 		sdvox |= SDVO_PIPE_SEL(crtc->pipe);
 
-	if (crtc_state->has_audio) {
-		WARN_ON_ONCE(INTEL_GEN(dev_priv) < 4);
-		sdvox |= SDVO_AUDIO_ENABLE;
-	}
-
 	if (INTEL_GEN(dev_priv) >= 4) {
 		/* done in crtc_mode_set as the dpll_md reg must be written early */
 	} else if (IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
@@ -1635,8 +1646,13 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
 	if (sdvox & HDMI_COLOR_RANGE_16_235)
 		pipe_config->limited_color_range = true;
 
-	if (sdvox & SDVO_AUDIO_ENABLE)
-		pipe_config->has_audio = true;
+	if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_AUDIO_STAT,
+				 &val, 1)) {
+		u8 mask = SDVO_AUDIO_ELD_VALID | SDVO_AUDIO_PRESENCE_DETECT;
+
+		if ((val & mask) == mask)
+			pipe_config->has_audio = true;
+	}
 
 	if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_ENCODE,
 				 &val, 1)) {
@@ -1647,6 +1663,32 @@ static void intel_sdvo_get_config(struct intel_encoder *encoder,
 	intel_sdvo_get_avi_infoframe(intel_sdvo, pipe_config);
 }
 
+static void intel_sdvo_disable_audio(struct intel_sdvo *intel_sdvo)
+{
+	intel_sdvo_set_audio_state(intel_sdvo, 0);
+}
+
+static void intel_sdvo_enable_audio(struct intel_sdvo *intel_sdvo,
+				    const struct intel_crtc_state *crtc_state,
+				    const struct drm_connector_state *conn_state)
+{
+	const struct drm_display_mode *adjusted_mode =
+		&crtc_state->base.adjusted_mode;
+	struct drm_connector *connector = conn_state->connector;
+	u8 *eld = connector->eld;
+
+	eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
+
+	intel_sdvo_set_audio_state(intel_sdvo, 0);
+
+	intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_ELD,
+				   SDVO_HBUF_TX_DISABLED,
+				   eld, drm_eld_size(eld));
+
+	intel_sdvo_set_audio_state(intel_sdvo, SDVO_AUDIO_ELD_VALID |
+				   SDVO_AUDIO_PRESENCE_DETECT);
+}
+
 static void intel_disable_sdvo(struct intel_encoder *encoder,
 			       const struct intel_crtc_state *old_crtc_state,
 			       const struct drm_connector_state *conn_state)
@@ -1656,6 +1698,9 @@ static void intel_disable_sdvo(struct intel_encoder *encoder,
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
 	u32 temp;
 
+	if (old_crtc_state->has_audio)
+		intel_sdvo_disable_audio(intel_sdvo);
+
 	intel_sdvo_set_active_outputs(intel_sdvo, 0);
 	if (0)
 		intel_sdvo_set_encoder_power_state(intel_sdvo,
@@ -1741,6 +1786,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder,
 		intel_sdvo_set_encoder_power_state(intel_sdvo,
 						   DRM_MODE_DPMS_ON);
 	intel_sdvo_set_active_outputs(intel_sdvo, intel_sdvo->attached_output);
+
+	if (pipe_config->has_audio)
+		intel_sdvo_enable_audio(intel_sdvo, pipe_config, conn_state);
 }
 
 static enum drm_mode_status
@@ -2349,9 +2397,10 @@ static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
 };
 
 static int intel_sdvo_atomic_check(struct drm_connector *conn,
-				   struct drm_connector_state *new_conn_state)
+				   struct drm_atomic_state *state)
 {
-	struct drm_atomic_state *state = new_conn_state->state;
+	struct drm_connector_state *new_conn_state =
+		drm_atomic_get_new_connector_state(state, conn);
 	struct drm_connector_state *old_conn_state =
 		drm_atomic_get_old_connector_state(state, conn);
 	struct intel_sdvo_connector_state *old_state =
@@ -2363,13 +2412,13 @@ static int intel_sdvo_atomic_check(struct drm_connector *conn,
 	    (memcmp(&old_state->tv, &new_state->tv, sizeof(old_state->tv)) ||
 	     memcmp(&old_conn_state->tv, &new_conn_state->tv, sizeof(old_conn_state->tv)))) {
 		struct drm_crtc_state *crtc_state =
-			drm_atomic_get_new_crtc_state(new_conn_state->state,
+			drm_atomic_get_new_crtc_state(state,
 						      new_conn_state->crtc);
 
 		crtc_state->connectors_changed = true;
 	}
 
-	return intel_digital_connector_atomic_check(conn, new_conn_state);
+	return intel_digital_connector_atomic_check(conn, state);
 }
 
 static const struct drm_connector_helper_funcs intel_sdvo_connector_helper_funcs = {
@@ -2603,7 +2652,6 @@ static bool
 intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
 {
 	struct drm_encoder *encoder = &intel_sdvo->base.base;
-	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
 	struct drm_connector *connector;
 	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
 	struct intel_connector *intel_connector;
@@ -2640,9 +2688,7 @@ intel_sdvo_dvi_init(struct intel_sdvo *intel_sdvo, int device)
 	encoder->encoder_type = DRM_MODE_ENCODER_TMDS;
 	connector->connector_type = DRM_MODE_CONNECTOR_DVID;
 
-	/* gen3 doesn't do the hdmi bits in the SDVO register */
-	if (INTEL_GEN(dev_priv) >= 4 &&
-	    intel_sdvo_is_hdmi_connector(intel_sdvo, device)) {
+	if (intel_sdvo_is_hdmi_connector(intel_sdvo, device)) {
 		connector->connector_type = DRM_MODE_CONNECTOR_HDMIA;
 		intel_sdvo_connector->is_hdmi = true;
 	}
diff --git a/drivers/gpu/drm/i915/intel_sdvo.h b/drivers/gpu/drm/i915/display/intel_sdvo.h
index c9e05bcdd141..c9e05bcdd141 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.h
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.h
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/display/intel_sdvo_regs.h
index db0ed499268a..13b9a8e257bb 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_sdvo_regs.h
@@ -24,6 +24,12 @@
  *	Eric Anholt <eric@anholt.net>
  */
 
+#ifndef __INTEL_SDVO_REGS_H__
+#define __INTEL_SDVO_REGS_H__
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
 /*
  * SDVO command definitions and structures.
  */
@@ -707,6 +713,9 @@ struct intel_sdvo_enhancements_arg {
 #define SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER 0x90
 #define SDVO_CMD_SET_AUDIO_STAT		0x91
 #define SDVO_CMD_GET_AUDIO_STAT		0x92
+  #define SDVO_AUDIO_ELD_VALID		(1 << 0)
+  #define SDVO_AUDIO_PRESENCE_DETECT	(1 << 1)
+  #define SDVO_AUDIO_CP_READY		(1 << 2)
 #define SDVO_CMD_SET_HBUF_INDEX		0x93
   #define SDVO_HBUF_INDEX_ELD		0
   #define SDVO_HBUF_INDEX_AVI_IF	1
@@ -728,3 +737,5 @@ struct intel_sdvo_encode {
 	u8 dvi_rev;
 	u8 hdmi_rev;
 } __packed;
+
+#endif /* __INTEL_SDVO_REGS_H__ */
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 2913e89280d7..004b52027ae8 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -256,6 +256,16 @@ int intel_plane_check_stride(const struct intel_plane_state *plane_state)
 	unsigned int rotation = plane_state->base.rotation;
 	u32 stride, max_stride;
 
+	/*
+	 * We ignore stride for all invisible planes that
+	 * can be remapped. Otherwise we could end up
+	 * with a false positive when the remapping didn't
+	 * kick in due the plane being invisible.
+	 */
+	if (intel_plane_can_remap(plane_state) &&
+	    !plane_state->base.visible)
+		return 0;
+
 	/* FIXME other color planes? */
 	stride = plane_state->color_plane[0].stride;
 	max_stride = plane->max_stride(plane, fb->format->format,
@@ -325,7 +335,8 @@ skl_plane_max_stride(struct intel_plane *plane,
 		     u32 pixel_format, u64 modifier,
 		     unsigned int rotation)
 {
-	int cpp = drm_format_plane_cpp(pixel_format, 0);
+	const struct drm_format_info *info = drm_format_info(pixel_format);
+	int cpp = info->cpp[0];
 
 	/*
 	 * "The stride in bytes must not exceed the
@@ -1417,6 +1428,10 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
+	ret = i9xx_check_plane_surface(plane_state);
+	if (ret)
+		return ret;
+
 	if (!plane_state->base.visible)
 		return 0;
 
@@ -1428,10 +1443,6 @@ g4x_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = i9xx_check_plane_surface(plane_state);
-	if (ret)
-		return ret;
-
 	if (INTEL_GEN(dev_priv) >= 7)
 		plane_state->ctl = ivb_sprite_ctl(crtc_state, plane_state);
 	else
@@ -1475,6 +1486,10 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
+	ret = i9xx_check_plane_surface(plane_state);
+	if (ret)
+		return ret;
+
 	if (!plane_state->base.visible)
 		return 0;
 
@@ -1482,10 +1497,6 @@ vlv_sprite_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = i9xx_check_plane_surface(plane_state);
-	if (ret)
-		return ret;
-
 	plane_state->ctl = vlv_sprite_ctl(crtc_state, plane_state);
 
 	return 0;
@@ -1639,6 +1650,10 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
+	ret = skl_check_plane_surface(plane_state);
+	if (ret)
+		return ret;
+
 	if (!plane_state->base.visible)
 		return 0;
 
@@ -1654,10 +1669,6 @@ static int skl_plane_check(struct intel_crtc_state *crtc_state,
 	if (ret)
 		return ret;
 
-	ret = skl_check_plane_surface(plane_state);
-	if (ret)
-		return ret;
-
 	/* HW only has 8 bits pixel precision, disable plane if invisible */
 	if (!(plane_state->base.alpha >> 8))
 		plane_state->base.visible = false;
@@ -2146,8 +2157,6 @@ static const struct drm_plane_funcs g4x_sprite_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = g4x_sprite_format_mod_supported,
@@ -2157,8 +2166,6 @@ static const struct drm_plane_funcs snb_sprite_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = snb_sprite_format_mod_supported,
@@ -2168,8 +2175,6 @@ static const struct drm_plane_funcs vlv_sprite_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = vlv_sprite_format_mod_supported,
@@ -2179,8 +2184,6 @@ static const struct drm_plane_funcs skl_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = intel_plane_destroy,
-	.atomic_get_property = intel_plane_atomic_get_property,
-	.atomic_set_property = intel_plane_atomic_set_property,
 	.atomic_duplicate_state = intel_plane_duplicate_state,
 	.atomic_destroy_state = intel_plane_destroy_state,
 	.format_mod_supported = skl_plane_format_mod_supported,
diff --git a/drivers/gpu/drm/i915/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h
index 84be8686be16..500f6bffb139 100644
--- a/drivers/gpu/drm/i915/intel_sprite.h
+++ b/drivers/gpu/drm/i915/display/intel_sprite.h
@@ -43,13 +43,17 @@ static inline bool icl_is_nv12_y_plane(enum plane_id id)
 	return false;
 }
 
+static inline u8 icl_hdr_plane_mask(void)
+{
+	return BIT(PLANE_PRIMARY) |
+		BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1);
+}
+
 static inline bool icl_is_hdr_plane(struct drm_i915_private *dev_priv,
 				    enum plane_id plane_id)
 {
-	if (INTEL_GEN(dev_priv) < 11)
-		return false;
-
-	return plane_id < PLANE_SPRITE2;
+	return INTEL_GEN(dev_priv) >= 11 &&
+		icl_hdr_plane_mask() & BIT(plane_id);
 }
 
 #endif /* __INTEL_SPRITE_H__ */
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index 5dbba33f4202..0a95df6c6a57 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -38,6 +38,7 @@
 #include "i915_drv.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
+#include "intel_hotplug.h"
 #include "intel_tv.h"
 
 enum tv_margin {
@@ -1820,16 +1821,18 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = {
 };
 
 static int intel_tv_atomic_check(struct drm_connector *connector,
-				 struct drm_connector_state *new_state)
+				 struct drm_atomic_state *state)
 {
+	struct drm_connector_state *new_state;
 	struct drm_crtc_state *new_crtc_state;
 	struct drm_connector_state *old_state;
 
+	new_state = drm_atomic_get_new_connector_state(state, connector);
 	if (!new_state->crtc)
 		return 0;
 
-	old_state = drm_atomic_get_old_connector_state(new_state->state, connector);
-	new_crtc_state = drm_atomic_get_new_crtc_state(new_state->state, new_state->crtc);
+	old_state = drm_atomic_get_old_connector_state(state, connector);
+	new_crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
 
 	if (old_state->tv.mode != new_state->tv.mode ||
 	    old_state->tv.margins.left != new_state->tv.margins.left ||
diff --git a/drivers/gpu/drm/i915/intel_tv.h b/drivers/gpu/drm/i915/display/intel_tv.h
index 44518575ec5c..44518575ec5c 100644
--- a/drivers/gpu/drm/i915/intel_tv.h
+++ b/drivers/gpu/drm/i915/display/intel_tv.h
diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
index fdbbb9a53804..2f4894e9a03d 100644
--- a/drivers/gpu/drm/i915/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
@@ -75,65 +75,51 @@ struct bdb_header {
 	u16 bdb_size;
 } __packed;
 
-/* strictly speaking, this is a "skip" block, but it has interesting info */
-struct vbios_data {
-	u8 type; /* 0 == desktop, 1 == mobile */
-	u8 relstage;
-	u8 chipset;
-	u8 lvds_present:1;
-	u8 tv_present:1;
-	u8 rsvd2:6; /* finish byte */
-	u8 rsvd3[4];
-	u8 signon[155];
-	u8 copyright[61];
-	u16 code_segment;
-	u8 dos_boot_mode;
-	u8 bandwidth_percent;
-	u8 rsvd4; /* popup memory size */
-	u8 resize_pci_bios;
-	u8 rsvd5; /* is crt already on ddc2 */
-} __packed;
-
 /*
  * There are several types of BIOS data blocks (BDBs), each block has
  * an ID and size in the first 3 bytes (ID in first, size in next 2).
  * Known types are listed below.
  */
-#define BDB_GENERAL_FEATURES	  1
-#define BDB_GENERAL_DEFINITIONS	  2
-#define BDB_OLD_TOGGLE_LIST	  3
-#define BDB_MODE_SUPPORT_LIST	  4
-#define BDB_GENERIC_MODE_TABLE	  5
-#define BDB_EXT_MMIO_REGS	  6
-#define BDB_SWF_IO		  7
-#define BDB_SWF_MMIO		  8
-#define BDB_PSR			  9
-#define BDB_MODE_REMOVAL_TABLE	 10
-#define BDB_CHILD_DEVICE_TABLE	 11
-#define BDB_DRIVER_FEATURES	 12
-#define BDB_DRIVER_PERSISTENCE	 13
-#define BDB_EXT_TABLE_PTRS	 14
-#define BDB_DOT_CLOCK_OVERRIDE	 15
-#define BDB_DISPLAY_SELECT	 16
-/* 17 rsvd */
-#define BDB_DRIVER_ROTATION	 18
-#define BDB_DISPLAY_REMOVE	 19
-#define BDB_OEM_CUSTOM		 20
-#define BDB_EFP_LIST		 21 /* workarounds for VGA hsync/vsync */
-#define BDB_SDVO_LVDS_OPTIONS	 22
-#define BDB_SDVO_PANEL_DTDS	 23
-#define BDB_SDVO_LVDS_PNP_IDS	 24
-#define BDB_SDVO_LVDS_POWER_SEQ	 25
-#define BDB_TV_OPTIONS		 26
-#define BDB_EDP			 27
-#define BDB_LVDS_OPTIONS	 40
-#define BDB_LVDS_LFP_DATA_PTRS	 41
-#define BDB_LVDS_LFP_DATA	 42
-#define BDB_LVDS_BACKLIGHT	 43
-#define BDB_LVDS_POWER		 44
-#define BDB_MIPI_CONFIG		 52
-#define BDB_MIPI_SEQUENCE	 53
-#define BDB_SKIP		254 /* VBIOS private block, ignore */
+enum bdb_block_id {
+	BDB_GENERAL_FEATURES		= 1,
+	BDB_GENERAL_DEFINITIONS		= 2,
+	BDB_OLD_TOGGLE_LIST		= 3,
+	BDB_MODE_SUPPORT_LIST		= 4,
+	BDB_GENERIC_MODE_TABLE		= 5,
+	BDB_EXT_MMIO_REGS		= 6,
+	BDB_SWF_IO			= 7,
+	BDB_SWF_MMIO			= 8,
+	BDB_PSR				= 9,
+	BDB_MODE_REMOVAL_TABLE		= 10,
+	BDB_CHILD_DEVICE_TABLE		= 11,
+	BDB_DRIVER_FEATURES		= 12,
+	BDB_DRIVER_PERSISTENCE		= 13,
+	BDB_EXT_TABLE_PTRS		= 14,
+	BDB_DOT_CLOCK_OVERRIDE		= 15,
+	BDB_DISPLAY_SELECT		= 16,
+	BDB_DRIVER_ROTATION		= 18,
+	BDB_DISPLAY_REMOVE		= 19,
+	BDB_OEM_CUSTOM			= 20,
+	BDB_EFP_LIST			= 21, /* workarounds for VGA hsync/vsync */
+	BDB_SDVO_LVDS_OPTIONS		= 22,
+	BDB_SDVO_PANEL_DTDS		= 23,
+	BDB_SDVO_LVDS_PNP_IDS		= 24,
+	BDB_SDVO_LVDS_POWER_SEQ		= 25,
+	BDB_TV_OPTIONS			= 26,
+	BDB_EDP				= 27,
+	BDB_LVDS_OPTIONS		= 40,
+	BDB_LVDS_LFP_DATA_PTRS		= 41,
+	BDB_LVDS_LFP_DATA		= 42,
+	BDB_LVDS_BACKLIGHT		= 43,
+	BDB_LVDS_POWER			= 44,
+	BDB_MIPI_CONFIG			= 52,
+	BDB_MIPI_SEQUENCE		= 53,
+	BDB_SKIP			= 254, /* VBIOS private block, ignore */
+};
+
+/*
+ * Block 1 - General Bit Definitions
+ */
 
 struct bdb_general_features {
         /* bits 1 */
@@ -176,6 +162,10 @@ struct bdb_general_features {
 	u8 rsvd11:2; /* finish byte */
 } __packed;
 
+/*
+ * Block 2 - General Bytes Definition
+ */
+
 /* pre-915 */
 #define GPIO_PIN_DVI_LVDS	0x03 /* "DVI/LVDS DDC GPIO pins" */
 #define GPIO_PIN_ADD_I2C	0x05 /* "ADDCARD I2C GPIO pins" */
@@ -324,6 +314,9 @@ enum vbt_gmbus_ddi {
 	ICL_DDC_BUS_PORT_2,
 	ICL_DDC_BUS_PORT_3,
 	ICL_DDC_BUS_PORT_4,
+	MCC_DDC_BUS_DDI_A = 0x1,
+	MCC_DDC_BUS_DDI_B,
+	MCC_DDC_BUS_DDI_C = 0x4,
 };
 
 #define DP_AUX_A 0x40
@@ -402,7 +395,8 @@ struct child_device_config {
 			u8 lspcon:1;				/* 192 */
 			u8 iboost:1;				/* 196 */
 			u8 hpd_invert:1;			/* 196 */
-			u8 flag_reserved:3;
+			u8 use_vbt_vswing:1;			/* 218 */
+			u8 flag_reserved:2;
 			u8 hdmi_support:1;			/* 158 */
 			u8 dp_support:1;			/* 158 */
 			u8 tmds_support:1;			/* 158 */
@@ -466,186 +460,36 @@ struct bdb_general_definitions {
 	u8 devices[0];
 } __packed;
 
-/* Mask for DRRS / Panel Channel / SSC / BLT control bits extraction */
-#define MODE_MASK		0x3
-
-struct bdb_lvds_options {
-	u8 panel_type;
-	u8 rsvd1;
-	/* LVDS capabilities, stored in a dword */
-	u8 pfit_mode:2;
-	u8 pfit_text_mode_enhanced:1;
-	u8 pfit_gfx_mode_enhanced:1;
-	u8 pfit_ratio_auto:1;
-	u8 pixel_dither:1;
-	u8 lvds_edid:1;
-	u8 rsvd2:1;
-	u8 rsvd4;
-	/* LVDS Panel channel bits stored here */
-	u32 lvds_panel_channel_bits;
-	/* LVDS SSC (Spread Spectrum Clock) bits stored here. */
-	u16 ssc_bits;
-	u16 ssc_freq;
-	u16 ssc_ddt;
-	/* Panel color depth defined here */
-	u16 panel_color_depth;
-	/* LVDS panel type bits stored here */
-	u32 dps_panel_type_bits;
-	/* LVDS backlight control type bits stored here */
-	u32 blt_control_type_bits;
-} __packed;
-
-/* LFP pointer table contains entries to the struct below */
-struct bdb_lvds_lfp_data_ptr {
-	u16 fp_timing_offset; /* offsets are from start of bdb */
-	u8 fp_table_size;
-	u16 dvo_timing_offset;
-	u8 dvo_table_size;
-	u16 panel_pnp_id_offset;
-	u8 pnp_table_size;
-} __packed;
-
-struct bdb_lvds_lfp_data_ptrs {
-	u8 lvds_entries; /* followed by one or more lvds_data_ptr structs */
-	struct bdb_lvds_lfp_data_ptr ptr[16];
-} __packed;
-
-/* LFP data has 3 blocks per entry */
-struct lvds_fp_timing {
-	u16 x_res;
-	u16 y_res;
-	u32 lvds_reg;
-	u32 lvds_reg_val;
-	u32 pp_on_reg;
-	u32 pp_on_reg_val;
-	u32 pp_off_reg;
-	u32 pp_off_reg_val;
-	u32 pp_cycle_reg;
-	u32 pp_cycle_reg_val;
-	u32 pfit_reg;
-	u32 pfit_reg_val;
-	u16 terminator;
-} __packed;
-
-struct lvds_dvo_timing {
-	u16 clock;		/**< In 10khz */
-	u8 hactive_lo;
-	u8 hblank_lo;
-	u8 hblank_hi:4;
-	u8 hactive_hi:4;
-	u8 vactive_lo;
-	u8 vblank_lo;
-	u8 vblank_hi:4;
-	u8 vactive_hi:4;
-	u8 hsync_off_lo;
-	u8 hsync_pulse_width_lo;
-	u8 vsync_pulse_width_lo:4;
-	u8 vsync_off_lo:4;
-	u8 vsync_pulse_width_hi:2;
-	u8 vsync_off_hi:2;
-	u8 hsync_pulse_width_hi:2;
-	u8 hsync_off_hi:2;
-	u8 himage_lo;
-	u8 vimage_lo;
-	u8 vimage_hi:4;
-	u8 himage_hi:4;
-	u8 h_border;
-	u8 v_border;
-	u8 rsvd1:3;
-	u8 digital:2;
-	u8 vsync_positive:1;
-	u8 hsync_positive:1;
-	u8 non_interlaced:1;
-} __packed;
-
-struct lvds_pnp_id {
-	u16 mfg_name;
-	u16 product_code;
-	u32 serial;
-	u8 mfg_week;
-	u8 mfg_year;
-} __packed;
-
-struct bdb_lvds_lfp_data_entry {
-	struct lvds_fp_timing fp_timing;
-	struct lvds_dvo_timing dvo_timing;
-	struct lvds_pnp_id pnp_id;
-} __packed;
-
-struct bdb_lvds_lfp_data {
-	struct bdb_lvds_lfp_data_entry data[16];
-} __packed;
-
-#define BDB_BACKLIGHT_TYPE_NONE	0
-#define BDB_BACKLIGHT_TYPE_PWM	2
-
-struct bdb_lfp_backlight_data_entry {
-	u8 type:2;
-	u8 active_low_pwm:1;
-	u8 obsolete1:5;
-	u16 pwm_freq_hz;
-	u8 min_brightness;
-	u8 obsolete2;
-	u8 obsolete3;
-} __packed;
-
-struct bdb_lfp_backlight_control_method {
-	u8 type:4;
-	u8 controller:4;
-} __packed;
-
-struct bdb_lfp_backlight_data {
-	u8 entry_size;
-	struct bdb_lfp_backlight_data_entry data[16];
-	u8 level[16];
-	struct bdb_lfp_backlight_control_method backlight_control[16];
-} __packed;
+/*
+ * Block 9 - SRD Feature Block
+ */
 
-struct aimdb_header {
-	char signature[16];
-	char oem_device[20];
-	u16 aimdb_version;
-	u16 aimdb_header_size;
-	u16 aimdb_size;
-} __packed;
+struct psr_table {
+	/* Feature bits */
+	u8 full_link:1;
+	u8 require_aux_to_wakeup:1;
+	u8 feature_bits_rsvd:6;
 
-struct aimdb_block {
-	u8 aimdb_id;
-	u16 aimdb_size;
-} __packed;
+	/* Wait times */
+	u8 idle_frames:4;
+	u8 lines_to_wait:3;
+	u8 wait_times_rsvd:1;
 
-struct vch_panel_data {
-	u16 fp_timing_offset;
-	u8 fp_timing_size;
-	u16 dvo_timing_offset;
-	u8 dvo_timing_size;
-	u16 text_fitting_offset;
-	u8 text_fitting_size;
-	u16 graphics_fitting_offset;
-	u8 graphics_fitting_size;
-} __packed;
+	/* TP wake up time in multiple of 100 */
+	u16 tp1_wakeup_time;
+	u16 tp2_tp3_wakeup_time;
 
-struct vch_bdb_22 {
-	struct aimdb_block aimdb_block;
-	struct vch_panel_data panels[16];
+	/* PSR2 TP2/TP3 wakeup time for 16 panels */
+	u32 psr2_tp2_tp3_wakeup_time;
 } __packed;
 
-struct bdb_sdvo_lvds_options {
-	u8 panel_backlight;
-	u8 h40_set_panel_type;
-	u8 panel_type;
-	u8 ssc_clk_freq;
-	u16 als_low_trip;
-	u16 als_high_trip;
-	u8 sclalarcoeff_tab_row_num;
-	u8 sclalarcoeff_tab_row_size;
-	u8 coefficient[8];
-	u8 panel_misc_bits_1;
-	u8 panel_misc_bits_2;
-	u8 panel_misc_bits_3;
-	u8 panel_misc_bits_4;
+struct bdb_psr {
+	struct psr_table psr_table[16];
 } __packed;
 
+/*
+ * Block 12 - Driver Features Data Block
+ */
 
 #define BDB_DRIVER_FEATURE_NO_LVDS		0
 #define BDB_DRIVER_FEATURE_INT_LVDS		1
@@ -706,6 +550,69 @@ struct bdb_driver_features {
 	u16 pc_feature_valid:1;
 } __packed;
 
+/*
+ * Block 22 - SDVO LVDS General Options
+ */
+
+struct bdb_sdvo_lvds_options {
+	u8 panel_backlight;
+	u8 h40_set_panel_type;
+	u8 panel_type;
+	u8 ssc_clk_freq;
+	u16 als_low_trip;
+	u16 als_high_trip;
+	u8 sclalarcoeff_tab_row_num;
+	u8 sclalarcoeff_tab_row_size;
+	u8 coefficient[8];
+	u8 panel_misc_bits_1;
+	u8 panel_misc_bits_2;
+	u8 panel_misc_bits_3;
+	u8 panel_misc_bits_4;
+} __packed;
+
+/*
+ * Block 23 - SDVO LVDS Panel DTDs
+ */
+
+struct lvds_dvo_timing {
+	u16 clock;		/**< In 10khz */
+	u8 hactive_lo;
+	u8 hblank_lo;
+	u8 hblank_hi:4;
+	u8 hactive_hi:4;
+	u8 vactive_lo;
+	u8 vblank_lo;
+	u8 vblank_hi:4;
+	u8 vactive_hi:4;
+	u8 hsync_off_lo;
+	u8 hsync_pulse_width_lo;
+	u8 vsync_pulse_width_lo:4;
+	u8 vsync_off_lo:4;
+	u8 vsync_pulse_width_hi:2;
+	u8 vsync_off_hi:2;
+	u8 hsync_pulse_width_hi:2;
+	u8 hsync_off_hi:2;
+	u8 himage_lo;
+	u8 vimage_lo;
+	u8 vimage_hi:4;
+	u8 himage_hi:4;
+	u8 h_border;
+	u8 v_border;
+	u8 rsvd1:3;
+	u8 digital:2;
+	u8 vsync_positive:1;
+	u8 hsync_positive:1;
+	u8 non_interlaced:1;
+} __packed;
+
+struct bdb_sdvo_panel_dtds {
+	struct lvds_dvo_timing dtds[4];
+} __packed;
+
+/*
+ * Block 27 - eDP VBT Block
+ */
+
 #define EDP_18BPP	0
 #define EDP_24BPP	1
 #define EDP_30BPP	2
@@ -758,154 +665,133 @@ struct bdb_edp {
 	struct edp_full_link_params full_link_params[16];	/* 199 */
 } __packed;
 
-struct psr_table {
-	/* Feature bits */
-	u8 full_link:1;
-	u8 require_aux_to_wakeup:1;
-	u8 feature_bits_rsvd:6;
+/*
+ * Block 40 - LFP Data Block
+ */
 
-	/* Wait times */
-	u8 idle_frames:4;
-	u8 lines_to_wait:3;
-	u8 wait_times_rsvd:1;
+/* Mask for DRRS / Panel Channel / SSC / BLT control bits extraction */
+#define MODE_MASK		0x3
 
-	/* TP wake up time in multiple of 100 */
-	u16 tp1_wakeup_time;
-	u16 tp2_tp3_wakeup_time;
+struct bdb_lvds_options {
+	u8 panel_type;
+	u8 panel_type2;						/* 212 */
+	/* LVDS capabilities, stored in a dword */
+	u8 pfit_mode:2;
+	u8 pfit_text_mode_enhanced:1;
+	u8 pfit_gfx_mode_enhanced:1;
+	u8 pfit_ratio_auto:1;
+	u8 pixel_dither:1;
+	u8 lvds_edid:1;
+	u8 rsvd2:1;
+	u8 rsvd4;
+	/* LVDS Panel channel bits stored here */
+	u32 lvds_panel_channel_bits;
+	/* LVDS SSC (Spread Spectrum Clock) bits stored here. */
+	u16 ssc_bits;
+	u16 ssc_freq;
+	u16 ssc_ddt;
+	/* Panel color depth defined here */
+	u16 panel_color_depth;
+	/* LVDS panel type bits stored here */
+	u32 dps_panel_type_bits;
+	/* LVDS backlight control type bits stored here */
+	u32 blt_control_type_bits;
 
-	/* PSR2 TP2/TP3 wakeup time for 16 panels */
-	u32 psr2_tp2_tp3_wakeup_time;
+	u16 lcdvcc_s0_enable;					/* 200 */
+	u32 rotation;						/* 228 */
 } __packed;
 
-struct bdb_psr {
-	struct psr_table psr_table[16];
+/*
+ * Block 41 - LFP Data Table Pointers
+ */
+
+/* LFP pointer table contains entries to the struct below */
+struct lvds_lfp_data_ptr {
+	u16 fp_timing_offset; /* offsets are from start of bdb */
+	u8 fp_table_size;
+	u16 dvo_timing_offset;
+	u8 dvo_table_size;
+	u16 panel_pnp_id_offset;
+	u8 pnp_table_size;
+} __packed;
+
+struct bdb_lvds_lfp_data_ptrs {
+	u8 lvds_entries; /* followed by one or more lvds_data_ptr structs */
+	struct lvds_lfp_data_ptr ptr[16];
 } __packed;
 
 /*
- * Driver<->VBIOS interaction occurs through scratch bits in
- * GR18 & SWF*.
+ * Block 42 - LFP Data Tables
  */
 
-/* GR18 bits are set on display switch and hotkey events */
-#define GR18_DRIVER_SWITCH_EN	(1<<7) /* 0: VBIOS control, 1: driver control */
-#define GR18_HOTKEY_MASK	0x78 /* See also SWF4 15:0 */
-#define   GR18_HK_NONE		(0x0<<3)
-#define   GR18_HK_LFP_STRETCH	(0x1<<3)
-#define   GR18_HK_TOGGLE_DISP	(0x2<<3)
-#define   GR18_HK_DISP_SWITCH	(0x4<<3) /* see SWF14 15:0 for what to enable */
-#define   GR18_HK_POPUP_DISABLED (0x6<<3)
-#define   GR18_HK_POPUP_ENABLED	(0x7<<3)
-#define   GR18_HK_PFIT		(0x8<<3)
-#define   GR18_HK_APM_CHANGE	(0xa<<3)
-#define   GR18_HK_MULTIPLE	(0xc<<3)
-#define GR18_USER_INT_EN	(1<<2)
-#define GR18_A0000_FLUSH_EN	(1<<1)
-#define GR18_SMM_EN		(1<<0)
-
-/* Set by driver, cleared by VBIOS */
-#define SWF00_YRES_SHIFT	16
-#define SWF00_XRES_SHIFT	0
-#define SWF00_RES_MASK		0xffff
-
-/* Set by VBIOS at boot time and driver at runtime */
-#define SWF01_TV2_FORMAT_SHIFT	8
-#define SWF01_TV1_FORMAT_SHIFT	0
-#define SWF01_TV_FORMAT_MASK	0xffff
-
-#define SWF10_VBIOS_BLC_I2C_EN	(1<<29)
-#define SWF10_GTT_OVERRIDE_EN	(1<<28)
-#define SWF10_LFP_DPMS_OVR	(1<<27) /* override DPMS on display switch */
-#define SWF10_ACTIVE_TOGGLE_LIST_MASK (7<<24)
-#define   SWF10_OLD_TOGGLE	0x0
-#define   SWF10_TOGGLE_LIST_1	0x1
-#define   SWF10_TOGGLE_LIST_2	0x2
-#define   SWF10_TOGGLE_LIST_3	0x3
-#define   SWF10_TOGGLE_LIST_4	0x4
-#define SWF10_PANNING_EN	(1<<23)
-#define SWF10_DRIVER_LOADED	(1<<22)
-#define SWF10_EXTENDED_DESKTOP	(1<<21)
-#define SWF10_EXCLUSIVE_MODE	(1<<20)
-#define SWF10_OVERLAY_EN	(1<<19)
-#define SWF10_PLANEB_HOLDOFF	(1<<18)
-#define SWF10_PLANEA_HOLDOFF	(1<<17)
-#define SWF10_VGA_HOLDOFF	(1<<16)
-#define SWF10_ACTIVE_DISP_MASK	0xffff
-#define   SWF10_PIPEB_LFP2	(1<<15)
-#define   SWF10_PIPEB_EFP2	(1<<14)
-#define   SWF10_PIPEB_TV2	(1<<13)
-#define   SWF10_PIPEB_CRT2	(1<<12)
-#define   SWF10_PIPEB_LFP	(1<<11)
-#define   SWF10_PIPEB_EFP	(1<<10)
-#define   SWF10_PIPEB_TV	(1<<9)
-#define   SWF10_PIPEB_CRT	(1<<8)
-#define   SWF10_PIPEA_LFP2	(1<<7)
-#define   SWF10_PIPEA_EFP2	(1<<6)
-#define   SWF10_PIPEA_TV2	(1<<5)
-#define   SWF10_PIPEA_CRT2	(1<<4)
-#define   SWF10_PIPEA_LFP	(1<<3)
-#define   SWF10_PIPEA_EFP	(1<<2)
-#define   SWF10_PIPEA_TV	(1<<1)
-#define   SWF10_PIPEA_CRT	(1<<0)
-
-#define SWF11_MEMORY_SIZE_SHIFT	16
-#define SWF11_SV_TEST_EN	(1<<15)
-#define SWF11_IS_AGP		(1<<14)
-#define SWF11_DISPLAY_HOLDOFF	(1<<13)
-#define SWF11_DPMS_REDUCED	(1<<12)
-#define SWF11_IS_VBE_MODE	(1<<11)
-#define SWF11_PIPEB_ACCESS	(1<<10) /* 0 here means pipe a */
-#define SWF11_DPMS_MASK		0x07
-#define   SWF11_DPMS_OFF	(1<<2)
-#define   SWF11_DPMS_SUSPEND	(1<<1)
-#define   SWF11_DPMS_STANDBY	(1<<0)
-#define   SWF11_DPMS_ON		0
-
-#define SWF14_GFX_PFIT_EN	(1<<31)
-#define SWF14_TEXT_PFIT_EN	(1<<30)
-#define SWF14_LID_STATUS_CLOSED	(1<<29) /* 0 here means open */
-#define SWF14_POPUP_EN		(1<<28)
-#define SWF14_DISPLAY_HOLDOFF	(1<<27)
-#define SWF14_DISP_DETECT_EN	(1<<26)
-#define SWF14_DOCKING_STATUS_DOCKED (1<<25) /* 0 here means undocked */
-#define SWF14_DRIVER_STATUS	(1<<24)
-#define SWF14_OS_TYPE_WIN9X	(1<<23)
-#define SWF14_OS_TYPE_WINNT	(1<<22)
-/* 21:19 rsvd */
-#define SWF14_PM_TYPE_MASK	0x00070000
-#define   SWF14_PM_ACPI_VIDEO	(0x4 << 16)
-#define   SWF14_PM_ACPI		(0x3 << 16)
-#define   SWF14_PM_APM_12	(0x2 << 16)
-#define   SWF14_PM_APM_11	(0x1 << 16)
-#define SWF14_HK_REQUEST_MASK	0x0000ffff /* see GR18 6:3 for event type */
-          /* if GR18 indicates a display switch */
-#define   SWF14_DS_PIPEB_LFP2_EN (1<<15)
-#define   SWF14_DS_PIPEB_EFP2_EN (1<<14)
-#define   SWF14_DS_PIPEB_TV2_EN  (1<<13)
-#define   SWF14_DS_PIPEB_CRT2_EN (1<<12)
-#define   SWF14_DS_PIPEB_LFP_EN  (1<<11)
-#define   SWF14_DS_PIPEB_EFP_EN  (1<<10)
-#define   SWF14_DS_PIPEB_TV_EN   (1<<9)
-#define   SWF14_DS_PIPEB_CRT_EN  (1<<8)
-#define   SWF14_DS_PIPEA_LFP2_EN (1<<7)
-#define   SWF14_DS_PIPEA_EFP2_EN (1<<6)
-#define   SWF14_DS_PIPEA_TV2_EN  (1<<5)
-#define   SWF14_DS_PIPEA_CRT2_EN (1<<4)
-#define   SWF14_DS_PIPEA_LFP_EN  (1<<3)
-#define   SWF14_DS_PIPEA_EFP_EN  (1<<2)
-#define   SWF14_DS_PIPEA_TV_EN   (1<<1)
-#define   SWF14_DS_PIPEA_CRT_EN  (1<<0)
-          /* if GR18 indicates a panel fitting request */
-#define   SWF14_PFIT_EN		(1<<0) /* 0 means disable */
-          /* if GR18 indicates an APM change request */
-#define   SWF14_APM_HIBERNATE	0x4
-#define   SWF14_APM_SUSPEND	0x3
-#define   SWF14_APM_STANDBY	0x1
-#define   SWF14_APM_RESTORE	0x0
-
-/* Block 52 contains MIPI configuration block
- * 6 * bdb_mipi_config, followed by 6 pps data block
- * block below
+/* LFP data has 3 blocks per entry */
+struct lvds_fp_timing {
+	u16 x_res;
+	u16 y_res;
+	u32 lvds_reg;
+	u32 lvds_reg_val;
+	u32 pp_on_reg;
+	u32 pp_on_reg_val;
+	u32 pp_off_reg;
+	u32 pp_off_reg_val;
+	u32 pp_cycle_reg;
+	u32 pp_cycle_reg_val;
+	u32 pfit_reg;
+	u32 pfit_reg_val;
+	u16 terminator;
+} __packed;
+
+struct lvds_pnp_id {
+	u16 mfg_name;
+	u16 product_code;
+	u32 serial;
+	u8 mfg_week;
+	u8 mfg_year;
+} __packed;
+
+struct lvds_lfp_data_entry {
+	struct lvds_fp_timing fp_timing;
+	struct lvds_dvo_timing dvo_timing;
+	struct lvds_pnp_id pnp_id;
+} __packed;
+
+struct bdb_lvds_lfp_data {
+	struct lvds_lfp_data_entry data[16];
+} __packed;
+
+/*
+ * Block 43 - LFP Backlight Control Data Block
  */
+
+#define BDB_BACKLIGHT_TYPE_NONE	0
+#define BDB_BACKLIGHT_TYPE_PWM	2
+
+struct lfp_backlight_data_entry {
+	u8 type:2;
+	u8 active_low_pwm:1;
+	u8 obsolete1:5;
+	u16 pwm_freq_hz;
+	u8 min_brightness;
+	u8 obsolete2;
+	u8 obsolete3;
+} __packed;
+
+struct lfp_backlight_control_method {
+	u8 type:4;
+	u8 controller:4;
+} __packed;
+
+struct bdb_lfp_backlight_data {
+	u8 entry_size;
+	struct lfp_backlight_data_entry data[16];
+	u8 level[16];
+	struct lfp_backlight_control_method backlight_control[16];
+} __packed;
+
+/*
+ * Block 52 - MIPI Configuration Block
+ */
+
 #define MAX_MIPI_CONFIGURATIONS	6
 
 struct bdb_mipi_config {
@@ -913,24 +799,13 @@ struct bdb_mipi_config {
 	struct mipi_pps_data pps[MAX_MIPI_CONFIGURATIONS];
 } __packed;
 
-/* Block 53 contains MIPI sequences as needed by the panel
- * for enabling it. This block can be variable in size and
- * can be maximum of 6 blocks
+/*
+ * Block 53 - MIPI Sequence Block
  */
+
 struct bdb_mipi_sequence {
 	u8 version;
-	u8 data[0];
+	u8 data[0]; /* up to 6 variable length blocks */
 } __packed;
 
-enum mipi_gpio_pin_index {
-	MIPI_GPIO_UNDEFINED = 0,
-	MIPI_GPIO_PANEL_ENABLE,
-	MIPI_GPIO_BL_ENABLE,
-	MIPI_GPIO_PWM_ENABLE,
-	MIPI_GPIO_RESET_N,
-	MIPI_GPIO_PWR_DOWN_R,
-	MIPI_GPIO_STDBY_RST_N,
-	MIPI_GPIO_MAX
-};
-
 #endif /* _INTEL_VBT_DEFS_H_ */
diff --git a/drivers/gpu/drm/i915/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 3f9921ba4a76..ffec807b8960 100644
--- a/drivers/gpu/drm/i915/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -7,8 +7,10 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "intel_drv.h"
+#include "intel_vdsc.h"
 
 enum ROW_INDEX_BPP {
 	ROW_INDEX_6BPP = 0,
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h
new file mode 100644
index 000000000000..90d3f6017fcb
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_VDSC_H__
+#define __INTEL_VDSC_H__
+
+struct intel_encoder;
+struct intel_crtc_state;
+struct intel_dp;
+
+void intel_dsc_enable(struct intel_encoder *encoder,
+		      const struct intel_crtc_state *crtc_state);
+void intel_dsc_disable(const struct intel_crtc_state *crtc_state);
+int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
+				struct intel_crtc_state *pipe_config);
+enum intel_display_power_domain
+intel_dsc_power_domain(const struct intel_crtc_state *crtc_state);
+
+#endif /* __INTEL_VDSC_H__ */
diff --git a/drivers/gpu/drm/i915/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index e0b1ec821960..e272d826210a 100644
--- a/drivers/gpu/drm/i915/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -30,13 +30,15 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_mipi_dsi.h>
-#include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "intel_atomic.h"
 #include "intel_connector.h"
 #include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_fifo_underrun.h"
 #include "intel_panel.h"
+#include "intel_sideband.h"
 
 /* return pixels in terms of txbyteclkhs */
 static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count,
@@ -248,7 +250,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs,
 
 static void band_gap_reset(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0001);
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0005);
@@ -257,29 +259,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv)
 	vlv_flisdsi_write(dev_priv, 0x0F, 0x0000);
 	vlv_flisdsi_write(dev_priv, 0x08, 0x0000);
 
-	mutex_unlock(&dev_priv->sb_lock);
-}
-
-static int bdw_get_pipemisc_bpp(struct intel_crtc *crtc)
-{
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 tmp;
-
-	tmp = I915_READ(PIPEMISC(crtc->pipe));
-
-	switch (tmp & PIPEMISC_DITHER_BPC_MASK) {
-	case PIPEMISC_DITHER_6_BPC:
-		return 18;
-	case PIPEMISC_DITHER_8_BPC:
-		return 24;
-	case PIPEMISC_DITHER_10_BPC:
-		return 30;
-	case PIPEMISC_DITHER_12_BPC:
-		return 36;
-	default:
-		MISSING_CASE(tmp);
-		return 0;
-	}
+	vlv_flisdsi_put(dev_priv);
 }
 
 static int intel_dsi_compute_config(struct intel_encoder *encoder,
@@ -515,11 +495,11 @@ static void vlv_dsi_device_ready(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_flisdsi_get(dev_priv);
 	/* program rcomp for compliance, reduce from 50 ohms to 45 ohms
 	 * needed everytime after power gate */
 	vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_flisdsi_put(dev_priv);
 
 	/* bandgap reset is needed after everytime we do power gate */
 	band_gap_reset(dev_priv);
@@ -1689,6 +1669,174 @@ static void intel_dsi_add_properties(struct intel_connector *connector)
 	}
 }
 
+#define NS_KHZ_RATIO		1000000
+
+#define PREPARE_CNT_MAX		0x3F
+#define EXIT_ZERO_CNT_MAX	0x3F
+#define CLK_ZERO_CNT_MAX	0xFF
+#define TRAIL_CNT_MAX		0x1F
+
+static void vlv_dphy_param_init(struct intel_dsi *intel_dsi)
+{
+	struct drm_device *dev = intel_dsi->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct mipi_config *mipi_config = dev_priv->vbt.dsi.config;
+	u32 tlpx_ns, extra_byte_count, tlpx_ui;
+	u32 ui_num, ui_den;
+	u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt;
+	u32 ths_prepare_ns, tclk_trail_ns;
+	u32 tclk_prepare_clkzero, ths_prepare_hszero;
+	u32 lp_to_hs_switch, hs_to_lp_switch;
+	u32 mul;
+
+	tlpx_ns = intel_dsi_tlpx_ns(intel_dsi);
+
+	switch (intel_dsi->lane_count) {
+	case 1:
+	case 2:
+		extra_byte_count = 2;
+		break;
+	case 3:
+		extra_byte_count = 4;
+		break;
+	case 4:
+	default:
+		extra_byte_count = 3;
+		break;
+	}
+
+	/* in Kbps */
+	ui_num = NS_KHZ_RATIO;
+	ui_den = intel_dsi_bitrate(intel_dsi);
+
+	tclk_prepare_clkzero = mipi_config->tclk_prepare_clkzero;
+	ths_prepare_hszero = mipi_config->ths_prepare_hszero;
+
+	/*
+	 * B060
+	 * LP byte clock = TLPX/ (8UI)
+	 */
+	intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num);
+
+	/* DDR clock period = 2 * UI
+	 * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ)
+	 * UI(nsec) = 10^6 / bitrate
+	 * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate
+	 * DDR clock count  = ns_value / DDR clock period
+	 *
+	 * For GEMINILAKE dphy_param_reg will be programmed in terms of
+	 * HS byte clock count for other platform in HS ddr clock count
+	 */
+	mul = IS_GEMINILAKE(dev_priv) ? 8 : 2;
+	ths_prepare_ns = max(mipi_config->ths_prepare,
+			     mipi_config->tclk_prepare);
+
+	/* prepare count */
+	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul);
+
+	if (prepare_cnt > PREPARE_CNT_MAX) {
+		DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt);
+		prepare_cnt = PREPARE_CNT_MAX;
+	}
+
+	/* exit zero count */
+	exit_zero_cnt = DIV_ROUND_UP(
+				(ths_prepare_hszero - ths_prepare_ns) * ui_den,
+				ui_num * mul
+				);
+
+	/*
+	 * Exit zero is unified val ths_zero and ths_exit
+	 * minimum value for ths_exit = 110ns
+	 * min (exit_zero_cnt * 2) = 110/UI
+	 * exit_zero_cnt = 55/UI
+	 */
+	if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num)
+		exit_zero_cnt += 1;
+
+	if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt);
+		exit_zero_cnt = EXIT_ZERO_CNT_MAX;
+	}
+
+	/* clk zero count */
+	clk_zero_cnt = DIV_ROUND_UP(
+				(tclk_prepare_clkzero -	ths_prepare_ns)
+				* ui_den, ui_num * mul);
+
+	if (clk_zero_cnt > CLK_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt);
+		clk_zero_cnt = CLK_ZERO_CNT_MAX;
+	}
+
+	/* trail count */
+	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
+	trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul);
+
+	if (trail_cnt > TRAIL_CNT_MAX) {
+		DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt);
+		trail_cnt = TRAIL_CNT_MAX;
+	}
+
+	/* B080 */
+	intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 |
+						clk_zero_cnt << 8 | prepare_cnt;
+
+	/*
+	 * LP to HS switch count = 4TLPX + PREP_COUNT * mul + EXIT_ZERO_COUNT *
+	 *					mul + 10UI + Extra Byte Count
+	 *
+	 * HS to LP switch count = THS-TRAIL + 2TLPX + Extra Byte Count
+	 * Extra Byte Count is calculated according to number of lanes.
+	 * High Low Switch Count is the Max of LP to HS and
+	 * HS to LP switch count
+	 *
+	 */
+	tlpx_ui = DIV_ROUND_UP(tlpx_ns * ui_den, ui_num);
+
+	/* B044 */
+	/* FIXME:
+	 * The comment above does not match with the code */
+	lp_to_hs_switch = DIV_ROUND_UP(4 * tlpx_ui + prepare_cnt * mul +
+						exit_zero_cnt * mul + 10, 8);
+
+	hs_to_lp_switch = DIV_ROUND_UP(mipi_config->ths_trail + 2 * tlpx_ui, 8);
+
+	intel_dsi->hs_to_lp_count = max(lp_to_hs_switch, hs_to_lp_switch);
+	intel_dsi->hs_to_lp_count += extra_byte_count;
+
+	/* B088 */
+	/* LP -> HS for clock lanes
+	 * LP clk sync + LP11 + LP01 + tclk_prepare + tclk_zero +
+	 *						extra byte count
+	 * 2TPLX + 1TLPX + 1 TPLX(in ns) + prepare_cnt * 2 + clk_zero_cnt *
+	 *					2(in UI) + extra byte count
+	 * In byteclks = (4TLPX + prepare_cnt * 2 + clk_zero_cnt *2 (in UI)) /
+	 *					8 + extra byte count
+	 */
+	intel_dsi->clk_lp_to_hs_count =
+		DIV_ROUND_UP(
+			4 * tlpx_ui + prepare_cnt * 2 +
+			clk_zero_cnt * 2,
+			8);
+
+	intel_dsi->clk_lp_to_hs_count += extra_byte_count;
+
+	/* HS->LP for Clock Lanes
+	 * Low Power clock synchronisations + 1Tx byteclk + tclk_trail +
+	 *						Extra byte count
+	 * 2TLPX + 8UI + (trail_count*2)(in UI) + Extra byte count
+	 * In byteclks = (2*TLpx(in UI) + trail_count*2 +8)(in UI)/8 +
+	 *						Extra byte count
+	 */
+	intel_dsi->clk_hs_to_lp_count =
+		DIV_ROUND_UP(2 * tlpx_ui + trail_cnt * 2 + 8,
+			8);
+	intel_dsi->clk_hs_to_lp_count += extra_byte_count;
+
+	intel_dsi_log_params(intel_dsi);
+}
+
 void vlv_dsi_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
@@ -1697,7 +1845,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 	struct drm_encoder *encoder;
 	struct intel_connector *intel_connector;
 	struct drm_connector *connector;
-	struct drm_display_mode *fixed_mode;
+	struct drm_display_mode *current_mode, *fixed_mode;
 	enum port port;
 
 	DRM_DEBUG_KMS("\n");
@@ -1741,6 +1889,9 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 	intel_connector->get_hw_state = intel_connector_get_hw_state;
 
 	intel_encoder->port = port;
+	intel_encoder->type = INTEL_OUTPUT_DSI;
+	intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI;
+	intel_encoder->cloneable = 0;
 
 	/*
 	 * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI
@@ -1778,6 +1929,22 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 		goto err;
 	}
 
+	/* Use clock read-back from current hw-state for fastboot */
+	current_mode = intel_encoder_current_mode(intel_encoder);
+	if (current_mode) {
+		DRM_DEBUG_KMS("Calculated pclk %d GOP %d\n",
+			      intel_dsi->pclk, current_mode->clock);
+		if (intel_fuzzy_clock_check(intel_dsi->pclk,
+					    current_mode->clock)) {
+			DRM_DEBUG_KMS("Using GOP pclk\n");
+			intel_dsi->pclk = current_mode->clock;
+		}
+
+		kfree(current_mode);
+	}
+
+	vlv_dphy_param_init(intel_dsi);
+
 	/*
 	 * In case of BYT with CRC PMIC, we need to use GPIO for
 	 * Panel control.
@@ -1793,9 +1960,6 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	intel_encoder->type = INTEL_OUTPUT_DSI;
-	intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI;
-	intel_encoder->cloneable = 0;
 	drm_connector_init(dev, connector, &intel_dsi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DSI);
 
@@ -1813,7 +1977,7 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 
 	if (!fixed_mode) {
 		DRM_DEBUG_KMS("no fixed mode\n");
-		goto err;
+		goto err_cleanup_connector;
 	}
 
 	intel_panel_init(&intel_connector->panel, fixed_mode, NULL);
@@ -1823,6 +1987,8 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv)
 
 	return;
 
+err_cleanup_connector:
+	drm_connector_cleanup(&intel_connector->base);
 err:
 	drm_encoder_cleanup(&intel_encoder->base);
 	kfree(intel_dsi);
diff --git a/drivers/gpu/drm/i915/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
index 5e7b1fb2db5d..99cc3e2e9c2c 100644
--- a/drivers/gpu/drm/i915/vlv_dsi_pll.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
@@ -26,9 +26,11 @@
  */
 
 #include <linux/kernel.h>
-#include "intel_drv.h"
+
 #include "i915_drv.h"
+#include "intel_drv.h"
 #include "intel_dsi.h"
+#include "intel_sideband.h"
 
 static const u16 lfsr_converts[] = {
 	426, 469, 234, 373, 442, 221, 110, 311, 411,		/* 62 - 70 */
@@ -149,7 +151,7 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, 0);
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_DIVIDER, config->dsi_pll.div);
@@ -166,11 +168,11 @@ void vlv_dsi_pll_enable(struct intel_encoder *encoder,
 	if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) &
 						DSI_PLL_LOCK, 20)) {
 
-		mutex_unlock(&dev_priv->sb_lock);
+		vlv_cck_put(dev_priv);
 		DRM_ERROR("DSI PLL lock failed\n");
 		return;
 	}
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	DRM_DEBUG_KMS("DSI PLL locked\n");
 }
@@ -182,14 +184,14 @@ void vlv_dsi_pll_disable(struct intel_encoder *encoder)
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 
 	tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	tmp &= ~DSI_PLL_VCO_EN;
 	tmp |= DSI_PLL_LDO_GATE;
 	vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp);
 
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 }
 
 bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv)
@@ -266,10 +268,10 @@ u32 vlv_dsi_get_pclk(struct intel_encoder *encoder,
 
 	DRM_DEBUG_KMS("\n");
 
-	mutex_lock(&dev_priv->sb_lock);
+	vlv_cck_get(dev_priv);
 	pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
 	pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER);
-	mutex_unlock(&dev_priv->sb_lock);
+	vlv_cck_put(dev_priv);
 
 	config->dsi_pll.ctrl = pll_ctl & ~DSI_PLL_LOCK;
 	config->dsi_pll.div = pll_div;
diff --git a/drivers/gpu/drm/i915/gem/Makefile b/drivers/gpu/drm/i915/gem/Makefile
new file mode 100644
index 000000000000..07e7b8b840ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile
@@ -0,0 +1 @@
+include $(src)/Makefile.header-test # Extra header tests
diff --git a/drivers/gpu/drm/i915/gem/Makefile.header-test b/drivers/gpu/drm/i915/gem/Makefile.header-test
new file mode 100644
index 000000000000..61e06cbb4b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index 000000000000..6ad93a09968c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,139 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static __always_inline u32 __busy_read_flag(u8 id)
+{
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000u;
+
+	GEM_BUG_ON(id >= 16);
+	return 0x10000u << id;
+}
+
+static __always_inline u32 __busy_write_id(u8 id)
+{
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
+	 * engines. This would be true if we accessed the activity tracking
+	 * under the lock, but as we perform the lookup of the object and
+	 * its activity locklessly we can not guarantee that the last_write
+	 * being active implies that we have set the same engine flag from
+	 * last_read - hence we always set both read and write busy for
+	 * last_write.
+	 */
+	if (id == (u8)I915_ENGINE_CLASS_INVALID)
+		return 0xffffffffu;
+
+	return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
+{
+	const struct i915_request *rq;
+
+	/*
+	 * We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
+	 *
+	 * Note we only report on the status of native fences.
+	 */
+	if (!dma_fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, const struct i915_request, fence);
+	if (i915_request_completed(rq))
+		return 0;
+
+	/* Beware type-expansion follies! */
+	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
+	return flag(rq->engine->uabi_class);
+}
+
+static __always_inline unsigned int
+busy_check_reader(const struct dma_fence *fence)
+{
+	return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(const struct dma_fence *fence)
+{
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_busy *args = data;
+	struct drm_i915_gem_object *obj;
+	struct reservation_object_list *list;
+	unsigned int seq;
+	int err;
+
+	err = -ENOENT;
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj)
+		goto out;
+
+	/*
+	 * A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->base.resv->seq);
+
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy =
+		busy_check_writer(rcu_dereference(obj->base.resv->fence_excl));
+
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->base.resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
+
+		for (i = 0; i < shared_count; ++i) {
+			struct dma_fence *fence =
+				rcu_dereference(list->shared[i]);
+
+			args->busy |= busy_check_reader(fence);
+		}
+	}
+
+	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
+		goto retry;
+
+	err = 0;
+out:
+	rcu_read_unlock();
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8e74c23cbd91..5295285d5843 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -1,29 +1,12 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
+#include "display/intel_frontbuffer.h"
+
 #include "i915_drv.h"
-#include "intel_frontbuffer.h"
 #include "i915_gem_clflush.h"
 
 static DEFINE_SPINLOCK(clflush_lock);
@@ -113,6 +96,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 {
 	struct clflush *clflush;
 
+	assert_object_held(obj);
+
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
@@ -158,13 +143,12 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		dma_fence_get(&clflush->dma);
 
 		i915_sw_fence_await_reservation(&clflush->wait,
-						obj->resv, NULL,
+						obj->base.resv, NULL,
 						true, I915_FENCE_TIMEOUT,
 						I915_FENCE_GFP);
 
-		reservation_object_lock(obj->resv, NULL);
-		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
-		reservation_object_unlock(obj->resv);
+		reservation_object_add_excl_fence(obj->base.resv,
+						  &clflush->dma);
 
 		i915_sw_fence_commit(&clflush->wait);
 	} else if (obj->mm.pages) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 000000000000..e6c382973129
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
new file mode 100644
index 000000000000..1fdab0767a47
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#include "i915_gem_client_blt.h"
+
+#include "i915_gem_object_blt.h"
+#include "intel_drv.h"
+
+struct i915_sleeve {
+	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
+	struct sg_table *pages;
+	struct i915_page_sizes page_sizes;
+};
+
+static int vma_set_pages(struct i915_vma *vma)
+{
+	struct i915_sleeve *sleeve = vma->private;
+
+	vma->pages = sleeve->pages;
+	vma->page_sizes = sleeve->page_sizes;
+
+	return 0;
+}
+
+static void vma_clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
+	vma->pages = NULL;
+}
+
+static int vma_bind(struct i915_vma *vma,
+		    enum i915_cache_level cache_level,
+		    u32 flags)
+{
+	return vma->vm->vma_ops.bind_vma(vma, cache_level, flags);
+}
+
+static void vma_unbind(struct i915_vma *vma)
+{
+	vma->vm->vma_ops.unbind_vma(vma);
+}
+
+static const struct i915_vma_ops proxy_vma_ops = {
+	.set_pages = vma_set_pages,
+	.clear_pages = vma_clear_pages,
+	.bind_vma = vma_bind,
+	.unbind_vma = vma_unbind,
+};
+
+static struct i915_sleeve *create_sleeve(struct i915_address_space *vm,
+					 struct drm_i915_gem_object *obj,
+					 struct sg_table *pages,
+					 struct i915_page_sizes *page_sizes)
+{
+	struct i915_sleeve *sleeve;
+	struct i915_vma *vma;
+	int err;
+
+	sleeve = kzalloc(sizeof(*sleeve), GFP_KERNEL);
+	if (!sleeve)
+		return ERR_PTR(-ENOMEM);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_free;
+	}
+
+	vma->private = sleeve;
+	vma->ops = &proxy_vma_ops;
+
+	sleeve->vma = vma;
+	sleeve->obj = i915_gem_object_get(obj);
+	sleeve->pages = pages;
+	sleeve->page_sizes = *page_sizes;
+
+	return sleeve;
+
+err_free:
+	kfree(sleeve);
+	return ERR_PTR(err);
+}
+
+static void destroy_sleeve(struct i915_sleeve *sleeve)
+{
+	i915_gem_object_put(sleeve->obj);
+	kfree(sleeve);
+}
+
+struct clear_pages_work {
+	struct dma_fence dma;
+	struct dma_fence_cb cb;
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct irq_work irq_work;
+	struct i915_sleeve *sleeve;
+	struct intel_context *ce;
+	u32 value;
+};
+
+static const char *clear_pages_work_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *clear_pages_work_timeline_name(struct dma_fence *fence)
+{
+	return "clear";
+}
+
+static void clear_pages_work_release(struct dma_fence *fence)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), dma);
+
+	destroy_sleeve(w->sleeve);
+
+	i915_sw_fence_fini(&w->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*w), dma));
+	dma_fence_free(&w->dma);
+}
+
+static const struct dma_fence_ops clear_pages_work_ops = {
+	.get_driver_name = clear_pages_work_driver_name,
+	.get_timeline_name = clear_pages_work_timeline_name,
+	.release = clear_pages_work_release,
+};
+
+static void clear_pages_signal_irq_worker(struct irq_work *work)
+{
+	struct clear_pages_work *w = container_of(work, typeof(*w), irq_work);
+
+	dma_fence_signal(&w->dma);
+	dma_fence_put(&w->dma);
+}
+
+static void clear_pages_dma_fence_cb(struct dma_fence *fence,
+				     struct dma_fence_cb *cb)
+{
+	struct clear_pages_work *w = container_of(cb, typeof(*w), cb);
+
+	if (fence->error)
+		dma_fence_set_error(&w->dma, fence->error);
+
+	/*
+	 * Push the signalling of the fence into yet another worker to avoid
+	 * the nightmare locking around the fence spinlock.
+	 */
+	irq_work_queue(&w->irq_work);
+}
+
+static void clear_pages_worker(struct work_struct *work)
+{
+	struct clear_pages_work *w = container_of(work, typeof(*w), work);
+	struct drm_i915_private *i915 = w->ce->gem_context->i915;
+	struct drm_i915_gem_object *obj = w->sleeve->obj;
+	struct i915_vma *vma = w->sleeve->vma;
+	struct i915_request *rq;
+	int err = w->dma.error;
+
+	if (unlikely(err))
+		goto out_signal;
+
+	if (obj->cache_dirty) {
+		obj->write_domain = 0;
+		if (i915_gem_object_has_struct_page(obj))
+			drm_clflush_sg(w->sleeve->pages);
+		obj->cache_dirty = false;
+	}
+
+	/* XXX: we need to kill this */
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unlock;
+
+	rq = i915_request_create(w->ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	/* There's no way the fence has signalled */
+	if (dma_fence_add_callback(&rq->fence, &w->cb,
+				   clear_pages_dma_fence_cb))
+		GEM_BUG_ON(1);
+
+	if (w->ce->engine->emit_init_breadcrumb) {
+		err = w->ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_request;
+	}
+
+	/* XXX: more feverish nightmares await */
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (err)
+		goto out_request;
+
+	err = intel_emit_vma_fill_blt(rq, vma, w->value);
+out_request:
+	if (unlikely(err)) {
+		i915_request_skip(rq, err);
+		err = 0;
+	}
+
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+out_signal:
+	if (unlikely(err)) {
+		dma_fence_set_error(&w->dma, err);
+		dma_fence_signal(&w->dma);
+		dma_fence_put(&w->dma);
+	}
+}
+
+static int __i915_sw_fence_call
+clear_pages_work_notify(struct i915_sw_fence *fence,
+			enum i915_sw_fence_notify state)
+{
+	struct clear_pages_work *w = container_of(fence, typeof(*w), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&w->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&w->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static DEFINE_SPINLOCK(fence_lock);
+
+/* XXX: better name please */
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     struct i915_page_sizes *page_sizes,
+				     u32 value)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct clear_pages_work *work;
+	struct i915_sleeve *sleeve;
+	int err;
+
+	sleeve = create_sleeve(vm, obj, pages, page_sizes);
+	if (IS_ERR(sleeve))
+		return PTR_ERR(sleeve);
+
+	work = kmalloc(sizeof(*work), GFP_KERNEL);
+	if (!work) {
+		destroy_sleeve(sleeve);
+		return -ENOMEM;
+	}
+
+	work->value = value;
+	work->sleeve = sleeve;
+	work->ce = ce;
+
+	INIT_WORK(&work->work, clear_pages_worker);
+
+	init_irq_work(&work->irq_work, clear_pages_signal_irq_worker);
+
+	dma_fence_init(&work->dma,
+		       &clear_pages_work_ops,
+		       &fence_lock,
+		       i915->mm.unordered_timeline,
+		       0);
+	i915_sw_fence_init(&work->wait, clear_pages_work_notify);
+
+	i915_gem_object_lock(obj);
+	err = i915_sw_fence_await_reservation(&work->wait,
+					      obj->base.resv, NULL,
+					      true, I915_FENCE_TIMEOUT,
+					      I915_FENCE_GFP);
+	if (err < 0) {
+		dma_fence_set_error(&work->dma, err);
+	} else {
+		reservation_object_add_excl_fence(obj->base.resv, &work->dma);
+		err = 0;
+	}
+	i915_gem_object_unlock(obj);
+
+	dma_fence_get(&work->dma);
+	i915_sw_fence_commit(&work->wait);
+
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_client_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
new file mode 100644
index 000000000000..3dbd28c22ff5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+#ifndef __I915_GEM_CLIENT_BLT_H__
+#define __I915_GEM_CLIENT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct i915_page_sizes;
+struct intel_context;
+struct sg_table;
+
+int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
+				     struct intel_context *ce,
+				     struct sg_table *pages,
+				     struct i915_page_sizes *page_sizes,
+				     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index dd728b26b5aa..0f2c22a3bcb6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1,28 +1,7 @@
 /*
- * Copyright © 2011-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2011-2012 Intel Corporation
  */
 
 /*
@@ -86,16 +65,16 @@
  */
 
 #include <linux/log2.h>
+#include <linux/nospec.h>
+
 #include <drm/i915_drm.h>
-#include "i915_drv.h"
+
+#include "gt/intel_lrc_reg.h"
+
+#include "i915_gem_context.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "intel_lrc_reg.h"
-#include "intel_workarounds.h"
-
-#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
-#define I915_CONTEXT_PARAM_VM 0x9
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -116,28 +95,77 @@ void i915_lut_handle_free(struct i915_lut_handle *lut)
 
 static void lut_close(struct i915_gem_context *ctx)
 {
-	struct i915_lut_handle *lut, *ln;
 	struct radix_tree_iter iter;
 	void __rcu **slot;
 
-	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
-		list_del(&lut->obj_link);
-		i915_lut_handle_free(lut);
-	}
-	INIT_LIST_HEAD(&ctx->handles_list);
+	lockdep_assert_held(&ctx->mutex);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
+		struct drm_i915_gem_object *obj = vma->obj;
+		struct i915_lut_handle *lut;
+
+		if (!kref_get_unless_zero(&obj->base.refcount))
+			continue;
+
+		rcu_read_unlock();
+		i915_gem_object_lock(obj);
+		list_for_each_entry(lut, &obj->lut_list, obj_link) {
+			if (lut->ctx != ctx)
+				continue;
 
-		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+			if (lut->handle != iter.index)
+				continue;
 
-		vma->open_count--;
-		__i915_gem_object_release_unless_active(vma->obj);
+			list_del(&lut->obj_link);
+			break;
+		}
+		i915_gem_object_unlock(obj);
+		rcu_read_lock();
+
+		if (&lut->obj_link != &obj->lut_list) {
+			i915_lut_handle_free(lut);
+			radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+			i915_gem_object_put(obj);
+		}
+
+		i915_gem_object_put(obj);
 	}
 	rcu_read_unlock();
 }
 
+static struct intel_context *
+lookup_user_engine(struct i915_gem_context *ctx,
+		   unsigned long flags,
+		   const struct i915_engine_class_instance *ci)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+	int idx;
+
+	if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx))
+		return ERR_PTR(-EINVAL);
+
+	if (!i915_gem_context_user_engines(ctx)) {
+		struct intel_engine_cs *engine;
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci->engine_class,
+						  ci->engine_instance);
+		if (!engine)
+			return ERR_PTR(-EINVAL);
+
+		idx = engine->id;
+	} else {
+		idx = ci->engine_instance;
+	}
+
+	return i915_gem_context_get_engine(ctx, idx);
+}
+
 static inline int new_hw_id(struct drm_i915_private *i915, gfp_t gfp)
 {
 	unsigned int max;
@@ -227,19 +255,65 @@ static void release_hw_id(struct i915_gem_context *ctx)
 	mutex_unlock(&i915->contexts.mutex);
 }
 
-static void i915_gem_context_free(struct i915_gem_context *ctx)
+static void __free_engines(struct i915_gem_engines *e, unsigned int count)
+{
+	while (count--) {
+		if (!e->engines[count])
+			continue;
+
+		intel_context_put(e->engines[count]);
+	}
+	kfree(e);
+}
+
+static void free_engines(struct i915_gem_engines *e)
 {
-	struct intel_context *it, *n;
+	__free_engines(e, e->num_engines);
+}
+
+static void free_engines_rcu(struct rcu_head *rcu)
+{
+	free_engines(container_of(rcu, struct i915_gem_engines, rcu));
+}
+
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
+{
+	struct intel_engine_cs *engine;
+	struct i915_gem_engines *e;
+	enum intel_engine_id id;
+
+	e = kzalloc(struct_size(e, engines, I915_NUM_ENGINES), GFP_KERNEL);
+	if (!e)
+		return ERR_PTR(-ENOMEM);
+
+	init_rcu_head(&e->rcu);
+	for_each_engine(engine, ctx->i915, id) {
+		struct intel_context *ce;
+
+		ce = intel_context_create(ctx, engine);
+		if (IS_ERR(ce)) {
+			__free_engines(e, id);
+			return ERR_CAST(ce);
+		}
+
+		e->engines[id] = ce;
+	}
+	e->num_engines = id;
+
+	return e;
+}
 
+static void i915_gem_context_free(struct i915_gem_context *ctx)
+{
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
-	GEM_BUG_ON(!list_empty(&ctx->active_engines));
 
 	release_hw_id(ctx);
-	i915_ppgtt_put(ctx->ppgtt);
+	if (ctx->vm)
+		i915_vm_put(ctx->vm);
 
-	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
-		intel_context_put(it);
+	free_engines(rcu_access_pointer(ctx->engines));
+	mutex_destroy(&ctx->engines_mutex);
 
 	if (ctx->timeline)
 		i915_timeline_put(ctx->timeline);
@@ -301,7 +375,10 @@ void i915_gem_context_release(struct kref *ref)
 
 static void context_close(struct i915_gem_context *ctx)
 {
+	mutex_lock(&ctx->mutex);
+
 	i915_gem_context_set_closed(ctx);
+	ctx->file_priv = ERR_PTR(-EBADF);
 
 	/*
 	 * This context will never again be assinged to HW, so we can
@@ -316,12 +393,12 @@ static void context_close(struct i915_gem_context *ctx)
 	 */
 	lut_close(ctx);
 
-	ctx->file_priv = ERR_PTR(-EBADF);
+	mutex_unlock(&ctx->mutex);
 	i915_gem_context_put(ctx);
 }
 
 static u32 default_desc_template(const struct drm_i915_private *i915,
-				 const struct i915_hw_ppgtt *ppgtt)
+				 const struct i915_address_space *vm)
 {
 	u32 address_mode;
 	u32 desc;
@@ -329,7 +406,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
 
 	address_mode = INTEL_LEGACY_32B_CONTEXT;
-	if (ppgtt && i915_vm_is_4lvl(&ppgtt->vm))
+	if (vm && i915_vm_is_4lvl(vm))
 		address_mode = INTEL_LEGACY_64B_CONTEXT;
 	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
@@ -345,9 +422,11 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
 }
 
 static struct i915_gem_context *
-__create_context(struct drm_i915_private *dev_priv)
+__create_context(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
+	int err;
 	int i;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -355,57 +434,64 @@ __create_context(struct drm_i915_private *dev_priv)
 		return ERR_PTR(-ENOMEM);
 
 	kref_init(&ctx->ref);
-	list_add_tail(&ctx->link, &dev_priv->contexts.list);
-	ctx->i915 = dev_priv;
+	list_add_tail(&ctx->link, &i915->contexts.list);
+	ctx->i915 = i915;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e)) {
+		err = PTR_ERR(e);
+		goto err_free;
+	}
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
 
 	/* NB: Mark all slices as needing a remap so that when the context first
 	 * loads it will restore whatever remap state already exists. If there
 	 * is no remap info, it will be a NOP. */
-	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
+	ctx->remap_slice = ALL_L3_SLICES(i915);
 
 	i915_gem_context_set_bannable(ctx);
 	i915_gem_context_set_recoverable(ctx);
 
 	ctx->ring_size = 4 * PAGE_SIZE;
 	ctx->desc_template =
-		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
+		default_desc_template(i915, &i915->mm.aliasing_ppgtt->vm);
 
 	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
 		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
 	return ctx;
+
+err_free:
+	kfree(ctx);
+	return ERR_PTR(err);
 }
 
-static struct i915_hw_ppgtt *
-__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+static struct i915_address_space *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_address_space *vm)
 {
-	struct i915_hw_ppgtt *old = ctx->ppgtt;
+	struct i915_address_space *old = ctx->vm;
 
-	ctx->ppgtt = i915_ppgtt_get(ppgtt);
-	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+	ctx->vm = i915_vm_get(vm);
+	ctx->desc_template = default_desc_template(ctx->i915, vm);
 
 	return old;
 }
 
 static void __assign_ppgtt(struct i915_gem_context *ctx,
-			   struct i915_hw_ppgtt *ppgtt)
+			   struct i915_address_space *vm)
 {
-	if (ppgtt == ctx->ppgtt)
+	if (vm == ctx->vm)
 		return;
 
-	ppgtt = __set_ppgtt(ctx, ppgtt);
-	if (ppgtt)
-		i915_ppgtt_put(ppgtt);
+	vm = __set_ppgtt(ctx, vm);
+	if (vm)
+		i915_vm_put(vm);
 }
 
 static struct i915_gem_context *
@@ -415,8 +501,6 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	BUILD_BUG_ON(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &
-		     ~I915_CONTEXT_CREATE_FLAGS_UNKNOWN);
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
 	    !HAS_EXECLISTS(dev_priv))
 		return ERR_PTR(-EINVAL);
@@ -429,7 +513,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 		return ctx;
 
 	if (HAS_FULL_PPGTT(dev_priv)) {
-		struct i915_hw_ppgtt *ppgtt;
+		struct i915_ppgtt *ppgtt;
 
 		ppgtt = i915_ppgtt_create(dev_priv);
 		if (IS_ERR(ppgtt)) {
@@ -439,8 +523,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 			return ERR_CAST(ppgtt);
 		}
 
-		__assign_ppgtt(ctx, ppgtt);
-		i915_ppgtt_put(ppgtt);
+		__assign_ppgtt(ctx, &ppgtt->vm);
+		i915_vm_put(&ppgtt->vm);
 	}
 
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
@@ -608,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	for_each_engine(engine, dev_priv, id)
-		intel_engine_lost_context(engine);
-}
-
 void i915_gem_contexts_fini(struct drm_i915_private *i915)
 {
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -640,7 +713,7 @@ static int context_idr_cleanup(int id, void *p, void *data)
 
 static int vm_idr_cleanup(int id, void *p, void *data)
 {
-	i915_ppgtt_put(p);
+	i915_vm_put(p);
 	return 0;
 }
 
@@ -650,8 +723,8 @@ static int gem_context_register(struct i915_gem_context *ctx,
 	int ret;
 
 	ctx->file_priv = fpriv;
-	if (ctx->ppgtt)
-		ctx->ppgtt->vm.file = fpriv;
+	if (ctx->vm)
+		ctx->vm->file = fpriv;
 
 	ctx->pid = get_task_pid(current, PIDTYPE_PID);
 	ctx->name = kasprintf(GFP_KERNEL, "%s[%d]",
@@ -706,9 +779,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	return 0;
 
 err_ctx:
-	mutex_lock(&i915->drm.struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&i915->drm.struct_mutex);
 err:
 	idr_destroy(&file_priv->vm_idr);
 	idr_destroy(&file_priv->context_idr);
@@ -721,8 +792,6 @@ void i915_gem_context_close(struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
-
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
 	mutex_destroy(&file_priv->context_idr_lock);
@@ -738,7 +807,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_vm_control *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	int err;
 
 	if (!HAS_FULL_PPGTT(i915))
@@ -765,12 +834,11 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto err_put;
 
-	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+	err = idr_alloc(&file_priv->vm_idr, &ppgtt->vm, 0, 0, GFP_KERNEL);
 	if (err < 0)
 		goto err_unlock;
 
-	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
-	ppgtt->user_handle = err;
+	GEM_BUG_ON(err == 0); /* reserved for invalid/unassigned ppgtt */
 
 	mutex_unlock(&file_priv->vm_idr_lock);
 
@@ -780,7 +848,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
 err_unlock:
 	mutex_unlock(&file_priv->vm_idr_lock);
 err_put:
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 	return err;
 }
 
@@ -789,7 +857,7 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_vm_control *args = data;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_address_space *vm;
 	int err;
 	u32 id;
 
@@ -807,40 +875,16 @@ int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		return err;
 
-	ppgtt = idr_remove(&file_priv->vm_idr, id);
-	if (ppgtt) {
-		GEM_BUG_ON(ppgtt->user_handle != id);
-		ppgtt->user_handle = 0;
-	}
+	vm = idr_remove(&file_priv->vm_idr, id);
 
 	mutex_unlock(&file_priv->vm_idr_lock);
-	if (!ppgtt)
+	if (!vm)
 		return -ENOENT;
 
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(vm);
 	return 0;
 }
 
-static struct i915_request *
-last_request_on_engine(struct i915_timeline *timeline,
-		       struct intel_engine_cs *engine)
-{
-	struct i915_request *rq;
-
-	GEM_BUG_ON(timeline == &engine->timeline);
-
-	rq = i915_active_request_raw(&timeline->last_request,
-				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine->mask & engine->mask) {
-		GEM_TRACE("last request on engine %s: %llx:%llu\n",
-			  engine->name, rq->fence.context, rq->fence.seqno);
-		GEM_BUG_ON(rq->timeline != timeline);
-		return rq;
-	}
-
-	return NULL;
-}
-
 struct context_barrier_task {
 	struct i915_active base;
 	void (*task)(void *data);
@@ -861,14 +905,15 @@ static void cb_retire(struct i915_active *base)
 I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
 				intel_engine_mask_t engines,
+				bool (*skip)(struct intel_context *ce, void *data),
 				int (*emit)(struct i915_request *rq, void *data),
 				void (*task)(void *data),
 				void *data)
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
-	struct intel_context *ce, *next;
-	intel_wakeref_t wakeref;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	int err = 0;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
@@ -881,21 +926,22 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_init(i915, &cb->base, cb_retire);
 	i915_active_acquire(&cb->base);
 
-	wakeref = intel_runtime_pm_get(i915);
-	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
-		struct intel_engine_cs *engine = ce->engine;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 		struct i915_request *rq;
 
-		if (!(engine->mask & engines))
-			continue;
-
 		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
-				       engine->mask)) {
+				       ce->engine->mask)) {
 			err = -ENXIO;
 			break;
 		}
 
-		rq = i915_request_alloc(engine, ctx);
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (skip && skip(ce, data))
+			continue;
+
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
@@ -911,7 +957,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 		if (err)
 			break;
 	}
-	intel_runtime_pm_put(i915, wakeref);
+	i915_gem_context_unlock_engines(ctx);
 
 	cb->task = err ? NULL : task; /* caller needs to unwind instead */
 	cb->data = data;
@@ -921,64 +967,14 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	return err;
 }
 
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
-				      intel_engine_mask_t mask)
-{
-	struct intel_engine_cs *engine;
-
-	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915->kernel_context);
-
-	/* Inoperable, so presume the GPU is safely pointing into the void! */
-	if (i915_terminally_wedged(i915))
-		return 0;
-
-	for_each_engine_masked(engine, i915, mask, mask) {
-		struct intel_ring *ring;
-		struct i915_request *rq;
-
-		rq = i915_request_alloc(engine, i915->kernel_context);
-		if (IS_ERR(rq))
-			return PTR_ERR(rq);
-
-		/* Queue this switch after all other activity */
-		list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
-			struct i915_request *prev;
-
-			prev = last_request_on_engine(ring->timeline, engine);
-			if (!prev)
-				continue;
-
-			if (prev->gem_context == i915->kernel_context)
-				continue;
-
-			GEM_TRACE("add barrier on %s for %llx:%lld\n",
-				  engine->name,
-				  prev->fence.context,
-				  prev->fence.seqno);
-			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
-							 &prev->submit,
-							 I915_FENCE_GFP);
-		}
-
-		i915_request_add(rq);
-	}
-
-	return 0;
-}
-
 static int get_ppgtt(struct drm_i915_file_private *file_priv,
 		     struct i915_gem_context *ctx,
 		     struct drm_i915_gem_context_param *args)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_address_space *vm;
 	int ret;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
-	if (!ctx->ppgtt)
+	if (!ctx->vm)
 		return -ENODEV;
 
 	/* XXX rcu acquire? */
@@ -986,54 +982,52 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
 	if (ret)
 		return ret;
 
-	ppgtt = i915_ppgtt_get(ctx->ppgtt);
+	vm = i915_vm_get(ctx->vm);
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
 
 	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
 	if (ret)
 		goto err_put;
 
-	if (!ppgtt->user_handle) {
-		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
-		GEM_BUG_ON(!ret);
-		if (ret < 0)
-			goto err_unlock;
+	ret = idr_alloc(&file_priv->vm_idr, vm, 0, 0, GFP_KERNEL);
+	GEM_BUG_ON(!ret);
+	if (ret < 0)
+		goto err_unlock;
 
-		ppgtt->user_handle = ret;
-		i915_ppgtt_get(ppgtt);
-	}
+	i915_vm_get(vm);
 
 	args->size = 0;
-	args->value = ppgtt->user_handle;
+	args->value = ret;
 
 	ret = 0;
 err_unlock:
 	mutex_unlock(&file_priv->vm_idr_lock);
 err_put:
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(vm);
 	return ret;
 }
 
 static void set_ppgtt_barrier(void *data)
 {
-	struct i915_hw_ppgtt *old = data;
+	struct i915_address_space *old = data;
 
-	if (INTEL_GEN(old->vm.i915) < 8)
-		gen6_ppgtt_unpin_all(old);
+	if (INTEL_GEN(old->i915) < 8)
+		gen6_ppgtt_unpin_all(i915_vm_to_ppgtt(old));
 
-	i915_ppgtt_put(old);
+	i915_vm_put(old);
 }
 
 static int emit_ppgtt_update(struct i915_request *rq, void *data)
 {
-	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
+	struct i915_address_space *vm = rq->gem_context->vm;
 	struct intel_engine_cs *engine = rq->engine;
 	u32 base = engine->mmio_base;
 	u32 *cs;
 	int i;
 
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
+	if (i915_vm_is_4lvl(vm)) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+		const dma_addr_t pd_daddr = px_dma(ppgtt->pd);
 
 		cs = intel_ring_begin(rq, 6);
 		if (IS_ERR(cs))
@@ -1049,6 +1043,8 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 		*cs++ = MI_NOOP;
 		intel_ring_advance(rq, cs);
 	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
 		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
 		if (IS_ERR(cs))
 			return PTR_ERR(cs);
@@ -1066,25 +1062,31 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)
 		intel_ring_advance(rq, cs);
 	} else {
 		/* ppGTT is not part of the legacy context image */
-		gen6_ppgtt_pin(ppgtt);
+		gen6_ppgtt_pin(i915_vm_to_ppgtt(vm));
 	}
 
 	return 0;
 }
 
+static bool skip_ppgtt_update(struct intel_context *ce, void *data)
+{
+	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
+		return !ce->state;
+	else
+		return !atomic_read(&ce->pin_count);
+}
+
 static int set_ppgtt(struct drm_i915_file_private *file_priv,
 		     struct i915_gem_context *ctx,
 		     struct drm_i915_gem_context_param *args)
 {
-	struct i915_hw_ppgtt *ppgtt, *old;
+	struct i915_address_space *vm, *old;
 	int err;
 
-	return -EINVAL; /* nothing to see here; please move along */
-
 	if (args->size)
 		return -EINVAL;
 
-	if (!ctx->ppgtt)
+	if (!ctx->vm)
 		return -ENODEV;
 
 	if (upper_32_bits(args->value))
@@ -1094,26 +1096,26 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	if (err)
 		return err;
 
-	ppgtt = idr_find(&file_priv->vm_idr, args->value);
-	if (ppgtt) {
-		GEM_BUG_ON(ppgtt->user_handle != args->value);
-		i915_ppgtt_get(ppgtt);
-	}
+	vm = idr_find(&file_priv->vm_idr, args->value);
+	if (vm)
+		i915_vm_get(vm);
 	mutex_unlock(&file_priv->vm_idr_lock);
-	if (!ppgtt)
+	if (!vm)
 		return -ENOENT;
 
 	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
 	if (err)
 		goto out;
 
-	if (ppgtt == ctx->ppgtt)
+	if (vm == ctx->vm)
 		goto unlock;
 
 	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	mutex_lock(&ctx->mutex);
 	lut_close(ctx);
+	mutex_unlock(&ctx->mutex);
 
-	old = __set_ppgtt(ctx, ppgtt);
+	old = __set_ppgtt(ctx, vm);
 
 	/*
 	 * We need to flush any requests using the current ppgtt before
@@ -1121,20 +1123,21 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	 * only indirectly through the context.
 	 */
 	err = context_barrier_task(ctx, ALL_ENGINES,
+				   skip_ppgtt_update,
 				   emit_ppgtt_update,
 				   set_ppgtt_barrier,
 				   old);
 	if (err) {
-		ctx->ppgtt = old;
+		ctx->vm = old;
 		ctx->desc_template = default_desc_template(ctx->i915, old);
-		i915_ppgtt_put(ppgtt);
+		i915_vm_put(vm);
 	}
 
 unlock:
 	mutex_unlock(&ctx->i915->drm.struct_mutex);
 
 out:
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(vm);
 	return err;
 }
 
@@ -1156,7 +1159,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
-	*cs++ = gen8_make_rpcs(rq->i915, &sseu);
+	*cs++ = intel_sseu_make_rpcs(rq->i915, &sseu);
 
 	intel_ring_advance(rq, cs);
 
@@ -1166,9 +1169,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 static int
 gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 {
-	struct drm_i915_private *i915 = ce->engine->i915;
 	struct i915_request *rq;
-	intel_wakeref_t wakeref;
 	int ret;
 
 	lockdep_assert_held(&ce->pin_mutex);
@@ -1182,24 +1183,15 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	if (!intel_context_is_pinned(ce))
 		return 0;
 
-	/* Submitting requests etc needs the hw awake. */
-	wakeref = intel_runtime_pm_get(i915);
-
-	rq = i915_request_alloc(ce->engine, i915->kernel_context);
-	if (IS_ERR(rq)) {
-		ret = PTR_ERR(rq);
-		goto out_put;
-	}
+	rq = i915_request_create(ce->engine->kernel_context);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
 
 	/* Queue this switch after all other activity by this context. */
 	ret = i915_active_request_set(&ce->ring->timeline->last_request, rq);
 	if (ret)
 		goto out_add;
 
-	ret = gen8_emit_rpcs_config(rq, ce, sseu);
-	if (ret)
-		goto out_add;
-
 	/*
 	 * Guarantee context image and the timeline remains pinned until the
 	 * modifying request is retired by setting the ce activity tracker.
@@ -1207,32 +1199,29 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu)
 	 * But we only need to take one pin on the account of it. Or in other
 	 * words transfer the pinned ce object to tracked active request.
 	 */
-	if (!i915_active_request_isset(&ce->active_tracker))
-		__intel_context_pin(ce);
-	__i915_active_request_set(&ce->active_tracker, rq);
+	GEM_BUG_ON(i915_active_is_idle(&ce->active));
+	ret = i915_active_ref(&ce->active, rq->fence.context, rq);
+	if (ret)
+		goto out_add;
+
+	ret = gen8_emit_rpcs_config(rq, ce, sseu);
 
 out_add:
 	i915_request_add(rq);
-out_put:
-	intel_runtime_pm_put(i915, wakeref);
-
 	return ret;
 }
 
 static int
-__i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				    struct intel_engine_cs *engine,
-				    struct intel_sseu sseu)
+__intel_context_reconfigure_sseu(struct intel_context *ce,
+				 struct intel_sseu sseu)
 {
-	struct intel_context *ce;
-	int ret = 0;
+	int ret;
 
-	GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
-	GEM_BUG_ON(engine->id != RCS0);
+	GEM_BUG_ON(INTEL_GEN(ce->gem_context->i915) < 8);
 
-	ce = intel_context_pin_lock(ctx, engine);
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
+	ret = intel_context_lock_pinned(ce);
+	if (ret)
+		return ret;
 
 	/* Nothing to do if unmodified. */
 	if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
@@ -1243,24 +1232,23 @@ __i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
 		ce->sseu = sseu;
 
 unlock:
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
 	return ret;
 }
 
 static int
-i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine,
-				  struct intel_sseu sseu)
+intel_context_reconfigure_sseu(struct intel_context *ce, struct intel_sseu sseu)
 {
+	struct drm_i915_private *i915 = ce->gem_context->i915;
 	int ret;
 
-	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 
-	mutex_unlock(&ctx->i915->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
 }
@@ -1368,8 +1356,9 @@ static int set_sseu(struct i915_gem_context *ctx,
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
+	struct intel_context *ce;
 	struct intel_sseu sseu;
+	unsigned long lookup;
 	int ret;
 
 	if (args->size < sizeof(user_sseu))
@@ -1382,32 +1371,427 @@ static int set_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
 		return -EINVAL;
 
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
 	/* Only render engine supports RPCS configuration. */
-	if (engine->class != RENDER_CLASS)
-		return -ENODEV;
+	if (ce->engine->class != RENDER_CLASS) {
+		ret = -ENODEV;
+		goto out_ce;
+	}
 
 	ret = user_to_context_sseu(i915, &user_sseu, &sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
-	ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
-		return ret;
+		goto out_ce;
 
 	args->size = sizeof(user_sseu);
 
+out_ce:
+	intel_context_put(ce);
+	return ret;
+}
+
+struct set_engines {
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *engines;
+};
+
+static int
+set_engines__load_balance(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_load_balance __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct intel_engine_cs *stack[16];
+	struct intel_engine_cs **siblings;
+	struct intel_context *ce;
+	u16 num_siblings, idx;
+	unsigned int n;
+	int err;
+
+	if (!HAS_EXECLISTS(set->ctx->i915))
+		return -ENODEV;
+
+	if (USES_GUC_SUBMISSION(set->ctx->i915))
+		return -ENODEV; /* not implement yet */
+
+	if (get_user(idx, &ext->engine_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid placement value, %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid placement[%d], already occupied\n", idx);
+		return -EEXIST;
+	}
+
+	if (get_user(num_siblings, &ext->num_siblings))
+		return -EFAULT;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	err = check_user_mbz(&ext->mbz64);
+	if (err)
+		return err;
+
+	siblings = stack;
+	if (num_siblings > ARRAY_SIZE(stack)) {
+		siblings = kmalloc_array(num_siblings,
+					 sizeof(*siblings),
+					 GFP_KERNEL);
+		if (!siblings)
+			return -ENOMEM;
+	}
+
+	for (n = 0; n < num_siblings; n++) {
+		struct i915_engine_class_instance ci;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+			err = -EFAULT;
+			goto out_siblings;
+		}
+
+		siblings[n] = intel_engine_lookup_user(set->ctx->i915,
+						       ci.engine_class,
+						       ci.engine_instance);
+		if (!siblings[n]) {
+			DRM_DEBUG("Invalid sibling[%d]: { class:%d, inst:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			err = -EINVAL;
+			goto out_siblings;
+		}
+	}
+
+	ce = intel_execlists_create_virtual(set->ctx, siblings, n);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_siblings;
+	}
+
+	if (cmpxchg(&set->engines->engines[idx], NULL, ce)) {
+		intel_context_put(ce);
+		err = -EEXIST;
+		goto out_siblings;
+	}
+
+out_siblings:
+	if (siblings != stack)
+		kfree(siblings);
+
+	return err;
+}
+
+static int
+set_engines__bond(struct i915_user_extension __user *base, void *data)
+{
+	struct i915_context_engines_bond __user *ext =
+		container_of_user(base, typeof(*ext), base);
+	const struct set_engines *set = data;
+	struct i915_engine_class_instance ci;
+	struct intel_engine_cs *virtual;
+	struct intel_engine_cs *master;
+	u16 idx, num_bonds;
+	int err, n;
+
+	if (get_user(idx, &ext->virtual_index))
+		return -EFAULT;
+
+	if (idx >= set->engines->num_engines) {
+		DRM_DEBUG("Invalid index for virtual engine: %d >= %d\n",
+			  idx, set->engines->num_engines);
+		return -EINVAL;
+	}
+
+	idx = array_index_nospec(idx, set->engines->num_engines);
+	if (!set->engines->engines[idx]) {
+		DRM_DEBUG("Invalid engine at %d\n", idx);
+		return -EINVAL;
+	}
+	virtual = set->engines->engines[idx]->engine;
+
+	err = check_user_mbz(&ext->flags);
+	if (err)
+		return err;
+
+	for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+		err = check_user_mbz(&ext->mbz64[n]);
+		if (err)
+			return err;
+	}
+
+	if (copy_from_user(&ci, &ext->master, sizeof(ci)))
+		return -EFAULT;
+
+	master = intel_engine_lookup_user(set->ctx->i915,
+					  ci.engine_class, ci.engine_instance);
+	if (!master) {
+		DRM_DEBUG("Unrecognised master engine: { class:%u, instance:%u }\n",
+			  ci.engine_class, ci.engine_instance);
+		return -EINVAL;
+	}
+
+	if (get_user(num_bonds, &ext->num_bonds))
+		return -EFAULT;
+
+	for (n = 0; n < num_bonds; n++) {
+		struct intel_engine_cs *bond;
+
+		if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+			return -EFAULT;
+
+		bond = intel_engine_lookup_user(set->ctx->i915,
+						ci.engine_class,
+						ci.engine_instance);
+		if (!bond) {
+			DRM_DEBUG("Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			return -EINVAL;
+		}
+
+		/*
+		 * A non-virtual engine has no siblings to choose between; and
+		 * a submit fence will always be directed to the one engine.
+		 */
+		if (intel_engine_is_virtual(virtual)) {
+			err = intel_virtual_engine_attach_bond(virtual,
+							       master,
+							       bond);
+			if (err)
+				return err;
+		}
+	}
+
 	return 0;
 }
 
+static const i915_user_extension_fn set_engines__extensions[] = {
+	[I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_engines__load_balance,
+	[I915_CONTEXT_ENGINES_EXT_BOND] = set_engines__bond,
+};
+
+static int
+set_engines(struct i915_gem_context *ctx,
+	    const struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user =
+		u64_to_user_ptr(args->value);
+	struct set_engines set = { .ctx = ctx };
+	unsigned int num_engines, n;
+	u64 extensions;
+	int err;
+
+	if (!args->size) { /* switch back to legacy user_ring_map */
+		if (!i915_gem_context_user_engines(ctx))
+			return 0;
+
+		set.engines = default_engines(ctx);
+		if (IS_ERR(set.engines))
+			return PTR_ERR(set.engines);
+
+		goto replace;
+	}
+
+	BUILD_BUG_ON(!IS_ALIGNED(sizeof(*user), sizeof(*user->engines)));
+	if (args->size < sizeof(*user) ||
+	    !IS_ALIGNED(args->size, sizeof(*user->engines))) {
+		DRM_DEBUG("Invalid size for engine array: %d\n",
+			  args->size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Note that I915_EXEC_RING_MASK limits execbuf to only using the
+	 * first 64 engines defined here.
+	 */
+	num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+
+	set.engines = kmalloc(struct_size(set.engines, engines, num_engines),
+			      GFP_KERNEL);
+	if (!set.engines)
+		return -ENOMEM;
+
+	init_rcu_head(&set.engines->rcu);
+	for (n = 0; n < num_engines; n++) {
+		struct i915_engine_class_instance ci;
+		struct intel_engine_cs *engine;
+
+		if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+			__free_engines(set.engines, n);
+			return -EFAULT;
+		}
+
+		if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+		    ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE) {
+			set.engines->engines[n] = NULL;
+			continue;
+		}
+
+		engine = intel_engine_lookup_user(ctx->i915,
+						  ci.engine_class,
+						  ci.engine_instance);
+		if (!engine) {
+			DRM_DEBUG("Invalid engine[%d]: { class:%d, instance:%d }\n",
+				  n, ci.engine_class, ci.engine_instance);
+			__free_engines(set.engines, n);
+			return -ENOENT;
+		}
+
+		set.engines->engines[n] = intel_context_create(ctx, engine);
+		if (!set.engines->engines[n]) {
+			__free_engines(set.engines, n);
+			return -ENOMEM;
+		}
+	}
+	set.engines->num_engines = num_engines;
+
+	err = -EFAULT;
+	if (!get_user(extensions, &user->extensions))
+		err = i915_user_extensions(u64_to_user_ptr(extensions),
+					   set_engines__extensions,
+					   ARRAY_SIZE(set_engines__extensions),
+					   &set);
+	if (err) {
+		free_engines(set.engines);
+		return err;
+	}
+
+replace:
+	mutex_lock(&ctx->engines_mutex);
+	if (args->size)
+		i915_gem_context_set_user_engines(ctx);
+	else
+		i915_gem_context_clear_user_engines(ctx);
+	rcu_swap_protected(ctx->engines, set.engines, 1);
+	mutex_unlock(&ctx->engines_mutex);
+
+	call_rcu(&set.engines->rcu, free_engines_rcu);
+
+	return 0;
+}
+
+static struct i915_gem_engines *
+__copy_engines(struct i915_gem_engines *e)
+{
+	struct i915_gem_engines *copy;
+	unsigned int n;
+
+	copy = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!copy)
+		return ERR_PTR(-ENOMEM);
+
+	init_rcu_head(&copy->rcu);
+	for (n = 0; n < e->num_engines; n++) {
+		if (e->engines[n])
+			copy->engines[n] = intel_context_get(e->engines[n]);
+		else
+			copy->engines[n] = NULL;
+	}
+	copy->num_engines = n;
+
+	return copy;
+}
+
+static int
+get_engines(struct i915_gem_context *ctx,
+	    struct drm_i915_gem_context_param *args)
+{
+	struct i915_context_param_engines __user *user;
+	struct i915_gem_engines *e;
+	size_t n, count, size;
+	int err = 0;
+
+	err = mutex_lock_interruptible(&ctx->engines_mutex);
+	if (err)
+		return err;
+
+	e = NULL;
+	if (i915_gem_context_user_engines(ctx))
+		e = __copy_engines(i915_gem_context_engines(ctx));
+	mutex_unlock(&ctx->engines_mutex);
+	if (IS_ERR_OR_NULL(e)) {
+		args->size = 0;
+		return PTR_ERR_OR_ZERO(e);
+	}
+
+	count = e->num_engines;
+
+	/* Be paranoid in case we have an impedance mismatch */
+	if (!check_struct_size(user, engines, count, &size)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+	if (overflows_type(size, args->size)) {
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	if (!args->size) {
+		args->size = size;
+		goto err_free;
+	}
+
+	if (args->size < size) {
+		err = -EINVAL;
+		goto err_free;
+	}
+
+	user = u64_to_user_ptr(args->value);
+	if (!access_ok(user, size)) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	if (put_user(0, &user->extensions)) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	for (n = 0; n < count; n++) {
+		struct i915_engine_class_instance ci = {
+			.engine_class = I915_ENGINE_CLASS_INVALID,
+			.engine_instance = I915_ENGINE_CLASS_INVALID_NONE,
+		};
+
+		if (e->engines[n]) {
+			ci.engine_class = e->engines[n]->engine->uabi_class;
+			ci.engine_instance = e->engines[n]->engine->instance;
+		}
+
+		if (copy_to_user(&user->engines[n], &ci, sizeof(ci))) {
+			err = -EFAULT;
+			goto err_free;
+		}
+	}
+
+	args->size = size;
+
+err_free:
+	free_engines(e);
+	return err;
+}
+
 static int ctx_setparam(struct drm_i915_file_private *fpriv,
 			struct i915_gem_context *ctx,
 			struct drm_i915_gem_context_param *args)
@@ -1481,6 +1865,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_ppgtt(fpriv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = set_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1509,8 +1897,229 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
 }
 
+static int clone_engines(struct i915_gem_context *dst,
+			 struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	bool user_engines;
+	unsigned long n;
+
+	clone = kmalloc(struct_size(e, engines, e->num_engines), GFP_KERNEL);
+	if (!clone)
+		goto err_unlock;
+
+	init_rcu_head(&clone->rcu);
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_engine_cs *engine;
+
+		if (!e->engines[n]) {
+			clone->engines[n] = NULL;
+			continue;
+		}
+		engine = e->engines[n]->engine;
+
+		/*
+		 * Virtual engines are singletons; they can only exist
+		 * inside a single context, because they embed their
+		 * HW context... As each virtual context implies a single
+		 * timeline (each engine can only dequeue a single request
+		 * at any time), it would be surprising for two contexts
+		 * to use the same engine. So let's create a copy of
+		 * the virtual engine instead.
+		 */
+		if (intel_engine_is_virtual(engine))
+			clone->engines[n] =
+				intel_execlists_clone_virtual(dst, engine);
+		else
+			clone->engines[n] = intel_context_create(dst, engine);
+		if (IS_ERR_OR_NULL(clone->engines[n])) {
+			__free_engines(clone, n);
+			goto err_unlock;
+		}
+	}
+	clone->num_engines = n;
+
+	user_engines = i915_gem_context_user_engines(src);
+	i915_gem_context_unlock_engines(src);
+
+	free_engines(dst->engines);
+	RCU_INIT_POINTER(dst->engines, clone);
+	if (user_engines)
+		i915_gem_context_set_user_engines(dst);
+	else
+		i915_gem_context_clear_user_engines(dst);
+	return 0;
+
+err_unlock:
+	i915_gem_context_unlock_engines(src);
+	return -ENOMEM;
+}
+
+static int clone_flags(struct i915_gem_context *dst,
+		       struct i915_gem_context *src)
+{
+	dst->user_flags = src->user_flags;
+	return 0;
+}
+
+static int clone_schedattr(struct i915_gem_context *dst,
+			   struct i915_gem_context *src)
+{
+	dst->sched = src->sched;
+	return 0;
+}
+
+static int clone_sseu(struct i915_gem_context *dst,
+		      struct i915_gem_context *src)
+{
+	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
+	struct i915_gem_engines *clone;
+	unsigned long n;
+	int err;
+
+	clone = dst->engines; /* no locking required; sole access */
+	if (e->num_engines != clone->num_engines) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	for (n = 0; n < e->num_engines; n++) {
+		struct intel_context *ce = e->engines[n];
+
+		if (clone->engines[n]->engine->class != ce->engine->class) {
+			/* Must have compatible engine maps! */
+			err = -EINVAL;
+			goto unlock;
+		}
+
+		/* serialises with set_sseu */
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			goto unlock;
+
+		clone->engines[n]->sseu = ce->sseu;
+		intel_context_unlock_pinned(ce);
+	}
+
+	err = 0;
+unlock:
+	i915_gem_context_unlock_engines(src);
+	return err;
+}
+
+static int clone_timeline(struct i915_gem_context *dst,
+			  struct i915_gem_context *src)
+{
+	if (src->timeline) {
+		GEM_BUG_ON(src->timeline == dst->timeline);
+
+		if (dst->timeline)
+			i915_timeline_put(dst->timeline);
+		dst->timeline = i915_timeline_get(src->timeline);
+	}
+
+	return 0;
+}
+
+static int clone_vm(struct i915_gem_context *dst,
+		    struct i915_gem_context *src)
+{
+	struct i915_address_space *vm;
+
+	rcu_read_lock();
+	do {
+		vm = READ_ONCE(src->vm);
+		if (!vm)
+			break;
+
+		if (!kref_get_unless_zero(&vm->ref))
+			continue;
+
+		/*
+		 * This ppgtt may have be reallocated between
+		 * the read and the kref, and reassigned to a third
+		 * context. In order to avoid inadvertent sharing
+		 * of this ppgtt with that third context (and not
+		 * src), we have to confirm that we have the same
+		 * ppgtt after passing through the strong memory
+		 * barrier implied by a successful
+		 * kref_get_unless_zero().
+		 *
+		 * Once we have acquired the current ppgtt of src,
+		 * we no longer care if it is released from src, as
+		 * it cannot be reallocated elsewhere.
+		 */
+
+		if (vm == READ_ONCE(src->vm))
+			break;
+
+		i915_vm_put(vm);
+	} while (1);
+	rcu_read_unlock();
+
+	if (vm) {
+		__assign_ppgtt(dst, vm);
+		i915_vm_put(vm);
+	}
+
+	return 0;
+}
+
+static int create_clone(struct i915_user_extension __user *ext, void *data)
+{
+	static int (* const fn[])(struct i915_gem_context *dst,
+				  struct i915_gem_context *src) = {
+#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
+		MAP(ENGINES, clone_engines),
+		MAP(FLAGS, clone_flags),
+		MAP(SCHEDATTR, clone_schedattr),
+		MAP(SSEU, clone_sseu),
+		MAP(TIMELINE, clone_timeline),
+		MAP(VM, clone_vm),
+#undef MAP
+	};
+	struct drm_i915_gem_context_create_ext_clone local;
+	const struct create_ext *arg = data;
+	struct i915_gem_context *dst = arg->ctx;
+	struct i915_gem_context *src;
+	int err, bit;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
+		     I915_CONTEXT_CLONE_UNKNOWN);
+
+	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
+		return -EINVAL;
+
+	if (local.rsvd)
+		return -EINVAL;
+
+	rcu_read_lock();
+	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
+	rcu_read_unlock();
+	if (!src)
+		return -ENOENT;
+
+	GEM_BUG_ON(src == dst);
+
+	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
+		if (!(local.flags & BIT(bit)))
+			continue;
+
+		err = fn[bit](dst, src);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
@@ -1572,9 +2181,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 err_ctx:
-	mutex_lock(&dev->struct_mutex);
 	context_close(ext_data.ctx);
-	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
 
@@ -1599,10 +2206,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	if (!ctx)
 		return -ENOENT;
 
-	mutex_lock(&dev->struct_mutex);
 	context_close(ctx);
-	mutex_unlock(&dev->struct_mutex);
-
 	return 0;
 }
 
@@ -1610,8 +2214,9 @@ static int get_sseu(struct i915_gem_context *ctx,
 		    struct drm_i915_gem_context_param *args)
 {
 	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
 	struct intel_context *ce;
+	unsigned long lookup;
+	int err;
 
 	if (args->size == 0)
 		goto out;
@@ -1622,25 +2227,33 @@ static int get_sseu(struct i915_gem_context *ctx,
 			   sizeof(user_sseu)))
 		return -EFAULT;
 
-	if (user_sseu.flags || user_sseu.rsvd)
+	if (user_sseu.rsvd)
 		return -EINVAL;
 
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine.engine_class,
-					  user_sseu.engine.engine_instance);
-	if (!engine)
+	if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
 		return -EINVAL;
 
-	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
+	lookup = 0;
+	if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+		lookup |= LOOKUP_USER_INDEX;
+
+	ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
+	if (err) {
+		intel_context_put(ce);
+		return err;
+	}
+
 	user_sseu.slice_mask = ce->sseu.slice_mask;
 	user_sseu.subslice_mask = ce->sseu.subslice_mask;
 	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
 	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
 
-	intel_context_pin_unlock(ce);
+	intel_context_unlock_pinned(ce);
+	intel_context_put(ce);
 
 	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
 			 sizeof(user_sseu)))
@@ -1672,8 +2285,8 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 
 	case I915_CONTEXT_PARAM_GTT_SIZE:
 		args->size = 0;
-		if (ctx->ppgtt)
-			args->value = ctx->ppgtt->vm.total;
+		if (ctx->vm)
+			args->value = ctx->vm->total;
 		else if (to_i915(dev)->mm.aliasing_ppgtt)
 			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
 		else
@@ -1708,6 +2321,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		ret = get_ppgtt(file_priv, ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_ENGINES:
+		ret = get_engines(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -1801,6 +2418,23 @@ out_unlock:
 	return err;
 }
 
+/* GEM context-engines iterator: for_each_gem_engine() */
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
+{
+	const struct i915_gem_engines *e = it->engines;
+	struct intel_context *ctx;
+
+	do {
+		if (it->idx >= e->num_engines)
+			return NULL;
+
+		ctx = e->engines[it->idx++];
+	} while (!ctx);
+
+	return ctx;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_context.c"
 #include "selftests/i915_gem_context.c"
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index 23dcb01bfd82..9691dd062f72 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #ifndef __I915_GEM_CONTEXT_H__
@@ -27,9 +9,10 @@
 
 #include "i915_gem_context_types.h"
 
+#include "gt/intel_context.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
-#include "intel_context.h"
 #include "intel_device_info.h"
 
 struct drm_device;
@@ -111,6 +94,24 @@ static inline void i915_gem_context_set_force_single_submission(struct i915_gem_
 	__set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ctx->flags);
 }
 
+static inline bool
+i915_gem_context_user_engines(const struct i915_gem_context *ctx)
+{
+	return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
+{
+	set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
+{
+	clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
 int __i915_gem_context_pin_hw_id(struct i915_gem_context *ctx);
 static inline int i915_gem_context_pin_hw_id(struct i915_gem_context *ctx)
 {
@@ -133,17 +134,12 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx)
 
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
-void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_fini(struct drm_i915_private *dev_priv);
 
 int i915_gem_context_open(struct drm_i915_private *i915,
 			  struct drm_file *file);
 void i915_gem_context_close(struct drm_file *file);
 
-int i915_switch_context(struct i915_request *rq);
-int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
-				      intel_engine_mask_t engine_mask);
-
 void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
@@ -179,6 +175,64 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_release);
 }
 
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+	return rcu_dereference_protected(ctx->engines,
+					 lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+	__acquires(&ctx->engines_mutex)
+{
+	mutex_lock(&ctx->engines_mutex);
+	return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+	__releases(&ctx->engines_mutex)
+{
+	mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_lookup_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	return i915_gem_context_engines(ctx)->engines[idx];
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+	struct intel_context *ce = ERR_PTR(-EINVAL);
+
+	rcu_read_lock(); {
+		struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+		if (likely(idx < e->num_engines && e->engines[idx]))
+			ce = intel_context_get(e->engines[idx]);
+	} rcu_read_unlock();
+
+	return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+			   struct i915_gem_engines *engines)
+{
+	GEM_BUG_ON(!engines);
+	it->engines = engines;
+	it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+	for (i915_gem_engines_iter_init(&(it), (engines)); \
+	     ((ce) = i915_gem_engines_iter_next(&(it)));)
+
 struct i915_lut_handle *i915_lut_handle_alloc(void);
 void i915_lut_handle_free(struct i915_lut_handle *lut);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index e2ec58b10fb2..cc513410eeef 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -17,17 +17,29 @@
 #include <linux/rcupdate.h>
 #include <linux/types.h>
 
+#include "gt/intel_context_types.h"
+
 #include "i915_scheduler.h"
-#include "intel_context_types.h"
 
 struct pid;
 
 struct drm_i915_private;
 struct drm_i915_file_private;
-struct i915_hw_ppgtt;
+struct i915_address_space;
 struct i915_timeline;
 struct intel_ring;
 
+struct i915_gem_engines {
+	struct rcu_head rcu;
+	unsigned int num_engines;
+	struct intel_context *engines[];
+};
+
+struct i915_gem_engines_iter {
+	unsigned int idx;
+	const struct i915_gem_engines *engines;
+};
+
 /**
  * struct i915_gem_context - client state
  *
@@ -41,10 +53,34 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	/**
+	 * @engines: User defined engines for this context
+	 *
+	 * Various uAPI offer the ability to lookup up an
+	 * index from this array to select an engine operate on.
+	 *
+	 * Multiple logically distinct instances of the same engine
+	 * may be defined in the array, as well as composite virtual
+	 * engines.
+	 *
+	 * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+	 * array to select which HW context + engine to execute on. For
+	 * the default array, the user_ring_map[] is used to translate
+	 * the legacy uABI onto the approprate index (e.g. both
+	 * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+	 * context, and I915_EXEC_BSD is weird). For a use defined
+	 * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+	 *
+	 * User defined by I915_CONTEXT_PARAM_ENGINE (when the
+	 * CONTEXT_USER_ENGINES flag is set).
+	 */
+	struct i915_gem_engines __rcu *engines;
+	struct mutex engines_mutex; /* guards writes to engines */
+
 	struct i915_timeline *timeline;
 
 	/**
-	 * @ppgtt: unique address space (GTT)
+	 * @vm: unique address space (GTT)
 	 *
 	 * In full-ppgtt mode, each context has its own address space ensuring
 	 * complete seperation of one client from all others.
@@ -52,7 +88,7 @@ struct i915_gem_context {
 	 * In other modes, this is a NULL pointer with the expectation that
 	 * the caller uses the shared global GTT.
 	 */
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_address_space *vm;
 
 	/**
 	 * @pid: process id of creator
@@ -109,6 +145,7 @@ struct i915_gem_context {
 #define CONTEXT_BANNED			0
 #define CONTEXT_CLOSED			1
 #define CONTEXT_FORCE_SINGLE_SUBMISSION	2
+#define CONTEXT_USER_ENGINES		3
 
 	/**
 	 * @hw_id: - unique identifier for the context
@@ -128,15 +165,10 @@ struct i915_gem_context {
 	atomic_t hw_id_pin_count;
 	struct list_head hw_id_link;
 
-	struct list_head active_engines;
 	struct mutex mutex;
 
 	struct i915_sched_attr sched;
 
-	/** hw_contexts: per-engine logical HW state */
-	struct rb_root hw_contexts;
-	spinlock_t hw_contexts_lock;
-
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
 	/** desc_template: invariant fields for the HW context descriptor */
@@ -159,17 +191,12 @@ struct i915_gem_context {
 	/** remap_slice: Bitmask of cache lines that need remapping */
 	u8 remap_slice;
 
-	/** handles_vma: rbtree to look up our context specific obj/vma for
+	/**
+	 * handles_vma: rbtree to look up our context specific obj/vma for
 	 * the user handle. (user handles are per fd, but the binding is
 	 * per vm, which may be one per context or shared with the global GTT)
 	 */
 	struct radix_tree_root handles_vma;
-
-	/** handles_list: reverse list of all the rbtree entries in use for
-	 * this context, which allows us to free all the allocations on
-	 * context close.
-	 */
-	struct list_head handles_list;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 5a101a9462d8..cbf1701d3acc 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -1,34 +1,16 @@
 /*
- * Copyright 2012 Red Hat Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
+ * SPDX-License-Identifier: MIT
  *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *	Dave Airlie <airlied@redhat.com>
+ * Copyright 2012 Red Hat Inc
  */
 
 #include <linux/dma-buf.h>
+#include <linux/highmem.h>
 #include <linux/reservation.h>
 
-
 #include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
 
 static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 {
@@ -169,7 +151,6 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
@@ -177,12 +158,12 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_cpu_domain(obj, write);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
@@ -192,19 +173,18 @@ out:
 static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
 {
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
-	struct drm_device *dev = obj->base.dev;
 	int err;
 
 	err = i915_gem_object_pin_pages(obj);
 	if (err)
 		return err;
 
-	err = i915_mutex_lock_interruptible(dev);
+	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out;
 
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	mutex_unlock(&dev->struct_mutex);
+	i915_gem_object_unlock(obj);
 
 out:
 	i915_gem_object_unpin_pages(obj);
@@ -234,7 +214,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
-	exp_info.resv = obj->resv;
+	exp_info.resv = obj->base.resv;
 
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
@@ -310,7 +290,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
-	obj->resv = dma_buf->resv;
+	obj->base.resv = dma_buf->resv;
 
 	/* We use GTT as shorthand for a coherent domain, one that is
 	 * neither in the GPU cache nor in the CPU cache, where all
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 000000000000..2e3ce2a69653
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,796 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * We manually flush the CPU domain so that we can override and
+	 * force the flush for the display, and perform it asyncrhonously.
+	 */
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+	if (obj->cache_dirty)
+		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+	obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+	if (!READ_ONCE(obj->pin_global))
+		return;
+
+	i915_gem_object_lock(obj);
+	__i915_gem_object_flush_for_display(obj);
+	i915_gem_object_unlock(obj);
+}
+
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_WC;
+		obj->write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Moves a single object to the GTT read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * GTT domain upon first access.
+	 */
+	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->read_domains = I915_GEM_DOMAIN_GTT;
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
+/**
+ * Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	assert_object_held(obj);
+
+	if (obj->cache_level == cache_level)
+		return 0;
+
+	/* Inspect the list of currently bound VMA and unbind any that would
+	 * be invalid given the new cache-level. This is principally to
+	 * catch the issue of the CS prefetch crossing page boundaries and
+	 * reading an invalid PTE on older architectures.
+	 */
+restart:
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		if (i915_vma_is_pinned(vma)) {
+			DRM_DEBUG("can not change the cache level of pinned objects\n");
+			return -EBUSY;
+		}
+
+		if (!i915_vma_is_closed(vma) &&
+		    i915_gem_valid_gtt_space(vma, cache_level))
+			continue;
+
+		ret = i915_vma_unbind(vma);
+		if (ret)
+			return ret;
+
+		/* As unbinding may affect other elements in the
+		 * obj->vma_list (due to side-effects from retiring
+		 * an active vma), play safe and restart the iterator.
+		 */
+		goto restart;
+	}
+
+	/* We can reuse the existing drm_mm nodes but need to change the
+	 * cache-level on the PTE. We could simply unbind them all and
+	 * rebind with the correct cache-level on next use. However since
+	 * we already have a valid slot, dma mapping, pages etc, we may as
+	 * rewrite the PTE in the belief that doing so tramples upon less
+	 * state and so involves less work.
+	 */
+	if (atomic_read(&obj->bind_count)) {
+		/* Before we change the PTE, the GPU must not be accessing it.
+		 * If we wait upon the object, we know that all the bound
+		 * VMA are no longer active.
+		 */
+		ret = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (ret)
+			return ret;
+
+		if (!HAS_LLC(to_i915(obj->base.dev)) &&
+		    cache_level != I915_CACHE_NONE) {
+			/* Access to snoopable pages through the GTT is
+			 * incoherent and on some machines causes a hard
+			 * lockup. Relinquish the CPU mmaping to force
+			 * userspace to refault in the pages and we can
+			 * then double check if the GTT mapping is still
+			 * valid for that pointer access.
+			 */
+			i915_gem_object_release_mmap(obj);
+
+			/* As we no longer need a fence for GTT access,
+			 * we can relinquish it now (and so prevent having
+			 * to steal a fence from someone else on the next
+			 * fence request). Note GPU activity would have
+			 * dropped the fence as all snoopable access is
+			 * supposed to be linear.
+			 */
+			for_each_ggtt_vma(vma, obj) {
+				ret = i915_vma_put_fence(vma);
+				if (ret)
+					return ret;
+			}
+		} else {
+			/* We either have incoherent backing store and
+			 * so no GTT access or the architecture is fully
+			 * coherent. In such cases, existing GTT mmaps
+			 * ignore the cache bit in the PTE and we can
+			 * rewrite it without confusing the GPU or having
+			 * to force userspace to fault back in its mmaps.
+			 */
+		}
+
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
+			if (!drm_mm_node_allocated(&vma->node))
+				continue;
+
+			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
+			if (ret)
+				return ret;
+		}
+	}
+
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
+		vma->node.color = cache_level;
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+	obj->cache_dirty = true; /* Always invalidate stale cachelines */
+
+	return 0;
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	int err = 0;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, args->handle);
+	if (!obj) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_caching *args = data;
+	struct drm_i915_gem_object *obj;
+	enum i915_cache_level level;
+	int ret = 0;
+
+	switch (args->caching) {
+	case I915_CACHING_NONE:
+		level = I915_CACHE_NONE;
+		break;
+	case I915_CACHING_CACHED:
+		/*
+		 * Due to a HW issue on BXT A stepping, GPU stores via a
+		 * snooped mapping may leave stale data in a corresponding CPU
+		 * cacheline, whereas normally such cachelines would get
+		 * invalidated.
+		 */
+		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+			return -ENODEV;
+
+		level = I915_CACHE_LLC;
+		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * The caching mode of proxy object is handled by its generator, and
+	 * not allowed to be changed by userspace.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (obj->cache_level == level)
+		goto out;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out;
+
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	if (ret)
+		goto out;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret == 0) {
+		ret = i915_gem_object_set_cache_level(obj, level);
+		i915_gem_object_unlock(obj);
+	}
+	mutex_unlock(&i915->drm.struct_mutex);
+
+out:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags)
+{
+	struct i915_vma *vma;
+	int ret;
+
+	assert_object_held(obj);
+
+	/* Mark the global pin early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_global++;
+
+	/* The display engine is not coherent with the LLC cache on gen6.  As
+	 * a result, we make sure that the pinning that is about to occur is
+	 * done with uncached PTEs. This is lowest common denominator for all
+	 * chipsets.
+	 *
+	 * However for gen6+, we could do better by using the GFDT bit instead
+	 * of uncaching, which would allow us to flush all the LLC-cached data
+	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+	 */
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(to_i915(obj->base.dev)) ?
+					      I915_CACHE_WT : I915_CACHE_NONE);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err_unpin_global;
+	}
+
+	/* As the user may map the buffer once pinned in the display plane
+	 * (e.g. libkms for the bootup splash), we have to ensure that we
+	 * always use map_and_fenceable for all scanout buffers. However,
+	 * it may simply be too big to fit into mappable, in which case
+	 * put it anyway and hope that userspace can cope (but always first
+	 * try to preserve the existing ABI).
+	 */
+	vma = ERR_PTR(-ENOSPC);
+	if ((flags & PIN_MAPPABLE) == 0 &&
+	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
+					       flags |
+					       PIN_MAPPABLE |
+					       PIN_NONBLOCK);
+	if (IS_ERR(vma))
+		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+	if (IS_ERR(vma))
+		goto err_unpin_global;
+
+	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
+
+	__i915_gem_object_flush_for_display(obj);
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+
+	return vma;
+
+err_unpin_global:
+	obj->pin_global--;
+	return vma;
+}
+
+static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	mutex_lock(&i915->ggtt.vm.mutex);
+	for_each_ggtt_vma(vma, obj) {
+		if (!drm_mm_node_allocated(&vma->node))
+			continue;
+
+		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
+	}
+	mutex_unlock(&i915->ggtt.vm.mutex);
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	assert_object_held(obj);
+
+	if (WARN_ON(obj->pin_global == 0))
+		return;
+
+	if (--obj->pin_global == 0)
+		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	i915_vma_unpin(vma);
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	assert_object_held(obj);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		return ret;
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
+	}
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write)
+		__start_cpu_write(obj);
+
+	return 0;
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+/**
+ * Called when user space prepares to use an object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file)
+{
+	struct drm_i915_gem_set_domain *args = data;
+	struct drm_i915_gem_object *obj;
+	u32 read_domains = args->read_domains;
+	u32 write_domain = args->write_domain;
+	int err;
+
+	/* Only handle setting domains to types used by the CPU. */
+	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+		return -EINVAL;
+
+	/*
+	 * Having something in the write domain implies it's in the read
+	 * domain, and only that read domain.  Enforce that in the request.
+	 */
+	if (write_domain && read_domains != write_domain)
+		return -EINVAL;
+
+	if (!read_domains)
+		return 0;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/*
+	 * Already in the desired write domain? Nothing for us to do!
+	 *
+	 * We apply a little bit of cunning here to catch a broader set of
+	 * no-ops. If obj->write_domain is set, we must be in the same
+	 * obj->read_domains, and only that domain. Therefore, if that
+	 * obj->write_domain matches the request read_domains, we are
+	 * already in the same read/write domain and can skip the operation,
+	 * without having to further check the requested write_domain.
+	 */
+	if (READ_ONCE(obj->write_domain) == read_domains) {
+		err = 0;
+		goto out;
+	}
+
+	/*
+	 * Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out;
+
+	/*
+	 * Proxy objects do not control access to the backing storage, ergo
+	 * they cannot be used as a means to manipulate the cache domain
+	 * tracking for that backing storage. The proxy object is always
+	 * considered to be outside of any cache domain.
+	 */
+	if (i915_gem_object_is_proxy(obj)) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	err = i915_gem_object_pin_pages(obj);
+	if (err)
+		goto out;
+
+	err = i915_gem_object_lock_interruptible(obj);
+	if (err)
+		goto out_unpin;
+
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+	else
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+	/* And bump the LRU for this access */
+	i915_gem_object_bump_inactive_ggtt(obj);
+
+	i915_gem_object_unlock(obj);
+
+	if (write_domain != 0)
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unlock;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err_unlock;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, false);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu read domain, set ourself into the gtt
+	 * read domain and manually flush cachelines (if required). This
+	 * optimizes for the case when the gpu will dirty the data
+	 * anyway again before the next pread happens.
+	 */
+	if (!obj->cache_dirty &&
+	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+		*needs_clflush = CLFLUSH_BEFORE;
+
+out:
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
+	return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush)
+{
+	int ret;
+
+	*needs_clflush = 0;
+	if (!i915_gem_object_has_struct_page(obj))
+		return -ENODEV;
+
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_ALL,
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto err_unlock;
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err_unlock;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+		if (ret)
+			goto err_unpin;
+		else
+			goto out;
+	}
+
+	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+	/* If we're not in the cpu write domain, set ourself into the
+	 * gtt write domain and manually flush cachelines (as required).
+	 * This optimizes for the case when the gpu will use the data
+	 * right away and we therefore have to clflush anyway.
+	 */
+	if (!obj->cache_dirty) {
+		*needs_clflush |= CLFLUSH_AFTER;
+
+		/*
+		 * Same trick applies to invalidate partially written
+		 * cachelines read before writing.
+		 */
+		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+			*needs_clflush |= CLFLUSH_BEFORE;
+	}
+
+out:
+	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
+	obj->mm.dirty = true;
+	/* return with the pages pinned */
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+err_unlock:
+	i915_gem_object_unlock(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c83d2a195d15..5fae0e50aad0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1,29 +1,7 @@
 /*
- * Copyright © 2008,2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008,2010 Intel Corporation
  */
 
 #include <linux/intel-iommu.h>
@@ -34,11 +12,17 @@
 #include <drm/drm_syncobj.h>
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
+#include "display/intel_frontbuffer.h"
+
+#include "gem/i915_gem_ioctls.h"
+#include "gt/intel_context.h"
+#include "gt/intel_gt_pm.h"
+
+#include "i915_gem_ioctls.h"
 #include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
-#include "intel_frontbuffer.h"
 
 enum {
 	FORCE_CPU_RELOC = 1,
@@ -236,7 +220,8 @@ struct i915_execbuffer {
 	unsigned int *flags;
 
 	struct intel_engine_cs *engine; /** engine to queue the request to */
-	struct i915_gem_context *ctx; /** context for building the request */
+	struct intel_context *context; /* logical state for the request */
+	struct i915_gem_context *gem_context; /** caller's context */
 	struct i915_address_space *vm; /** GTT and vma for the request */
 
 	struct i915_request *request; /** our request to build */
@@ -738,9 +723,9 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	if (unlikely(!ctx))
 		return -ENOENT;
 
-	eb->ctx = ctx;
-	if (ctx->ppgtt) {
-		eb->vm = &ctx->ppgtt->vm;
+	eb->gem_context = ctx;
+	if (ctx->vm) {
+		eb->vm = ctx->vm;
 		eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 	} else {
 		eb->vm = &eb->i915->ggtt.vm;
@@ -784,7 +769,6 @@ static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
 
 static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 {
-	const struct intel_context *ce;
 	struct i915_request *rq;
 	int ret = 0;
 
@@ -794,11 +778,7 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 	 * keeping all of their resources pinned.
 	 */
 
-	ce = intel_context_lookup(eb->ctx, eb->engine);
-	if (!ce || !ce->ring) /* first use, assume empty! */
-		return 0;
-
-	rq = __eb_wait_for_ring(ce->ring);
+	rq = __eb_wait_for_ring(eb->context->ring);
 	if (rq) {
 		mutex_unlock(&eb->i915->drm.struct_mutex);
 
@@ -817,15 +797,12 @@ static int eb_wait_for_ring(const struct i915_execbuffer *eb)
 
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
-	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
+	struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma;
 	struct drm_i915_gem_object *obj;
 	unsigned int i, batch;
 	int err;
 
-	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
-		return -ENOENT;
-
-	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
+	if (unlikely(i915_gem_context_is_banned(eb->gem_context)))
 		return -EIO;
 
 	INIT_LIST_HEAD(&eb->relocs);
@@ -833,6 +810,12 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 
 	batch = eb_batch_index(eb);
 
+	mutex_lock(&eb->gem_context->mutex);
+	if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
+		err = -ENOENT;
+		goto err_ctx;
+	}
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -866,13 +849,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 			goto err_obj;
 		}
 
-		/* transfer ref to ctx */
-		if (!vma->open_count++)
+		/* transfer ref to lut */
+		if (!atomic_fetch_inc(&vma->open_count))
 			i915_vma_reopen(vma);
-		list_add(&lut->obj_link, &obj->lut_list);
-		list_add(&lut->ctx_link, &eb->ctx->handles_list);
-		lut->ctx = eb->ctx;
 		lut->handle = handle;
+		lut->ctx = eb->gem_context;
+
+		i915_gem_object_lock(obj);
+		list_add(&lut->obj_link, &obj->lut_list);
+		i915_gem_object_unlock(obj);
 
 add_vma:
 		err = eb_add_vma(eb, i, batch, vma);
@@ -885,6 +870,8 @@ add_vma:
 			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
+	mutex_unlock(&eb->gem_context->mutex);
+
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
@@ -892,6 +879,8 @@ err_obj:
 	i915_gem_object_put(obj);
 err_vma:
 	eb->vma[i] = NULL;
+err_ctx:
+	mutex_unlock(&eb->gem_context->mutex);
 	return err;
 }
 
@@ -1027,7 +1016,7 @@ static void reloc_cache_reset(struct reloc_cache *cache)
 			mb();
 
 		kunmap_atomic(vaddr);
-		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
+		i915_gem_object_finish_access((struct drm_i915_gem_object *)cache->node.mm);
 	} else {
 		wmb();
 		io_mapping_unmap_atomic((void __iomem *)vaddr);
@@ -1059,7 +1048,7 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 		unsigned int flushes;
 		int err;
 
-		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
+		err = i915_gem_object_prepare_write(obj, &flushes);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1096,7 +1085,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (use_cpu_reloc(cache, obj))
 			return NULL;
 
+		i915_gem_object_lock(obj);
 		err = i915_gem_object_set_to_gtt_domain(obj, true);
+		i915_gem_object_unlock(obj);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1185,6 +1176,26 @@ static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 		*addr = value;
 }
 
+static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	i915_vma_lock(vma);
+
+	if (obj->cache_dirty & ~obj->cache_coherent)
+		i915_gem_clflush_object(obj, 0);
+	obj->write_domain = 0;
+
+	err = i915_request_await_object(rq, vma->obj, true);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+	i915_vma_unlock(vma);
+
+	return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 			     struct i915_vma *vma,
 			     unsigned int len)
@@ -1196,15 +1207,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	u32 *cmd;
 	int err;
 
-	if (DBG_FORCE_RELOC == FORCE_GPU_RELOC) {
-		obj = vma->obj;
-		if (obj->cache_dirty & ~obj->cache_coherent)
-			i915_gem_clflush_object(obj, 0);
-		obj->write_domain = 0;
-	}
-
-	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
-
 	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
@@ -1227,13 +1229,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_unmap;
 
-	rq = i915_request_alloc(eb->engine, eb->ctx);
+	rq = i915_request_create(eb->context);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
 	}
 
-	err = i915_request_await_object(rq, vma->obj, true);
+	err = reloc_move_to_gpu(rq, vma);
 	if (err)
 		goto err_request;
 
@@ -1241,14 +1243,12 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 					batch->node.start, PAGE_SIZE,
 					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
 	if (err)
-		goto err_request;
+		goto skip_request;
 
+	i915_vma_lock(batch);
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	err = i915_vma_move_to_active(batch, rq, 0);
-	if (err)
-		goto skip_request;
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
@@ -1858,24 +1858,59 @@ slow:
 static int eb_move_to_gpu(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
+	struct ww_acquire_ctx acquire;
 	unsigned int i;
-	int err;
+	int err = 0;
+
+	ww_acquire_init(&acquire, &reservation_ww_class);
 
 	for (i = 0; i < count; i++) {
+		struct i915_vma *vma = eb->vma[i];
+
+		err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
+		if (!err)
+			continue;
+
+		GEM_BUG_ON(err == -EALREADY); /* No duplicate vma */
+
+		if (err == -EDEADLK) {
+			GEM_BUG_ON(i == 0);
+			do {
+				int j = i - 1;
+
+				ww_mutex_unlock(&eb->vma[j]->resv->lock);
+
+				swap(eb->flags[i], eb->flags[j]);
+				swap(eb->vma[i],  eb->vma[j]);
+				eb->vma[i]->exec_flags = &eb->flags[i];
+			} while (--i);
+			GEM_BUG_ON(vma != eb->vma[0]);
+			vma->exec_flags = &eb->flags[0];
+
+			err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
+							       &acquire);
+		}
+		if (err)
+			break;
+	}
+	ww_acquire_done(&acquire);
+
+	while (i--) {
 		unsigned int flags = eb->flags[i];
 		struct i915_vma *vma = eb->vma[i];
 		struct drm_i915_gem_object *obj = vma->obj;
 
+		assert_vma_held(vma);
+
 		if (flags & EXEC_OBJECT_CAPTURE) {
 			struct i915_capture_list *capture;
 
 			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
-			if (unlikely(!capture))
-				return -ENOMEM;
-
-			capture->next = eb->request->capture_list;
-			capture->vma = eb->vma[i];
-			eb->request->capture_list = capture;
+			if (capture) {
+				capture->next = eb->request->capture_list;
+				capture->vma = vma;
+				eb->request->capture_list = capture;
+			}
 		}
 
 		/*
@@ -1895,24 +1930,15 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 				flags &= ~EXEC_OBJECT_ASYNC;
 		}
 
-		if (flags & EXEC_OBJECT_ASYNC)
-			continue;
-
-		err = i915_request_await_object
-			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
-		if (err)
-			return err;
-	}
+		if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+			err = i915_request_await_object
+				(eb->request, obj, flags & EXEC_OBJECT_WRITE);
+		}
 
-	for (i = 0; i < count; i++) {
-		unsigned int flags = eb->flags[i];
-		struct i915_vma *vma = eb->vma[i];
+		if (err == 0)
+			err = i915_vma_move_to_active(vma, eb->request, flags);
 
-		err = i915_vma_move_to_active(vma, eb->request, flags);
-		if (unlikely(err)) {
-			i915_request_skip(eb->request, err);
-			return err;
-		}
+		i915_vma_unlock(vma);
 
 		__eb_unreserve_vma(vma, flags);
 		vma->exec_flags = NULL;
@@ -1920,12 +1946,20 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
 		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
 			i915_vma_put(vma);
 	}
+	ww_acquire_fini(&acquire);
+
+	if (unlikely(err))
+		goto err_skip;
+
 	eb->exec = NULL;
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
 	i915_gem_chipset_flush(eb->i915);
-
 	return 0;
+
+err_skip:
+	i915_request_skip(eb->request, err);
+	return err;
 }
 
 static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
@@ -2079,9 +2113,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	return file_priv->bsd_engine;
 }
 
-#define I915_USER_RINGS (4)
-
-static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
+static const enum intel_engine_id user_ring_map[] = {
 	[I915_EXEC_DEFAULT]	= RCS0,
 	[I915_EXEC_RENDER]	= RCS0,
 	[I915_EXEC_BLT]		= BCS0,
@@ -2089,31 +2121,57 @@ static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
 	[I915_EXEC_VEBOX]	= VECS0
 };
 
-static struct intel_engine_cs *
-eb_select_engine(struct drm_i915_private *dev_priv,
-		 struct drm_file *file,
-		 struct drm_i915_gem_execbuffer2 *args)
+static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce)
 {
-	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
-	struct intel_engine_cs *engine;
+	int err;
 
-	if (user_ring_id > I915_USER_RINGS) {
-		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
-		return NULL;
-	}
+	/*
+	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+	 * EIO if the GPU is already wedged.
+	 */
+	err = i915_terminally_wedged(eb->i915);
+	if (err)
+		return err;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	err = intel_context_pin(ce);
+	if (err)
+		return err;
 
-	if ((user_ring_id != I915_EXEC_BSD) &&
-	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
+	eb->engine = ce->engine;
+	eb->context = ce;
+	return 0;
+}
+
+static void eb_unpin_context(struct i915_execbuffer *eb)
+{
+	intel_context_unpin(eb->context);
+}
+
+static unsigned int
+eb_select_legacy_ring(struct i915_execbuffer *eb,
+		      struct drm_file *file,
+		      struct drm_i915_gem_execbuffer2 *args)
+{
+	struct drm_i915_private *i915 = eb->i915;
+	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
+
+	if (user_ring_id != I915_EXEC_BSD &&
+	    (args->flags & I915_EXEC_BSD_MASK)) {
 		DRM_DEBUG("execbuf with non bsd ring but with invalid "
 			  "bsd dispatch flags: %d\n", (int)(args->flags));
-		return NULL;
+		return -1;
 	}
 
-	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(dev_priv, VCS1)) {
+	if (user_ring_id == I915_EXEC_BSD && HAS_ENGINE(i915, VCS1)) {
 		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
 
 		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
-			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
+			bsd_idx = gen8_dispatch_bsd_engine(i915, file);
 		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
 			   bsd_idx <= I915_EXEC_BSD_RING2) {
 			bsd_idx >>= I915_EXEC_BSD_SHIFT;
@@ -2121,20 +2179,42 @@ eb_select_engine(struct drm_i915_private *dev_priv,
 		} else {
 			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
 				  bsd_idx);
-			return NULL;
+			return -1;
 		}
 
-		engine = dev_priv->engine[_VCS(bsd_idx)];
-	} else {
-		engine = dev_priv->engine[user_ring_map[user_ring_id]];
+		return _VCS(bsd_idx);
 	}
 
-	if (!engine) {
-		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
-		return NULL;
+	if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
+		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
+		return -1;
 	}
 
-	return engine;
+	return user_ring_map[user_ring_id];
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb,
+		 struct drm_file *file,
+		 struct drm_i915_gem_execbuffer2 *args)
+{
+	struct intel_context *ce;
+	unsigned int idx;
+	int err;
+
+	if (i915_gem_context_user_engines(eb->gem_context))
+		idx = args->flags & I915_EXEC_RING_MASK;
+	else
+		idx = eb_select_legacy_ring(eb, file, args);
+
+	ce = i915_gem_context_get_engine(eb->gem_context, idx);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = eb_pin_context(eb, ce);
+	intel_context_put(ce);
+
+	return err;
 }
 
 static void
@@ -2275,8 +2355,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
+	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
-	intel_wakeref_t wakeref;
 	int out_fence_fd = -1;
 	int err;
 
@@ -2318,11 +2398,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			return -EINVAL;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+		if (in_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+
+		exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!exec_fence) {
+			err = -EINVAL;
+			goto err_in_fence;
+		}
+	}
+
 	if (args->flags & I915_EXEC_FENCE_OUT) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
 			err = out_fence_fd;
-			goto err_in_fence;
+			goto err_exec_fence;
 		}
 	}
 
@@ -2336,12 +2429,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (unlikely(err))
 		goto err_destroy;
 
-	eb.engine = eb_select_engine(eb.i915, file, args);
-	if (!eb.engine) {
-		err = -EINVAL;
-		goto err_engine;
-	}
-
 	/*
 	 * Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
@@ -2349,16 +2436,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	 * wakeref that we hold until the GPU has been idle for at least
 	 * 100ms.
 	 */
-	wakeref = intel_runtime_pm_get(eb.i915);
+	intel_gt_pm_get(eb.i915);
 
 	err = i915_mutex_lock_interruptible(dev);
 	if (err)
 		goto err_rpm;
 
-	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	err = eb_select_engine(&eb, file, args);
 	if (unlikely(err))
 		goto err_unlock;
 
+	err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+	if (unlikely(err))
+		goto err_engine;
+
 	err = eb_relocate(&eb);
 	if (err) {
 		/*
@@ -2442,7 +2533,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	GEM_BUG_ON(eb.reloc_cache.rq);
 
 	/* Allocate a request for this batch buffer nice and early. */
-	eb.request = i915_request_alloc(eb.engine, eb.ctx);
+	eb.request = i915_request_create(eb.context);
 	if (IS_ERR(eb.request)) {
 		err = PTR_ERR(eb.request);
 		goto err_batch_unpin;
@@ -2454,6 +2545,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 			goto err_request;
 	}
 
+	if (exec_fence) {
+		err = i915_request_await_execution(eb.request, exec_fence,
+						   eb.engine->bond_execute);
+		if (err < 0)
+			goto err_request;
+	}
+
 	if (fences) {
 		err = await_fence_array(&eb, fences);
 		if (err)
@@ -2480,8 +2578,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	trace_i915_request_queue(eb.request, eb.batch_flags);
 	err = eb_submit(&eb);
 err_request:
-	i915_request_add(eb.request);
 	add_to_client(eb.request, file);
+	i915_request_add(eb.request);
 
 	if (fences)
 		signal_fence_array(&eb, fences);
@@ -2503,17 +2601,20 @@ err_batch_unpin:
 err_vma:
 	if (eb.exec)
 		eb_release_vmas(&eb);
+err_engine:
+	eb_unpin_context(&eb);
 err_unlock:
 	mutex_unlock(&dev->struct_mutex);
 err_rpm:
-	intel_runtime_pm_put(eb.i915, wakeref);
-err_engine:
-	i915_gem_context_put(eb.ctx);
+	intel_gt_pm_put(eb.i915);
+	i915_gem_context_put(eb.gem_context);
 err_destroy:
 	eb_destroy(&eb);
 err_out_fence:
 	if (out_fence_fd != -1)
 		put_unused_fd(out_fence_fd);
+err_exec_fence:
+	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
 	return err;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
new file mode 100644
index 000000000000..cf0439e6be83
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c
@@ -0,0 +1,96 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+
+struct stub_fence {
+	struct dma_fence dma;
+	struct i915_sw_fence chain;
+};
+
+static int __i915_sw_fence_call
+stub_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), chain);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_signal(&stub->dma);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&stub->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static const char *stub_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *stub_timeline_name(struct dma_fence *fence)
+{
+	return "object";
+}
+
+static void stub_release(struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_fini(&stub->chain);
+
+	BUILD_BUG_ON(offsetof(typeof(*stub), dma));
+	dma_fence_free(&stub->dma);
+}
+
+static const struct dma_fence_ops stub_fence_ops = {
+	.get_driver_name = stub_driver_name,
+	.get_timeline_name = stub_timeline_name,
+	.release = stub_release,
+};
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj)
+{
+	struct stub_fence *stub;
+
+	assert_object_held(obj);
+
+	stub = kmalloc(sizeof(*stub), GFP_KERNEL);
+	if (!stub)
+		return NULL;
+
+	i915_sw_fence_init(&stub->chain, stub_notify);
+	dma_fence_init(&stub->dma, &stub_fence_ops, &stub->chain.wait.lock,
+		       to_i915(obj->base.dev)->mm.unordered_timeline,
+		       0);
+
+	if (i915_sw_fence_await_reservation(&stub->chain,
+					    obj->base.resv, NULL,
+					    true, I915_FENCE_TIMEOUT,
+					    I915_FENCE_GFP) < 0)
+		goto err;
+
+	reservation_object_add_excl_fence(obj->base.resv, &stub->dma);
+
+	return &stub->dma;
+
+err:
+	stub_release(&stub->dma);
+	return NULL;
+}
+
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence)
+{
+	struct stub_fence *stub = container_of(fence, typeof(*stub), dma);
+
+	i915_sw_fence_commit(&stub->chain);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index ab627ed1269c..0c41e04ab8fa 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -1,36 +1,24 @@
 /*
- * Copyright © 2014-2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/swiotlb.h>
+
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_utils.h"
 
 #define QUIET (__GFP_NORETRY | __GFP_NOWARN)
 #define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
-/* convert swiotlb segment size into sensible units (pages)! */
-#define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT)
-
 static void internal_free_pages(struct sg_table *st)
 {
 	struct scatterlist *sg;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 000000000000..ddc7f2a52b3e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 000000000000..391621ee3cbb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,508 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/mman.h>
+#include <linux/sizes.h>
+
+#include "i915_drv.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+#include "intel_drv.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+	      unsigned long addr, unsigned long size)
+{
+	if (vma->vm_file != filp)
+		return false;
+
+	return vma->vm_start == addr &&
+	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ *			 it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look a this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_gem_mmap *args = data;
+	struct drm_i915_gem_object *obj;
+	unsigned long addr;
+
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
+		return -ENODEV;
+
+	obj = i915_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	/* prime objects have no backing filp to GEM mmap
+	 * pages from.
+	 */
+	if (!obj->base.filp) {
+		addr = -ENXIO;
+		goto err;
+	}
+
+	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+		addr = -EINVAL;
+		goto err;
+	}
+
+	addr = vm_mmap(obj->base.filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	if (IS_ERR_VALUE(addr))
+		goto err;
+
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		if (down_write_killable(&mm->mmap_sem)) {
+			addr = -EINTR;
+			goto err;
+		}
+		vma = find_vma(mm, addr);
+		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+		if (IS_ERR_VALUE(addr))
+			goto err;
+
+		/* This may race, but that's ok, it only gets set */
+		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
+	}
+	i915_gem_object_put(obj);
+
+	args->addr_ptr = (u64)addr;
+	return 0;
+
+err:
+	i915_gem_object_put(obj);
+	return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
+ *     aligned and suitable for fencing, and still fit into the available
+ *     mappable space left by the pinned display objects. A classic problem
+ *     we called the page-fault-of-doom where we would ping-pong between
+ *     two objects that could not fit inside the GTT and so the memcpy
+ *     would page one object in at the expense of the other between every
+ *     single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
+ *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ *     object is too large for the available space (or simply too large
+ *     for the mappable aperture!), a view is created instead and faulted
+ *     into userspace. (This view is aligned and sized appropriately for
+ *     fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ *     pagefault; swapin remains transparent.
+ *
+ * Restrictions:
+ *
+ *  * snoopable objects cannot be accessed via the GTT. It can cause machine
+ *    hangs on some architectures, corruption on others. An attempt to service
+ *    a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ *  * the object must be able to fit into RAM (physical memory, though no
+ *    limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ *  * a new GTT page fault will synchronize rendering from the GPU and flush
+ *    all data to system memory. Subsequent access will not be synchronized.
+ *
+ *  * all mappings are revoked on runtime device suspend.
+ *
+ *  * there are only 8, 16 or 32 fence registers to share between all users
+ *    (older machines require fence register for display and blitter access
+ *    as well). Contention of the fence registers will cause the previous users
+ *    to be unmapped and any new access will generate new page faults.
+ *
+ *  * running out of memory while servicing a fault may generate a SIGBUS,
+ *    rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+	return 3;
+}
+
+static inline struct i915_ggtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+		     pgoff_t page_offset,
+		     unsigned int chunk)
+{
+	struct i915_ggtt_view view;
+
+	if (i915_gem_object_is_tiled(obj))
+		chunk = roundup(chunk, tile_row_pages(obj));
+
+	view.type = I915_GGTT_VIEW_PARTIAL;
+	view.partial.offset = rounddown(page_offset, chunk);
+	view.partial.size =
+		min_t(unsigned int, chunk,
+		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+	/* If the partial covers the entire object, just create a normal VMA. */
+	if (chunk >= obj->base.size >> PAGE_SHIFT)
+		view.type = I915_GGTT_VIEW_NORMAL;
+
+	return view;
+}
+
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ *
+ * The current feature set supported by i915_gem_fault() and thus GTT mmaps
+ * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
+ */
+vm_fault_t i915_gem_fault(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+	struct vm_area_struct *area = vmf->vma;
+	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	bool write = area->vm_flags & VM_WRITE;
+	intel_wakeref_t wakeref;
+	struct i915_vma *vma;
+	pgoff_t page_offset;
+	int srcu;
+	int ret;
+
+	/* Sanity check that we allow writing into this object */
+	if (i915_gem_object_is_readonly(obj) && write)
+		return VM_FAULT_SIGBUS;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+	trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		goto err;
+
+	wakeref = intel_runtime_pm_get(rpm);
+
+	srcu = i915_reset_trylock(i915);
+	if (srcu < 0) {
+		ret = srcu;
+		goto err_rpm;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		goto err_reset;
+
+	/* Access to snoopable pages through the GTT is incoherent. */
+	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
+		ret = -EFAULT;
+		goto err_unlock;
+	}
+
+	/* Now pin it into the GTT as needed */
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
+	if (IS_ERR(vma)) {
+		/* Use a partial view if it is bigger than available space */
+		struct i915_ggtt_view view =
+			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
+
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
+		 * all hope that the hardware is able to track future writes.
+		 */
+		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
+
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
+	}
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unlock;
+	}
+
+	ret = i915_vma_pin_fence(vma);
+	if (ret)
+		goto err_unpin;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
+	if (ret)
+		goto err_fence;
+
+	/* Mark as being mmapped into userspace for later revocation */
+	assert_rpm_wakelock_held(rpm);
+	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
+	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
+				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
+	GEM_BUG_ON(!obj->userfault_count);
+
+	i915_vma_set_ggtt_write(vma);
+
+err_fence:
+	i915_vma_unpin_fence(vma);
+err_unpin:
+	__i915_vma_unpin(vma);
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+err_reset:
+	i915_reset_unlock(i915, srcu);
+err_rpm:
+	intel_runtime_pm_put(rpm, wakeref);
+	i915_gem_object_unpin_pages(obj);
+err:
+	switch (ret) {
+	case -EIO:
+		/*
+		 * We eat errors when the gpu is terminally wedged to avoid
+		 * userspace unduly crashing (gl has no provisions for mmaps to
+		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
+		 * and so needs to be reported.
+		 */
+		if (!i915_terminally_wedged(i915))
+			return VM_FAULT_SIGBUS;
+		/* else: fall through */
+	case -EAGAIN:
+		/*
+		 * EAGAIN means the gpu is hung and we'll wait for the error
+		 * handler to reset everything when re-faulting in
+		 * i915_mutex_lock_interruptible.
+		 */
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	case -ENOSPC:
+	case -EFAULT:
+		return VM_FAULT_SIGBUS;
+	default:
+		WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma;
+
+	GEM_BUG_ON(!obj->userfault_count);
+
+	obj->userfault_count = 0;
+	list_del(&obj->userfault_link);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
+
+	for_each_ggtt_vma(vma, obj)
+		i915_vma_unset_userfault(vma);
+}
+
+/**
+ * i915_gem_object_release_mmap - remove physical page mappings
+ * @obj: obj in question
+ *
+ * Preserve the reservation of the mmapping with the DRM core code, but
+ * relinquish ownership of the pages back to the system.
+ *
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, than pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by i915_gem_fault().
+ */
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	intel_wakeref_t wakeref;
+
+	/* Serialisation between user GTT access and our code depends upon
+	 * revoking the CPU's PTE whilst the mutex is held. The next user
+	 * pagefault then has to wait until we release the mutex.
+	 *
+	 * Note that RPM complicates somewhat by adding an additional
+	 * requirement that operations to the GGTT be made holding the RPM
+	 * wakeref.
+	 */
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	if (!obj->userfault_count)
+		goto out;
+
+	__i915_gem_object_release_mmap(obj);
+
+	/* Ensure that the CPU's PTE are revoked and there are not outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied above by changing the PTE above *should* be
+	 * sufficient, an extra barrier here just provides us with a bit
+	 * of paranoid documentation about our requirement to serialise
+	 * memory writes before touching registers / GSM.
+	 */
+	wmb();
+
+out:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+static int create_mmap_offset(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	int err;
+
+	err = drm_gem_create_mmap_offset(&obj->base);
+	if (likely(!err))
+		return 0;
+
+	/* Attempt to reap some mmap space from dead objects */
+	do {
+		err = i915_gem_wait_for_idle(i915,
+					     I915_WAIT_INTERRUPTIBLE,
+					     MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			break;
+
+		i915_gem_drain_freed_objects(i915);
+		err = drm_gem_create_mmap_offset(&obj->base);
+		if (!err)
+			break;
+
+	} while (flush_delayed_work(&i915->gem.retire_work));
+
+	return err;
+}
+
+int
+i915_gem_mmap_gtt(struct drm_file *file,
+		  struct drm_device *dev,
+		  u32 handle,
+		  u64 *offset)
+{
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	obj = i915_gem_object_lookup(file, handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = create_mmap_offset(obj);
+	if (ret == 0)
+		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
new file mode 100644
index 000000000000..be6caccce0c5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_gem_object.h"
+#include "i915_globals.h"
+
+static struct i915_global_object {
+	struct i915_global base;
+	struct kmem_cache *slab_objects;
+} global;
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void)
+{
+	return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+	return kmem_cache_free(global.slab_objects, obj);
+}
+
+static void
+frontbuffer_retire(struct i915_active_request *active,
+		   struct i915_request *request)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(active, typeof(*obj), frontbuffer_write);
+
+	intel_fb_obj_flush(obj, ORIGIN_CS);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops)
+{
+	mutex_init(&obj->mm.lock);
+
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
+	INIT_LIST_HEAD(&obj->lut_list);
+	INIT_LIST_HEAD(&obj->batch_pool_link);
+
+	init_rcu_head(&obj->rcu);
+
+	obj->ops = ops;
+
+	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
+	i915_active_request_init(&obj->frontbuffer_write,
+				 NULL, frontbuffer_retire);
+
+	obj->mm.madv = I915_MADV_WILLNEED;
+	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+	mutex_init(&obj->mm.get_page.lock);
+}
+
+/**
+ * Mark up the object's coherency levels for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+					 unsigned int cache_level)
+{
+	obj->cache_level = cache_level;
+
+	if (cache_level != I915_CACHE_NONE)
+		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+				       I915_BO_CACHE_COHERENT_FOR_WRITE);
+	else if (HAS_LLC(to_i915(obj->base.dev)))
+		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+	else
+		obj->cache_coherent = 0;
+
+	obj->cache_dirty =
+		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
+}
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+	struct drm_i915_file_private *fpriv = file->driver_priv;
+	struct i915_lut_handle *lut, *ln;
+	LIST_HEAD(close);
+
+	i915_gem_object_lock(obj);
+	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+
+		if (ctx->file_priv != fpriv)
+			continue;
+
+		i915_gem_context_get(ctx);
+		list_move(&lut->obj_link, &close);
+	}
+	i915_gem_object_unlock(obj);
+
+	list_for_each_entry_safe(lut, ln, &close, obj_link) {
+		struct i915_gem_context *ctx = lut->ctx;
+		struct i915_vma *vma;
+
+		/*
+		 * We allow the process to have multiple handles to the same
+		 * vma, in the same fd namespace, by virtue of flink/open.
+		 */
+
+		mutex_lock(&ctx->mutex);
+		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+		if (vma) {
+			GEM_BUG_ON(vma->obj != obj);
+			GEM_BUG_ON(!atomic_read(&vma->open_count));
+			if (atomic_dec_and_test(&vma->open_count) &&
+			    !i915_vma_is_ggtt(vma))
+				i915_vma_close(vma);
+		}
+		mutex_unlock(&ctx->mutex);
+
+		i915_gem_context_put(lut->ctx);
+		i915_lut_handle_free(lut);
+		i915_gem_object_put(obj);
+	}
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+				    struct llist_node *freed)
+{
+	struct drm_i915_gem_object *obj, *on;
+	intel_wakeref_t wakeref;
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	llist_for_each_entry_safe(obj, on, freed, freed) {
+		struct i915_vma *vma, *vn;
+
+		trace_i915_gem_object_destroy(obj);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(i915_gem_object_is_active(obj));
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
+			GEM_BUG_ON(i915_vma_is_active(vma));
+			vma->flags &= ~I915_VMA_PIN_MASK;
+			i915_vma_destroy(vma);
+		}
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
+
+		/*
+		 * This serializes freeing with the shrinker. Since the free
+		 * is delayed, first by RCU then by the workqueue, we want the
+		 * shrinker to be able to free pages of unreferenced objects,
+		 * or else we may oom whilst there are plenty of deferred
+		 * freed objects.
+		 */
+		if (i915_gem_object_has_pages(obj) &&
+		    i915_gem_object_is_shrinkable(obj)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+			list_del_init(&obj->mm.link);
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+		}
+
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		GEM_BUG_ON(atomic_read(&obj->bind_count));
+		GEM_BUG_ON(obj->userfault_count);
+		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
+		GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+		if (obj->ops->release)
+			obj->ops->release(obj);
+
+		atomic_set(&obj->mm.pages_pin_count, 0);
+		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
+		GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+		if (obj->base.import_attach)
+			drm_prime_gem_destroy(&obj->base, NULL);
+
+		drm_gem_object_release(&obj->base);
+
+		bitmap_free(obj->bit_17);
+		i915_gem_object_free(obj);
+
+		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+		atomic_dec(&i915->mm.free_count);
+
+		cond_resched();
+	}
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+	struct llist_node *freed;
+
+	/* Free the oldest, most stale object to keep the free_list short */
+	freed = NULL;
+	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
+		/* Only one consumer of llist_del_first() allowed */
+		spin_lock(&i915->mm.free_lock);
+		freed = llist_del_first(&i915->mm.free_list);
+		spin_unlock(&i915->mm.free_lock);
+	}
+	if (unlikely(freed)) {
+		freed->next = NULL;
+		__i915_gem_free_objects(i915, freed);
+	}
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, mm.free_work);
+	struct llist_node *freed;
+
+	/*
+	 * All file-owned VMA should have been released by this point through
+	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
+	 * However, the object may also be bound into the global GTT (e.g.
+	 * older GPUs without per-process support, or for direct access through
+	 * the GTT either for the user or for scanout). Those VMA still need to
+	 * unbound now.
+	 */
+
+	spin_lock(&i915->mm.free_lock);
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
+		spin_unlock(&i915->mm.free_lock);
+
+		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			return;
+
+		spin_lock(&i915->mm.free_lock);
+	}
+	spin_unlock(&i915->mm.free_lock);
+}
+
+static void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+	struct drm_i915_gem_object *obj =
+		container_of(head, typeof(*obj), rcu);
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	/*
+	 * We reuse obj->rcu for the freed list, so we had better not treat
+	 * it like a rcu_head from this point forwards. And we expect all
+	 * objects to be freed via this path.
+	 */
+	destroy_rcu_head(&obj->rcu);
+
+	/*
+	 * Since we require blocking on struct_mutex to unbind the freed
+	 * object from the GPU before releasing resources back to the
+	 * system, we can not do that directly from the RCU callback (which may
+	 * be a softirq context), but must instead then defer that work onto a
+	 * kthread. We use the RCU callback rather than move the freed object
+	 * directly onto the work queue so that we can mix between using the
+	 * worker and performing frees directly from subsequent allocations for
+	 * crude but effective memory throttling.
+	 */
+	if (llist_add(&obj->freed, &i915->mm.free_list))
+		queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+
+	/*
+	 * Before we free the object, make sure any pure RCU-only
+	 * read-side critical sections are complete, e.g.
+	 * i915_gem_busy_ioctl(). For the corresponding synchronized
+	 * lookup see i915_gem_object_lookup_rcu().
+	 */
+	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
+	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+}
+
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_vma *vma;
+
+	assert_object_held(obj);
+
+	if (!(obj->write_domain & flush_domains))
+		return;
+
+	switch (obj->write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		i915_gem_flush_ggtt_writes(dev_priv);
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+		for_each_ggtt_vma(vma, obj) {
+			if (vma->iomap)
+				continue;
+
+			i915_vma_unset_ggtt_write(vma);
+		}
+		break;
+
+	case I915_GEM_DOMAIN_WC:
+		wmb();
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+
+	case I915_GEM_DOMAIN_RENDER:
+		if (gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
+		break;
+	}
+
+	obj->write_domain = 0;
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
+static void i915_global_objects_shrink(void)
+{
+	kmem_cache_shrink(global.slab_objects);
+}
+
+static void i915_global_objects_exit(void)
+{
+	kmem_cache_destroy(global.slab_objects);
+}
+
+static struct i915_global_object global = { {
+	.shrink = i915_global_objects_shrink,
+	.exit = i915_global_objects_exit,
+} };
+
+int __init i915_global_objects_init(void)
+{
+	global.slab_objects =
+		KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_objects)
+		return -ENOMEM;
+
+	i915_global_register(&global.base);
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
new file mode 100644
index 000000000000..dfebd5706f16
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -0,0 +1,430 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_H__
+#define __I915_GEM_OBJECT_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
+#include <drm/drm_device.h>
+
+#include <drm/i915_drm.h>
+
+#include "i915_gem_object_types.h"
+
+#include "i915_gem_gtt.h"
+
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+				       const void *data, size_t size);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				     struct sg_table *pages,
+				     bool needs_clflush);
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+
+void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
+void i915_gem_free_object(struct drm_gem_object *obj);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @filp: DRM file private date
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @filp, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+	WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
+#endif
+	return idr_find(&file->object_idr, handle);
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup(struct drm_file *file, u32 handle)
+{
+	struct drm_i915_gem_object *obj;
+
+	rcu_read_lock();
+	obj = i915_gem_object_lookup_rcu(file, handle);
+	if (obj && !kref_get_unless_zero(&obj->base.refcount))
+		obj = NULL;
+	rcu_read_unlock();
+
+	return obj;
+}
+
+__deprecated
+extern struct drm_gem_object *
+drm_gem_object_lookup(struct drm_file *file, u32 handle);
+
+__attribute__((nonnull))
+static inline struct drm_i915_gem_object *
+i915_gem_object_get(struct drm_i915_gem_object *obj)
+{
+	drm_gem_object_get(&obj->base);
+	return obj;
+}
+
+__attribute__((nonnull))
+static inline void
+i915_gem_object_put(struct drm_i915_gem_object *obj)
+{
+	__drm_gem_object_put(&obj->base);
+}
+
+#define assert_object_held(obj) reservation_object_assert_held((obj)->base.resv)
+
+static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
+{
+	reservation_object_lock(obj->base.resv, NULL);
+}
+
+static inline int
+i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj)
+{
+	return reservation_object_lock_interruptible(obj->base.resv, NULL);
+}
+
+static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
+{
+	reservation_object_unlock(obj->base.resv);
+}
+
+struct dma_fence *
+i915_gem_object_lock_fence(struct drm_i915_gem_object *obj);
+void i915_gem_object_unlock_fence(struct drm_i915_gem_object *obj,
+				  struct dma_fence *fence);
+
+static inline void
+i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
+{
+	obj->base.vma_node.readonly = true;
+}
+
+static inline bool
+i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
+{
+	return obj->base.vma_node.readonly;
+}
+
+static inline bool
+i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
+}
+
+static inline bool
+i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+}
+
+static inline bool
+i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
+}
+
+static inline bool
+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+}
+
+static inline bool
+i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
+{
+	return READ_ONCE(obj->active_count);
+}
+
+static inline bool
+i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
+{
+	return READ_ONCE(obj->framebuffer_references);
+}
+
+static inline unsigned int
+i915_gem_object_get_tiling(const struct drm_i915_gem_object *obj)
+{
+	return obj->tiling_and_stride & TILING_MASK;
+}
+
+static inline bool
+i915_gem_object_is_tiled(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
+}
+
+static inline unsigned int
+i915_gem_object_get_stride(const struct drm_i915_gem_object *obj)
+{
+	return obj->tiling_and_stride & STRIDE_MASK;
+}
+
+static inline unsigned int
+i915_gem_tile_height(unsigned int tiling)
+{
+	GEM_BUG_ON(!tiling);
+	return tiling == I915_TILING_Y ? 32 : 8;
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_height(const struct drm_i915_gem_object *obj)
+{
+	return i915_gem_tile_height(i915_gem_object_get_tiling(obj));
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
+{
+	return (i915_gem_object_get_stride(obj) *
+		i915_gem_object_get_tile_height(obj));
+}
+
+int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+			       unsigned int tiling, unsigned int stride);
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n, unsigned int *offset);
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj,
+			 unsigned int n);
+
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n);
+
+dma_addr_t
+i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
+				    unsigned long n,
+				    unsigned int *len);
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n);
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	might_lock(&obj->mm.lock);
+
+	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+		return 0;
+
+	return __i915_gem_object_get_pages(obj);
+}
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+	return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_unpin_pages(obj);
+}
+
+enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
+	I915_MM_NORMAL = 0,
+	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
+};
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass);
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
+
+enum i915_map_type {
+	I915_MAP_WB = 0,
+	I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+					   enum i915_map_type type);
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size);
+static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
+{
+	__i915_gem_object_flush_map(obj, 0, obj->base.size);
+}
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap(struct drm_i915_gem_object *obj);
+
+void
+i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
+				   unsigned int flush_domains);
+
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+				 unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+				  unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE	BIT(0)
+#define CLFLUSH_AFTER	BIT(1)
+#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+	i915_gem_object_unlock(obj);
+}
+
+static inline struct intel_engine_cs *
+i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
+{
+	struct intel_engine_cs *engine = NULL;
+	struct dma_fence *fence;
+
+	rcu_read_lock();
+	fence = reservation_object_get_excl_rcu(obj->base.resv);
+	rcu_read_unlock();
+
+	if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
+		engine = to_request(fence)->engine;
+	dma_fence_put(fence);
+
+	return engine;
+}
+
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+					 unsigned int cache_level);
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+				     u32 alignment,
+				     const struct i915_ggtt_view *view,
+				     unsigned int flags);
+void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
+
+static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	return obj->pin_global; /* currently in use by HW, keep flushed */
+}
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	if (cpu_write_needs_clflush(obj))
+		obj->cache_dirty = true;
+}
+
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+			 unsigned int flags,
+			 long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+				  unsigned int flags,
+				  const struct i915_sched_attr *attr);
+#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
new file mode 100644
index 000000000000..cb42e3a312e2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_gem_object_blt.h"
+
+#include "i915_gem_clflush.h"
+#include "intel_drv.h"
+
+int intel_emit_vma_fill_blt(struct i915_request *rq,
+			    struct i915_vma *vma,
+			    u32 value)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 8);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (INTEL_GEN(rq->i915) >= 8) {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(vma->node.start);
+		*cs++ = upper_32_bits(vma->node.start);
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+	} else {
+		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = vma->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = vma->node.start;
+		*cs++ = value;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
+			     struct intel_context *ce,
+			     u32 value)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	int err;
+
+	/* XXX: ce->vm please */
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (unlikely(err))
+		return err;
+
+	if (obj->cache_dirty & ~obj->cache_coherent) {
+		i915_gem_object_lock(obj);
+		i915_gem_clflush_object(obj, 0);
+		i915_gem_object_unlock(obj);
+	}
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin;
+	}
+
+	err = i915_request_await_object(rq, obj, true);
+	if (unlikely(err))
+		goto out_request;
+
+	if (ce->engine->emit_init_breadcrumb) {
+		err = ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_request;
+	}
+
+	i915_vma_lock(vma);
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
+	if (unlikely(err))
+		goto out_request;
+
+	err = intel_emit_vma_fill_blt(rq, vma, value);
+out_request:
+	if (unlikely(err))
+		i915_request_skip(rq, err);
+
+	i915_request_add(rq);
+out_unpin:
+	i915_vma_unpin(vma);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_object_blt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
new file mode 100644
index 000000000000..7ec7de6ac0c0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_BLT_H__
+#define __I915_GEM_OBJECT_BLT_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct intel_context;
+struct i915_request;
+struct i915_vma;
+
+int intel_emit_vma_fill_blt(struct i915_request *rq,
+			    struct i915_vma *vma,
+			    u32 value);
+
+int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
+			     struct intel_context *ce,
+			     u32 value);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 000000000000..18bf4f8d6d80
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,262 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <drm/drm_gem.h>
+
+#include "i915_active.h"
+#include "i915_selftest.h"
+
+struct drm_i915_gem_object;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+	struct list_head obj_link;
+	struct i915_gem_context *ctx;
+	u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+	unsigned int flags;
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
+#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
+
+	/* Interface between the GEM object and its backing storage.
+	 * get_pages() is called once prior to the use of the associated set
+	 * of pages before to binding them into the GTT, and put_pages() is
+	 * called after we no longer need them. As we expect there to be
+	 * associated cost with migrating pages between the backing storage
+	 * and making them available for the GPU (e.g. clflush), we may hold
+	 * onto the pages after they are no longer referenced by the GPU
+	 * in case they may be used again shortly (for example migrating the
+	 * pages to a different memory domain within the GTT). put_pages()
+	 * will therefore most likely be called when the object itself is
+	 * being released or under memory pressure (where we attempt to
+	 * reap pages for the shrinker).
+	 */
+	int (*get_pages)(struct drm_i915_gem_object *obj);
+	void (*put_pages)(struct drm_i915_gem_object *obj,
+			  struct sg_table *pages);
+	void (*truncate)(struct drm_i915_gem_object *obj);
+	void (*writeback)(struct drm_i915_gem_object *obj);
+
+	int (*pwrite)(struct drm_i915_gem_object *obj,
+		      const struct drm_i915_gem_pwrite *arg);
+
+	int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+	void (*release)(struct drm_i915_gem_object *obj);
+};
+
+struct drm_i915_gem_object {
+	struct drm_gem_object base;
+
+	const struct drm_i915_gem_object_ops *ops;
+
+	struct {
+		/**
+		 * @vma.lock: protect the list/tree of vmas
+		 */
+		spinlock_t lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
+
+	/**
+	 * @lut_list: List of vma lookup entries in use for this object.
+	 *
+	 * If this object is closed, we need to remove all of its VMA from
+	 * the fast lookup index in associated contexts; @lut_list provides
+	 * this translation from object to context->handles_vma.
+	 */
+	struct list_head lut_list;
+
+	/** Stolen memory for this object, instead of being backed by shmem. */
+	struct drm_mm_node *stolen;
+	union {
+		struct rcu_head rcu;
+		struct llist_node freed;
+	};
+
+	/**
+	 * Whether the object is currently in the GGTT mmap.
+	 */
+	unsigned int userfault_count;
+	struct list_head userfault_link;
+
+	struct list_head batch_pool_link;
+	I915_SELFTEST_DECLARE(struct list_head st_link);
+
+	/*
+	 * Is the object to be mapped as read-only to the GPU
+	 * Only honoured if hardware has relevant pte bit
+	 */
+	unsigned int cache_level:3;
+	unsigned int cache_coherent:2;
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+	unsigned int cache_dirty:1;
+
+	/**
+	 * @read_domains: Read memory domains.
+	 *
+	 * These monitor which caches contain read/write data related to the
+	 * object. When transitioning from one set of domains to another,
+	 * the driver is called to ensure that caches are suitably flushed and
+	 * invalidated.
+	 */
+	u16 read_domains;
+
+	/**
+	 * @write_domain: Corresponding unique write memory domain.
+	 */
+	u16 write_domain;
+
+	atomic_t frontbuffer_bits;
+	unsigned int frontbuffer_ggtt_origin; /* write once */
+	struct i915_active_request frontbuffer_write;
+
+	/** Current tiling stride for the object, if it's tiled. */
+	unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+	/** Count of VMA actually bound by this object */
+	atomic_t bind_count;
+	unsigned int active_count;
+	/** Count of how many global VMA are currently pinned for use by HW */
+	unsigned int pin_global;
+
+	struct {
+		struct mutex lock; /* protects the pages and their use */
+		atomic_t pages_pin_count;
+
+		struct sg_table *pages;
+		void *mapping;
+
+		/* TODO: whack some of this into the error state */
+		struct i915_page_sizes {
+			/**
+			 * The sg mask of the pages sg_table. i.e the mask of
+			 * of the lengths for each sg entry.
+			 */
+			unsigned int phys;
+
+			/**
+			 * The gtt page sizes we are allowed to use given the
+			 * sg mask and the supported page sizes. This will
+			 * express the smallest unit we can use for the whole
+			 * object, as well as the larger sizes we may be able
+			 * to use opportunistically.
+			 */
+			unsigned int sg;
+
+			/**
+			 * The actual gtt page size usage. Since we can have
+			 * multiple vma associated with this object we need to
+			 * prevent any trampling of state, hence a copy of this
+			 * struct also lives in each vma, therefore the gtt
+			 * value here should only be read/write through the vma.
+			 */
+			unsigned int gtt;
+		} page_sizes;
+
+		I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+		struct i915_gem_object_page_iter {
+			struct scatterlist *sg_pos;
+			unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+			struct radix_tree_root radix;
+			struct mutex lock; /* protects this cache */
+		} get_page;
+
+		/**
+		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
+		 * locked by i915->mm.obj_lock.
+		 */
+		struct list_head link;
+
+		/**
+		 * Advice: are the backing pages purgeable?
+		 */
+		unsigned int madv:2;
+
+		/**
+		 * This is set if the object has been written to since the
+		 * pages were last acquired.
+		 */
+		bool dirty:1;
+
+		/**
+		 * This is set if the object has been pinned due to unknown
+		 * swizzling.
+		 */
+		bool quirked:1;
+	} mm;
+
+	/** References from framebuffers, locks out tiling changes. */
+	unsigned int framebuffer_references;
+
+	/** Record of address bit 17 of each page at last unbind. */
+	unsigned long *bit_17;
+
+	union {
+		struct i915_gem_userptr {
+			uintptr_t ptr;
+
+			struct i915_mm_struct *mm;
+			struct i915_mmu_object *mmu_object;
+			struct work_struct *work;
+		} userptr;
+
+		unsigned long scratch;
+
+		void *gvt_info;
+	};
+
+	/** for phys allocated objects */
+	struct drm_dma_handle *phys_handle;
+};
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+	/* Assert that to_intel_bo(NULL) == NULL */
+	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+	return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 000000000000..b36ad269f4ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,544 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+				 struct sg_table *pages,
+				 unsigned int sg_page_sizes)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long supported = INTEL_INFO(i915)->page_sizes;
+	int i;
+
+	lockdep_assert_held(&obj->mm.lock);
+
+	/* Make the pages coherent with the GPU (flushing any swapin). */
+	if (obj->cache_dirty) {
+		obj->write_domain = 0;
+		if (i915_gem_object_has_struct_page(obj))
+			drm_clflush_sg(pages);
+		obj->cache_dirty = false;
+	}
+
+	obj->mm.get_page.sg_pos = pages->sgl;
+	obj->mm.get_page.sg_idx = 0;
+
+	obj->mm.pages = pages;
+
+	if (i915_gem_object_is_tiled(obj) &&
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+		GEM_BUG_ON(obj->mm.quirked);
+		__i915_gem_object_pin_pages(obj);
+		obj->mm.quirked = true;
+	}
+
+	GEM_BUG_ON(!sg_page_sizes);
+	obj->mm.page_sizes.phys = sg_page_sizes;
+
+	/*
+	 * Calculate the supported page-sizes which fit into the given
+	 * sg_page_sizes. This will give us the page-sizes which we may be able
+	 * to use opportunistically when later inserting into the GTT. For
+	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
+	 * 64K or 4K pages, although in practice this will depend on a number of
+	 * other factors.
+	 */
+	obj->mm.page_sizes.sg = 0;
+	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+		if (obj->mm.page_sizes.phys & ~0u << i)
+			obj->mm.page_sizes.sg |= BIT(i);
+	}
+	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		struct list_head *list;
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		i915->mm.shrink_count++;
+		i915->mm.shrink_memory += obj->base.size;
+
+		if (obj->mm.madv != I915_MADV_WILLNEED)
+			list = &i915->mm.purge_list;
+		else
+			list = &i915->mm.shrink_list;
+		list_add_tail(&obj->mm.link, list);
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+}
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+		DRM_DEBUG("Attempting to obtain a purgeable object\n");
+		return -EFAULT;
+	}
+
+	err = obj->ops->get_pages(obj);
+	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+	return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+	int err;
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return err;
+
+	if (unlikely(!i915_gem_object_has_pages(obj))) {
+		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+		err = ____i915_gem_object_get_pages(obj);
+		if (err)
+			goto unlock;
+
+		smp_mb__before_atomic();
+	}
+	atomic_inc(&obj->mm.pages_pin_count);
+
+unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+/* Immediately discard the backing storage */
+void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+	drm_gem_free_mmap_offset(&obj->base);
+	if (obj->ops->truncate)
+		obj->ops->truncate(obj);
+}
+
+/* Try to discard unwanted pages */
+void i915_gem_object_writeback(struct drm_i915_gem_object *obj)
+{
+	lockdep_assert_held(&obj->mm.lock);
+	GEM_BUG_ON(i915_gem_object_has_pages(obj));
+
+	if (obj->ops->writeback)
+		obj->ops->writeback(obj);
+}
+
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+	rcu_read_unlock();
+}
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct sg_table *pages;
+
+	pages = fetch_and_zero(&obj->mm.pages);
+	if (IS_ERR_OR_NULL(pages))
+		return pages;
+
+	if (i915_gem_object_is_shrinkable(obj)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+		list_del(&obj->mm.link);
+		i915->mm.shrink_count--;
+		i915->mm.shrink_memory -= obj->base.size;
+
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+	}
+
+	if (obj->mm.mapping) {
+		void *ptr;
+
+		ptr = page_mask_bits(obj->mm.mapping);
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		obj->mm.mapping = NULL;
+	}
+
+	__i915_gem_object_reset_page_iter(obj);
+	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+	return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+				enum i915_mm_subclass subclass)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (i915_gem_object_has_pinned_pages(obj))
+		return -EBUSY;
+
+	GEM_BUG_ON(atomic_read(&obj->bind_count));
+
+	/* May be called by shrinker from within get_pages() (on another bo) */
+	mutex_lock_nested(&obj->mm.lock, subclass);
+	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
+		err = -EBUSY;
+		goto unlock;
+	}
+
+	/*
+	 * ->put_pages might need to allocate memory for the bit17 swizzle
+	 * array, hence protect them from being reaped by removing them from gtt
+	 * lists early.
+	 */
+	pages = __i915_gem_object_unset_pages(obj);
+
+	/*
+	 * XXX Temporary hijinx to avoid updating all backends to handle
+	 * NULL pages. In the future, when we have more asynchronous
+	 * get_pages backends we should be better able to handle the
+	 * cancellation of the async task in a more uniform manner.
+	 */
+	if (!pages && !i915_gem_object_needs_async_cancel(obj))
+		pages = ERR_PTR(-EINVAL);
+
+	if (!IS_ERR(pages))
+		obj->ops->put_pages(obj, pages);
+
+	err = 0;
+unlock:
+	mutex_unlock(&obj->mm.lock);
+
+	return err;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
+				 enum i915_map_type type)
+{
+	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *sgt = obj->mm.pages;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
+	unsigned long i = 0;
+	pgprot_t pgprot;
+	void *addr;
+
+	/* A single page can always be kmapped */
+	if (n_pages == 1 && type == I915_MAP_WB)
+		return kmap(sg_page(sgt->sgl));
+
+	if (n_pages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return NULL;
+	}
+
+	for_each_sgt_page(page, sgt_iter, sgt)
+		pages[i++] = page;
+
+	/* Check that we have the expected number of pages */
+	GEM_BUG_ON(i != n_pages);
+
+	switch (type) {
+	default:
+		MISSING_CASE(type);
+		/* fallthrough to use PAGE_KERNEL anyway */
+	case I915_MAP_WB:
+		pgprot = PAGE_KERNEL;
+		break;
+	case I915_MAP_WC:
+		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+		break;
+	}
+	addr = vmap(pages, n_pages, 0, pgprot);
+
+	if (pages != stack_pages)
+		kvfree(pages);
+
+	return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+			      enum i915_map_type type)
+{
+	enum i915_map_type has_type;
+	bool pinned;
+	void *ptr;
+	int err;
+
+	if (unlikely(!i915_gem_object_has_struct_page(obj)))
+		return ERR_PTR(-ENXIO);
+
+	err = mutex_lock_interruptible(&obj->mm.lock);
+	if (err)
+		return ERR_PTR(err);
+
+	pinned = !(type & I915_MAP_OVERRIDE);
+	type &= ~I915_MAP_OVERRIDE;
+
+	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+		if (unlikely(!i915_gem_object_has_pages(obj))) {
+			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+			err = ____i915_gem_object_get_pages(obj);
+			if (err)
+				goto err_unlock;
+
+			smp_mb__before_atomic();
+		}
+		atomic_inc(&obj->mm.pages_pin_count);
+		pinned = false;
+	}
+	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (ptr && has_type != type) {
+		if (pinned) {
+			err = -EBUSY;
+			goto err_unpin;
+		}
+
+		if (is_vmalloc_addr(ptr))
+			vunmap(ptr);
+		else
+			kunmap(kmap_to_page(ptr));
+
+		ptr = obj->mm.mapping = NULL;
+	}
+
+	if (!ptr) {
+		ptr = i915_gem_object_map(obj, type);
+		if (!ptr) {
+			err = -ENOMEM;
+			goto err_unpin;
+		}
+
+		obj->mm.mapping = page_pack_bits(ptr, type);
+	}
+
+out_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return ptr;
+
+err_unpin:
+	atomic_dec(&obj->mm.pages_pin_count);
+err_unlock:
+	ptr = ERR_PTR(err);
+	goto out_unlock;
+}
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+				 unsigned long offset,
+				 unsigned long size)
+{
+	enum i915_map_type has_type;
+	void *ptr;
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
+				     offset, size, obj->base.size));
+
+	obj->mm.dirty = true;
+
+	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
+		return;
+
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+	if (has_type == I915_MAP_WC)
+		return;
+
+	drm_clflush_virt_range(ptr + offset, size);
+	if (size == obj->base.size) {
+		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+		obj->cache_dirty = false;
+	}
+}
+
+struct scatterlist *
+i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
+		       unsigned int n,
+		       unsigned int *offset)
+{
+	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
+	struct scatterlist *sg;
+	unsigned int idx, count;
+
+	might_sleep();
+	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+	/* As we iterate forward through the sg, we record each entry in a
+	 * radixtree for quick repeated (backwards) lookups. If we have seen
+	 * this index previously, we will have an entry for it.
+	 *
+	 * Initial lookup is O(N), but this is amortized to O(1) for
+	 * sequential page access (where each new request is consecutive
+	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+	 * i.e. O(1) with a large constant!
+	 */
+	if (n < READ_ONCE(iter->sg_idx))
+		goto lookup;
+
+	mutex_lock(&iter->lock);
+
+	/* We prefer to reuse the last sg so that repeated lookup of this
+	 * (or the subsequent) sg are fast - comparing against the last
+	 * sg is faster than going through the radixtree.
+	 */
+
+	sg = iter->sg_pos;
+	idx = iter->sg_idx;
+	count = __sg_page_count(sg);
+
+	while (idx + count <= n) {
+		void *entry;
+		unsigned long i;
+		int ret;
+
+		/* If we cannot allocate and insert this entry, or the
+		 * individual pages from this range, cancel updating the
+		 * sg_idx so that on this lookup we are forced to linearly
+		 * scan onwards, but on future lookups we will try the
+		 * insertion again (in which case we need to be careful of
+		 * the error return reporting that we have already inserted
+		 * this index).
+		 */
+		ret = radix_tree_insert(&iter->radix, idx, sg);
+		if (ret && ret != -EEXIST)
+			goto scan;
+
+		entry = xa_mk_value(idx);
+		for (i = 1; i < count; i++) {
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
+			if (ret && ret != -EEXIST)
+				goto scan;
+		}
+
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+scan:
+	iter->sg_pos = sg;
+	iter->sg_idx = idx;
+
+	mutex_unlock(&iter->lock);
+
+	if (unlikely(n < idx)) /* insertion completed by another thread */
+		goto lookup;
+
+	/* In case we failed to insert the entry into the radixtree, we need
+	 * to look beyond the current sg.
+	 */
+	while (idx + count <= n) {
+		idx += count;
+		sg = ____sg_next(sg);
+		count = __sg_page_count(sg);
+	}
+
+	*offset = n - idx;
+	return sg;
+
+lookup:
+	rcu_read_lock();
+
+	sg = radix_tree_lookup(&iter->radix, n);
+	GEM_BUG_ON(!sg);
+
+	/* If this index is in the middle of multi-page sg entry,
+	 * the radix tree will contain a value entry that points
+	 * to the start of that range. We will return the pointer to
+	 * the base page and the offset of this page within the
+	 * sg entry's range.
+	 */
+	*offset = 0;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
+
+		sg = radix_tree_lookup(&iter->radix, base);
+		GEM_BUG_ON(!sg);
+
+		*offset = n - base;
+	}
+
+	rcu_read_unlock();
+
+	return sg;
+}
+
+struct page *
+i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+	return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
+			       unsigned int n)
+{
+	struct page *page;
+
+	page = i915_gem_object_get_page(obj, n);
+	if (!obj->mm.dirty)
+		set_page_dirty(page);
+
+	return page;
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
+				    unsigned long n,
+				    unsigned int *len)
+{
+	struct scatterlist *sg;
+	unsigned int offset;
+
+	sg = i915_gem_object_get_sg(obj, n, &offset);
+
+	if (len)
+		*len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
+
+	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
+
+dma_addr_t
+i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
+				unsigned long n)
+{
+	return i915_gem_object_get_dma_address_len(obj, n, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 000000000000..2deac933cf59
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,212 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm.h> /* for drm_legacy.h! */
+#include <drm/drm_cache.h>
+#include <drm/drm_legacy.h> /* for drm_pci.h! */
+#include <drm/drm_pci.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	struct drm_dma_handle *phys;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	char *vaddr;
+	int i;
+	int err;
+
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	/* Always aligning to the object size, allows a single allocation
+	 * to handle all possible callers, and given typical object sizes,
+	 * the alignment of the buddy allocation will naturally match.
+	 */
+	phys = drm_pci_alloc(obj->base.dev,
+			     roundup_pow_of_two(obj->base.size),
+			     roundup_pow_of_two(obj->base.size));
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto err_phys;
+		}
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		put_page(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(to_i915(obj->base.dev));
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st) {
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		err = -ENOMEM;
+		goto err_phys;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = phys->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->phys_handle = phys;
+
+	__i915_gem_object_set_pages(obj, st, sg->length);
+
+	return 0;
+
+err_phys:
+	drm_pci_free(obj->base.dev, phys);
+
+	return err;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages)
+{
+	__i915_gem_object_release_shmem(obj, pages, false);
+
+	if (obj->mm.dirty) {
+		struct address_space *mapping = obj->base.filp->f_mapping;
+		char *vaddr = obj->phys_handle->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->mm.madv == I915_MADV_WILLNEED)
+				mark_page_accessed(page);
+			put_page(page);
+			vaddr += PAGE_SIZE;
+		}
+		obj->mm.dirty = false;
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin_pages(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+	struct sg_table *pages;
+	int err;
+
+	if (align > obj->base.size)
+		return -EINVAL;
+
+	if (obj->ops == &i915_gem_phys_ops)
+		return 0;
+
+	if (obj->ops != &i915_gem_shmem_ops)
+		return -EINVAL;
+
+	err = i915_gem_object_unbind(obj);
+	if (err)
+		return err;
+
+	mutex_lock(&obj->mm.lock);
+
+	if (obj->mm.madv != I915_MADV_WILLNEED) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.quirked) {
+		err = -EFAULT;
+		goto err_unlock;
+	}
+
+	if (obj->mm.mapping) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	pages = __i915_gem_object_unset_pages(obj);
+
+	obj->ops = &i915_gem_phys_ops;
+
+	err = ____i915_gem_object_get_pages(obj);
+	if (err)
+		goto err_xfer;
+
+	/* Perma-pin (until release) the physical set of pages */
+	__i915_gem_object_pin_pages(obj);
+
+	if (!IS_ERR_OR_NULL(pages))
+		i915_gem_shmem_ops.put_pages(obj, pages);
+	mutex_unlock(&obj->mm.lock);
+	return 0;
+
+err_xfer:
+	obj->ops = &i915_gem_shmem_ops;
+	if (!IS_ERR_OR_NULL(pages)) {
+		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
+
+		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	}
+err_unlock:
+	mutex_unlock(&obj->mm.lock);
+	return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
new file mode 100644
index 000000000000..05011d4a3b88
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -0,0 +1,294 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt_pm.h"
+
+#include "i915_drv.h"
+#include "i915_globals.h"
+
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+		struct i915_active_request *active =
+			container_of((struct list_head *)node,
+				     typeof(*active), link);
+
+		INIT_LIST_HEAD(&active->link);
+		RCU_INIT_POINTER(active->request, NULL);
+
+		active->retire(active, NULL);
+	}
+}
+
+static void i915_gem_park(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		call_idle_barriers(engine); /* cleanup after wedging */
+		i915_gem_batch_pool_fini(&engine->batch_pool);
+	}
+
+	i915_timelines_park(i915);
+	i915_vma_parked(i915);
+
+	i915_globals_park();
+}
+
+static void idle_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.idle_work);
+	bool park;
+
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	intel_wakeref_lock(&i915->gt.wakeref);
+	park = !intel_wakeref_active(&i915->gt.wakeref) && !work_pending(work);
+	intel_wakeref_unlock(&i915->gt.wakeref);
+	if (park)
+		i915_gem_park(i915);
+	else
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+
+	mutex_unlock(&i915->drm.struct_mutex);
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gem.retire_work.work);
+
+	/* Come back later if the device is busy... */
+	if (mutex_trylock(&i915->drm.struct_mutex)) {
+		i915_retire_requests(i915);
+		mutex_unlock(&i915->drm.struct_mutex);
+	}
+
+	queue_delayed_work(i915->wq,
+			   &i915->gem.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
+
+static int pm_notifier(struct notifier_block *nb,
+		       unsigned long action,
+		       void *data)
+{
+	struct drm_i915_private *i915 =
+		container_of(nb, typeof(*i915), gem.pm_notifier);
+
+	switch (action) {
+	case INTEL_GT_UNPARK:
+		i915_globals_unpark();
+		queue_delayed_work(i915->wq,
+				   &i915->gem.retire_work,
+				   round_jiffies_up_relative(HZ));
+		break;
+
+	case INTEL_GT_PARK:
+		queue_work(i915->wq, &i915->gem.idle_work);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = !i915_terminally_wedged(i915);
+
+	do {
+		if (i915_gem_wait_for_idle(i915,
+					   I915_WAIT_LOCKED |
+					   I915_WAIT_FOR_IDLE_BOOST,
+					   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+			/* XXX hide warning from gem_eio */
+			if (i915_modparams.reset) {
+				dev_err(i915->drm.dev,
+					"Failed to idle engines, declaring wedged!\n");
+				GEM_TRACE_DUMP();
+			}
+
+			/*
+			 * Forcibly cancel outstanding work and leave
+			 * the gpu quiet.
+			 */
+			i915_gem_set_wedged(i915);
+			result = false;
+		}
+	} while (i915_retire_requests(i915) && result);
+
+	GEM_BUG_ON(i915->gt.awake);
+	return result;
+}
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915)
+{
+	return switch_to_kernel_context_sync(i915);
+}
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
+	flush_workqueue(i915->wq);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/*
+	 * We have to flush all the executing contexts to main memory so
+	 * that they can saved in the hibernation image. To ensure the last
+	 * context image is coherent, we have to switch away from it. That
+	 * leaves the i915->kernel_context still active when
+	 * we actually suspend, and its image in memory may not match the GPU
+	 * state. Fortunately, the kernel_context is disposable and we do
+	 * not rely on its state.
+	 */
+	switch_to_kernel_context_sync(i915);
+
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	/*
+	 * Assert that we successfully flushed all the work and
+	 * reset the GPU back to its idle, low power state.
+	 */
+	GEM_BUG_ON(i915->gt.awake);
+	flush_work(&i915->gem.idle_work);
+
+	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+	i915_gem_drain_freed_objects(i915);
+
+	intel_uc_suspend(i915);
+}
+
+static struct drm_i915_gem_object *first_mm_object(struct list_head *list)
+{
+	return list_first_entry_or_null(list,
+					struct drm_i915_gem_object,
+					mm.link);
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+	struct drm_i915_gem_object *obj;
+	struct list_head *phases[] = {
+		&i915->mm.shrink_list,
+		&i915->mm.purge_list,
+		NULL
+	}, **phase;
+	unsigned long flags;
+
+	/*
+	 * Neither the BIOS, ourselves or any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that is of the upmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that resetting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+	 */
+
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	for (phase = phases; *phase; phase++) {
+		LIST_HEAD(keep);
+
+		while ((obj = first_mm_object(*phase))) {
+			list_move_tail(&obj->mm.link, &keep);
+
+			/* Beware the background _i915_gem_free_objects */
+			if (!kref_get_unless_zero(&obj->base.refcount))
+				continue;
+
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+			i915_gem_object_lock(obj);
+			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+			i915_gem_object_put(obj);
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		}
+
+		list_splice_tail(&keep, *phase);
+	}
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+	intel_uc_sanitize(i915);
+	i915_gem_sanitize(i915);
+}
+
+void i915_gem_resume(struct drm_i915_private *i915)
+{
+	GEM_TRACE("\n");
+
+	WARN_ON(i915->gt.awake);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
+	i915_gem_restore_gtt_mappings(i915);
+	i915_gem_restore_fences(i915);
+
+	/*
+	 * As we didn't flush the kernel context before suspend, we cannot
+	 * guarantee that the context image is complete. So let's just reset
+	 * it and start again.
+	 */
+	intel_gt_resume(i915);
+
+	if (i915_gem_init_hw(i915))
+		goto err_wedged;
+
+	intel_uc_resume(i915);
+
+	/* Always reload a context for powersaving. */
+	if (!i915_gem_load_power_context(i915))
+		goto err_wedged;
+
+out_unlock:
+	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return;
+
+err_wedged:
+	if (!i915_reset_failed(i915)) {
+		dev_err(i915->drm.dev,
+			"Failed to re-initialize GPU, declaring it wedged!\n");
+		i915_gem_set_wedged(i915);
+	}
+	goto out_unlock;
+}
+
+void i915_gem_init__pm(struct drm_i915_private *i915)
+{
+	INIT_WORK(&i915->gem.idle_work, idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, retire_work_handler);
+
+	i915->gem.pm_notifier.notifier_call = pm_notifier;
+	blocking_notifier_chain_register(&i915->gt.pm_notifications,
+					 &i915->gem.pm_notifier);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
new file mode 100644
index 000000000000..6f7d5d11ac3b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_PM_H__
+#define __I915_GEM_PM_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct work_struct;
+
+void i915_gem_init__pm(struct drm_i915_private *i915);
+
+bool i915_gem_load_power_context(struct drm_i915_private *i915);
+void i915_gem_resume(struct drm_i915_private *i915);
+
+void i915_gem_idle_work_handler(struct work_struct *work);
+
+void i915_gem_suspend(struct drm_i915_private *i915);
+void i915_gem_suspend_late(struct drm_i915_private *i915);
+
+#endif /* __I915_GEM_PM_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 000000000000..19d9ecdb2894
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,571 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/swap.h>
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+/*
+ * Move pages to appropriate lru and release the pagevec, decrementing the
+ * ref count of those pages.
+ */
+static void check_release_pagevec(struct pagevec *pvec)
+{
+	check_move_unevictable_pages(pvec);
+	__pagevec_release(pvec);
+	cond_resched();
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long page_count = obj->base.size / PAGE_SIZE;
+	unsigned long i;
+	struct address_space *mapping;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct sgt_iter sgt_iter;
+	struct page *page;
+	unsigned long last_pfn = 0;	/* suppress gcc warning */
+	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int sg_page_sizes;
+	struct pagevec pvec;
+	gfp_t noreclaim;
+	int ret;
+
+	/*
+	 * Assert that the object is not currently in any GPU domain. As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+	/*
+	 * If there's no chance of allocating enough pages for the whole
+	 * object, bail early.
+	 */
+	if (page_count > totalram_pages())
+		return -ENOMEM;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		return -ENOMEM;
+
+rebuild_st:
+	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Get the list of pages out of our struct file.  They'll be pinned
+	 * at this point until we release them.
+	 *
+	 * Fail silently without starting the shrinker
+	 */
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_unevictable(mapping);
+	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+	sg = st->sgl;
+	st->nents = 0;
+	sg_page_sizes = 0;
+	for (i = 0; i < page_count; i++) {
+		const unsigned int shrink[] = {
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
+			0,
+		}, *s = shrink;
+		gfp_t gfp = noreclaim;
+
+		do {
+			cond_resched();
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+			if (!IS_ERR(page))
+				break;
+
+			if (!*s) {
+				ret = PTR_ERR(page);
+				goto err_sg;
+			}
+
+			i915_gem_shrink(i915, 2 * page_count, NULL, *s++);
+
+			/*
+			 * We've tried hard to allocate the memory by reaping
+			 * our own buffer, now let the real VM do its job and
+			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
+			 */
+			if (!*s) {
+				/* reclaim and warn, but no oom */
+				gfp = mapping_gfp_mask(mapping);
+
+				/*
+				 * Our bo are always dirty and so we require
+				 * kswapd to reclaim our pages (direct reclaim
+				 * does not effectively begin pageout of our
+				 * buffers on its own). However, direct reclaim
+				 * only waits for kswapd when under allocation
+				 * congestion. So as a result __GFP_RECLAIM is
+				 * unreliable and fails to actually reclaim our
+				 * dirty pages -- unless you try over and over
+				 * again with !__GFP_NORETRY. However, we still
+				 * want to fail this allocation rather than
+				 * trigger the out-of-memory killer and for
+				 * this we want __GFP_RETRY_MAYFAIL.
+				 */
+				gfp |= __GFP_RETRY_MAYFAIL;
+			}
+		} while (1);
+
+		if (!i ||
+		    sg->length >= max_segment ||
+		    page_to_pfn(page) != last_pfn + 1) {
+			if (i) {
+				sg_page_sizes |= sg->length;
+				sg = sg_next(sg);
+			}
+			st->nents++;
+			sg_set_page(sg, page, PAGE_SIZE, 0);
+		} else {
+			sg->length += PAGE_SIZE;
+		}
+		last_pfn = page_to_pfn(page);
+
+		/* Check that the i965g/gm workaround works. */
+		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
+	}
+	if (sg) { /* loop terminated early; short sg table */
+		sg_page_sizes |= sg->length;
+		sg_mark_end(sg);
+	}
+
+	/* Trim unused sg entries to avoid wasting memory. */
+	i915_sg_trim(st);
+
+	ret = i915_gem_gtt_prepare_pages(obj, st);
+	if (ret) {
+		/*
+		 * DMA remapping failed? One possible cause is that
+		 * it could not reserve enough large entries, asking
+		 * for PAGE_SIZE chunks instead may be helpful.
+		 */
+		if (max_segment > PAGE_SIZE) {
+			for_each_sgt_page(page, sgt_iter, st)
+				put_page(page);
+			sg_free_table(st);
+
+			max_segment = PAGE_SIZE;
+			goto rebuild_st;
+		} else {
+			dev_warn(&i915->drm.pdev->dev,
+				 "Failed to DMA remap %lu pages\n",
+				 page_count);
+			goto err_pages;
+		}
+	}
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_do_bit_17_swizzle(obj, st);
+
+	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
+
+	return 0;
+
+err_sg:
+	sg_mark_end(sg);
+err_pages:
+	mapping_clear_unevictable(mapping);
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, st) {
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	sg_free_table(st);
+	kfree(st);
+
+	/*
+	 * shmemfs first checks if there is enough memory to allocate the page
+	 * and reports ENOSPC should there be insufficient, along with the usual
+	 * ENOMEM for a genuine allocation failure.
+	 *
+	 * We use ENOSPC in our driver to mean that we have run out of aperture
+	 * space and so want to translate the error from shmemfs back to our
+	 * usual understanding of ENOMEM.
+	 */
+	if (ret == -ENOSPC)
+		ret = -ENOMEM;
+
+	return ret;
+}
+
+static void
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+	/*
+	 * Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+	obj->mm.madv = __I915_MADV_PURGED;
+	obj->mm.pages = ERR_PTR(-EFAULT);
+}
+
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+	struct address_space *mapping;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = SWAP_CLUSTER_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1,
+	};
+	unsigned long i;
+
+	/*
+	 * Leave mmapings intact (GTT will have been revoked on unbinding,
+	 * leaving only CPU mmapings around) and add those pages to the LRU
+	 * instead of invoking writeback so they are aged and paged out
+	 * as normal.
+	 */
+	mapping = obj->base.filp->f_mapping;
+
+	/* Begin writeback on each dirty page */
+	for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
+		struct page *page;
+
+		page = find_lock_entry(mapping, i);
+		if (!page || xa_is_value(page))
+			continue;
+
+		if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
+			int ret;
+
+			SetPageReclaim(page);
+			ret = mapping->a_ops->writepage(page, &wbc);
+			if (!PageWriteback(page))
+				ClearPageReclaim(page);
+			if (!ret)
+				goto put;
+		}
+		unlock_page(page);
+put:
+		put_page(page);
+	}
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+				struct sg_table *pages,
+				bool needs_clflush)
+{
+	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+	if (obj->mm.madv == I915_MADV_DONTNEED)
+		obj->mm.dirty = false;
+
+	if (needs_clflush &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+		drm_clflush_sg(pages);
+
+	__start_cpu_write(obj);
+}
+
+static void
+shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+	struct sgt_iter sgt_iter;
+	struct pagevec pvec;
+	struct page *page;
+
+	__i915_gem_object_release_shmem(obj, pages, true);
+
+	i915_gem_gtt_finish_pages(obj, pages);
+
+	if (i915_gem_object_needs_bit17_swizzle(obj))
+		i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
+
+	pagevec_init(&pvec);
+	for_each_sgt_page(page, sgt_iter, pages) {
+		if (obj->mm.dirty)
+			set_page_dirty(page);
+
+		if (obj->mm.madv == I915_MADV_WILLNEED)
+			mark_page_accessed(page);
+
+		if (!pagevec_add(&pvec, page))
+			check_release_pagevec(&pvec);
+	}
+	if (pagevec_count(&pvec))
+		check_release_pagevec(&pvec);
+	obj->mm.dirty = false;
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static int
+shmem_pwrite(struct drm_i915_gem_object *obj,
+	     const struct drm_i915_gem_pwrite *arg)
+{
+	struct address_space *mapping = obj->base.filp->f_mapping;
+	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+	u64 remain, offset;
+	unsigned int pg;
+
+	/* Caller already validated user args */
+	GEM_BUG_ON(!access_ok(user_data, arg->size));
+
+	/*
+	 * Before we instantiate/pin the backing store for our use, we
+	 * can prepopulate the shmemfs filp efficiently using a write into
+	 * the pagecache. We avoid the penalty of instantiating all the
+	 * pages, important if the user is just writing to a few and never
+	 * uses the object on the GPU, and using a direct write into shmemfs
+	 * allows it to avoid the cost of retrieving a page (either swapin
+	 * or clearing-before-use) before it is overwritten.
+	 */
+	if (i915_gem_object_has_pages(obj))
+		return -ENODEV;
+
+	if (obj->mm.madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	/*
+	 * Before the pages are instantiated the object is treated as being
+	 * in the CPU domain. The pages will be clflushed as required before
+	 * use, and we can freely write into the pages directly. If userspace
+	 * races pwrite with any other operation; corruption will ensue -
+	 * that is userspace's prerogative!
+	 */
+
+	remain = arg->size;
+	offset = arg->offset;
+	pg = offset_in_page(offset);
+
+	do {
+		unsigned int len, unwritten;
+		struct page *page;
+		void *data, *vaddr;
+		int err;
+		char c;
+
+		len = PAGE_SIZE - pg;
+		if (len > remain)
+			len = remain;
+
+		/* Prefault the user page to reduce potential recursion */
+		err = __get_user(c, user_data);
+		if (err)
+			return err;
+
+		err = __get_user(c, user_data + len - 1);
+		if (err)
+			return err;
+
+		err = pagecache_write_begin(obj->base.filp, mapping,
+					    offset, len, 0,
+					    &page, &data);
+		if (err < 0)
+			return err;
+
+		vaddr = kmap_atomic(page);
+		unwritten = __copy_from_user_inatomic(vaddr + pg,
+						      user_data,
+						      len);
+		kunmap_atomic(vaddr);
+
+		err = pagecache_write_end(obj->base.filp, mapping,
+					  offset, len, len - unwritten,
+					  page, data);
+		if (err < 0)
+			return err;
+
+		/* We don't handle -EFAULT, leave it to the caller to check */
+		if (unwritten)
+			return -ENODEV;
+
+		remain -= len;
+		user_data += len;
+		offset += len;
+		pg = 0;
+	} while (remain);
+
+	return 0;
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
+
+	.get_pages = shmem_get_pages,
+	.put_pages = shmem_put_pages,
+	.truncate = shmem_truncate,
+	.writeback = shmem_writeback,
+
+	.pwrite = shmem_pwrite,
+};
+
+static int create_shmem(struct drm_i915_private *i915,
+			struct drm_gem_object *obj,
+			size_t size)
+{
+	unsigned long flags = VM_NORESERVE;
+	struct file *filp;
+
+	drm_gem_private_object_init(&i915->drm, obj, size);
+
+	if (i915->mm.gemfs)
+		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+						 flags);
+	else
+		filp = shmem_file_setup("i915", size, flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	obj->filp = filp;
+	return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915, u64 size)
+{
+	struct drm_i915_gem_object *obj;
+	struct address_space *mapping;
+	unsigned int cache_level;
+	gfp_t mask;
+	int ret;
+
+	/* There is a prevalence of the assumption that we fit the object's
+	 * page count inside a 32bit _signed_ variable. Let's document this and
+	 * catch if we ever need to fix it. In the meantime, if you do spot
+	 * such a local variable, please consider fixing!
+	 */
+	if (size >> PAGE_SHIFT > INT_MAX)
+		return ERR_PTR(-E2BIG);
+
+	if (overflows_type(size, obj->base.size))
+		return ERR_PTR(-E2BIG);
+
+	obj = i915_gem_object_alloc();
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = create_shmem(i915, &obj->base, size);
+	if (ret)
+		goto fail;
+
+	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		mask &= ~__GFP_HIGHMEM;
+		mask |= __GFP_DMA32;
+	}
+
+	mapping = obj->base.filp->f_mapping;
+	mapping_set_gfp_mask(mapping, mask);
+	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+	i915_gem_object_init(obj, &i915_gem_shmem_ops);
+
+	obj->write_domain = I915_GEM_DOMAIN_CPU;
+	obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+	if (HAS_LLC(i915))
+		/* On some devices, we can have the GPU use the LLC (the CPU
+		 * cache) for about a 10% performance improvement
+		 * compared to uncached.  Graphics requests other than
+		 * display scanout are coherent with the CPU in
+		 * accessing this cache.  This means in this mode we
+		 * don't need to clflush on the CPU side, and on the
+		 * GPU side we only need to flush internal caches to
+		 * get data visible to the CPU.
+		 *
+		 * However, we maintain the display planes as UC, and so
+		 * need to rebind when first used as such.
+		 */
+		cache_level = I915_CACHE_LLC;
+	else
+		cache_level = I915_CACHE_NONE;
+
+	i915_gem_object_set_cache_coherency(obj, cache_level);
+
+	trace_i915_gem_object_create(obj);
+
+	return obj;
+
+fail:
+	i915_gem_object_free(obj);
+	return ERR_PTR(ret);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+				       const void *data, size_t size)
+{
+	struct drm_i915_gem_object *obj;
+	struct file *file;
+	size_t offset;
+	int err;
+
+	obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+	if (IS_ERR(obj))
+		return obj;
+
+	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+	file = obj->base.filp;
+	offset = 0;
+	do {
+		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+		struct page *page;
+		void *pgdata, *vaddr;
+
+		err = pagecache_write_begin(file, file->f_mapping,
+					    offset, len, 0,
+					    &page, &pgdata);
+		if (err < 0)
+			goto fail;
+
+		vaddr = kmap(page);
+		memcpy(vaddr, data, len);
+		kunmap(page);
+
+		err = pagecache_write_end(file, file->f_mapping,
+					  offset, len, len,
+					  page, pgdata);
+		if (err < 0)
+			goto fail;
+
+		size -= len;
+		data += len;
+		offset += len;
+	} while (size);
+
+	return obj;
+
+fail:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6da795c7e62e..3a926a8755c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2008-2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2015 Intel Corporation
  */
 
 #include <linux/oom.h>
@@ -32,7 +14,6 @@
 #include <linux/vmalloc.h>
 #include <drm/i915_drm.h>
 
-#include "i915_drv.h"
 #include "i915_trace.h"
 
 static bool shrinker_lock(struct drm_i915_private *i915,
@@ -88,7 +69,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	 * to the GPU, simply unbinding from the GPU is not going to succeed
 	 * in releasing our pin count on the pages themselves.
 	 */
-	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
+	if (atomic_read(&obj->mm.pages_pin_count) > atomic_read(&obj->bind_count))
 		return false;
 
 	/* If any vma are "permanently" pinned, it will prevent us from
@@ -114,12 +95,26 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
 	return !i915_gem_object_has_pages(obj);
 }
 
+static void try_to_writeback(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		i915_gem_object_truncate(obj);
+	case __I915_MADV_PURGED:
+		return;
+	}
+
+	if (flags & I915_SHRINK_WRITEBACK)
+		i915_gem_object_writeback(obj);
+}
+
 /**
  * i915_gem_shrink - Shrink buffer object caches
  * @i915: i915 device
  * @target: amount of memory to make available, in pages
  * @nr_scanned: optional output for number of pages scanned (incremental)
- * @flags: control flags for selecting cache types
+ * @shrink: control flags for selecting cache types
  *
  * This function is the main interface to the shrinker. It will try to release
  * up to @target pages of main memory backing storage from buffer objects.
@@ -143,14 +138,17 @@ unsigned long
 i915_gem_shrink(struct drm_i915_private *i915,
 		unsigned long target,
 		unsigned long *nr_scanned,
-		unsigned flags)
+		unsigned int shrink)
 {
 	const struct {
 		struct list_head *list;
 		unsigned int bit;
 	} phases[] = {
-		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
-		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
+		{ &i915->mm.purge_list, ~0u },
+		{
+			&i915->mm.shrink_list,
+			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
+		},
 		{ NULL, 0 },
 	}, *phase;
 	intel_wakeref_t wakeref = 0;
@@ -158,24 +156,19 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	unsigned long scanned = 0;
 	bool unlock;
 
-	if (!shrinker_lock(i915, flags, &unlock))
+	if (!shrinker_lock(i915, shrink, &unlock))
 		return 0;
 
 	/*
-	 * When shrinking the active list, also consider active contexts.
-	 * Active contexts are pinned until they are retired, and so can
-	 * not be simply unbound to retire and unpin their pages. To shrink
-	 * the contexts, we must wait until the gpu is idle.
-	 *
-	 * We don't care about errors here; if we cannot wait upon the GPU,
-	 * we will free as much as we can and hope to get a second chance.
+	 * When shrinking the active list, we should also consider active
+	 * contexts. Active contexts are pinned until they are retired, and
+	 * so can not be simply unbound to retire and unpin their pages. To
+	 * shrink the contexts, we must wait until the gpu is idle and
+	 * completed its switch to the kernel context. In short, we do
+	 * not have a good mechanism for idling a specific context.
 	 */
-	if (flags & I915_SHRINK_ACTIVE)
-		i915_gem_wait_for_idle(i915,
-				       I915_WAIT_LOCKED,
-				       MAX_SCHEDULE_TIMEOUT);
 
-	trace_i915_gem_shrink(i915, target, flags);
+	trace_i915_gem_shrink(i915, target, shrink);
 	i915_retire_requests(i915);
 
 	/*
@@ -183,10 +176,10 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	 * device just to recover a little memory. If absolutely necessary,
 	 * we will force the wake during oom-notifier.
 	 */
-	if (flags & I915_SHRINK_BOUND) {
-		wakeref = intel_runtime_pm_get_if_in_use(i915);
+	if (shrink & I915_SHRINK_BOUND) {
+		wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
 		if (!wakeref)
-			flags &= ~I915_SHRINK_BOUND;
+			shrink &= ~I915_SHRINK_BOUND;
 	}
 
 	/*
@@ -211,8 +204,9 @@ i915_gem_shrink(struct drm_i915_private *i915,
 	for (phase = phases; phase->list; phase++) {
 		struct list_head still_in_list;
 		struct drm_i915_gem_object *obj;
+		unsigned long flags;
 
-		if ((flags & phase->bit) == 0)
+		if ((shrink & phase->bit) == 0)
 			continue;
 
 		INIT_LIST_HEAD(&still_in_list);
@@ -224,51 +218,56 @@ i915_gem_shrink(struct drm_i915_private *i915,
 		 * to be able to shrink their pages, so they remain on
 		 * the unbound/bound list until actually freed.
 		 */
-		spin_lock(&i915->mm.obj_lock);
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
 		while (count < target &&
 		       (obj = list_first_entry_or_null(phase->list,
 						       typeof(*obj),
 						       mm.link))) {
 			list_move_tail(&obj->mm.link, &still_in_list);
 
-			if (flags & I915_SHRINK_PURGEABLE &&
-			    obj->mm.madv != I915_MADV_DONTNEED)
-				continue;
-
-			if (flags & I915_SHRINK_VMAPS &&
+			if (shrink & I915_SHRINK_VMAPS &&
 			    !is_vmalloc_addr(obj->mm.mapping))
 				continue;
 
-			if (!(flags & I915_SHRINK_ACTIVE) &&
+			if (!(shrink & I915_SHRINK_ACTIVE) &&
 			    (i915_gem_object_is_active(obj) ||
 			     i915_gem_object_is_framebuffer(obj)))
 				continue;
 
+			if (!(shrink & I915_SHRINK_BOUND) &&
+			    atomic_read(&obj->bind_count))
+				continue;
+
 			if (!can_release_pages(obj))
 				continue;
 
-			spin_unlock(&i915->mm.obj_lock);
+			if (!kref_get_unless_zero(&obj->base.refcount))
+				continue;
+
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
 			if (unsafe_drop_pages(obj)) {
 				/* May arrive from get_pages on another bo */
 				mutex_lock_nested(&obj->mm.lock,
 						  I915_MM_SHRINKER);
 				if (!i915_gem_object_has_pages(obj)) {
-					__i915_gem_object_invalidate(obj);
+					try_to_writeback(obj, shrink);
 					count += obj->base.size >> PAGE_SHIFT;
 				}
 				mutex_unlock(&obj->mm.lock);
 			}
+
 			scanned += obj->base.size >> PAGE_SHIFT;
+			i915_gem_object_put(obj);
 
-			spin_lock(&i915->mm.obj_lock);
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
 		}
 		list_splice_tail(&still_in_list, phase->list);
-		spin_unlock(&i915->mm.obj_lock);
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
 
-	if (flags & I915_SHRINK_BOUND)
-		intel_runtime_pm_put(i915, wakeref);
+	if (shrink & I915_SHRINK_BOUND)
+		intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	i915_retire_requests(i915);
 
@@ -298,7 +297,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
 	intel_wakeref_t wakeref;
 	unsigned long freed = 0;
 
-	with_intel_runtime_pm(i915, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		freed = i915_gem_shrink(i915, -1UL, NULL,
 					I915_SHRINK_BOUND |
 					I915_SHRINK_UNBOUND |
@@ -313,25 +312,14 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *i915 =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_i915_gem_object *obj;
-	unsigned long num_objects = 0;
-	unsigned long count = 0;
+	unsigned long num_objects;
+	unsigned long count;
 
-	spin_lock(&i915->mm.obj_lock);
-	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
-		if (can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
+	count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
+	num_objects = READ_ONCE(i915->mm.shrink_count);
 
-	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
-		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
-			count += obj->base.size >> PAGE_SHIFT;
-			num_objects++;
-		}
-	spin_unlock(&i915->mm.obj_lock);
-
-	/* Update our preferred vmscan batch size for the next pass.
+	/*
+	 * Update our preferred vmscan batch size for the next pass.
 	 * Our rough guess for an effective batch size is roughly 2
 	 * available GEM objects worth of pages. That is we don't want
 	 * the shrinker to fire, until it is worth the cost of freeing an
@@ -366,23 +354,18 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 				&sc->nr_scanned,
 				I915_SHRINK_BOUND |
 				I915_SHRINK_UNBOUND |
-				I915_SHRINK_PURGEABLE);
-	if (sc->nr_scanned < sc->nr_to_scan)
-		freed += i915_gem_shrink(i915,
-					 sc->nr_to_scan - sc->nr_scanned,
-					 &sc->nr_scanned,
-					 I915_SHRINK_BOUND |
-					 I915_SHRINK_UNBOUND);
+				I915_SHRINK_WRITEBACK);
 	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
 		intel_wakeref_t wakeref;
 
-		with_intel_runtime_pm(i915, wakeref) {
+		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 			freed += i915_gem_shrink(i915,
 						 sc->nr_to_scan - sc->nr_scanned,
 						 &sc->nr_scanned,
 						 I915_SHRINK_ACTIVE |
 						 I915_SHRINK_BOUND |
-						 I915_SHRINK_UNBOUND);
+						 I915_SHRINK_UNBOUND |
+						 I915_SHRINK_WRITEBACK);
 		}
 	}
 
@@ -397,39 +380,35 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 	struct drm_i915_private *i915 =
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
 	struct drm_i915_gem_object *obj;
-	unsigned long unevictable, bound, unbound, freed_pages;
+	unsigned long unevictable, available, freed_pages;
 	intel_wakeref_t wakeref;
+	unsigned long flags;
 
 	freed_pages = 0;
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 					       I915_SHRINK_BOUND |
-					       I915_SHRINK_UNBOUND);
+					       I915_SHRINK_UNBOUND |
+					       I915_SHRINK_WRITEBACK);
 
 	/* Because we may be allocating inside our own driver, we cannot
 	 * assert that there are no objects with pinned pages that are not
 	 * being pointed to by hardware.
 	 */
-	unbound = bound = unevictable = 0;
-	spin_lock(&i915->mm.obj_lock);
-	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) {
-		if (!can_release_pages(obj))
-			unevictable += obj->base.size >> PAGE_SHIFT;
-		else
-			unbound += obj->base.size >> PAGE_SHIFT;
-	}
-	list_for_each_entry(obj, &i915->mm.bound_list, mm.link) {
+	available = unevictable = 0;
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
 		if (!can_release_pages(obj))
 			unevictable += obj->base.size >> PAGE_SHIFT;
 		else
-			bound += obj->base.size >> PAGE_SHIFT;
+			available += obj->base.size >> PAGE_SHIFT;
 	}
-	spin_unlock(&i915->mm.obj_lock);
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
-	if (freed_pages || unbound || bound)
+	if (freed_pages || available)
 		pr_info("Purging GPU memory, %lu pages freed, "
-			"%lu pages still pinned.\n",
-			freed_pages, unevictable);
+			"%lu pages still pinned, %lu pages left available.\n",
+			freed_pages, unevictable, available);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
@@ -454,7 +433,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 				   MAX_SCHEDULE_TIMEOUT))
 		goto out;
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		freed_pages += i915_gem_shrink(i915, -1UL, NULL,
 					       I915_SHRINK_BOUND |
 					       I915_SHRINK_UNBOUND |
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 0a8082cfc761..de1fab2058ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -1,32 +1,15 @@
 /*
- * Copyright © 2008-2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *    Chris Wilson <chris@chris-wilson.co.uk>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008-2012 Intel Corporation
  */
 
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 
 /*
@@ -342,11 +325,11 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
 	*size = stolen_top - *base;
 }
 
-static void icl_get_stolen_reserved(struct drm_i915_private *dev_priv,
+static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 				    resource_size_t *base,
 				    resource_size_t *size)
 {
-	u64 reg_val = I915_READ64(GEN6_STOLEN_RESERVED);
+	u64 reg_val = intel_uncore_read64(&i915->uncore, GEN6_STOLEN_RESERVED);
 
 	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
@@ -706,10 +689,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv
 	list_move_tail(&vma->vm_link, &ggtt->vm.bound_list);
 	mutex_unlock(&ggtt->vm.mutex);
 
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
-	obj->bind_count++;
-	spin_unlock(&dev_priv->mm.obj_lock);
+	GEM_BUG_ON(i915_gem_object_is_shrinkable(obj));
+	atomic_inc(&obj->bind_count);
 
 	return obj;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index 000000000000..adb3074d9ce2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,73 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/jiffies.h>
+
+#include <drm/drm_file.h>
+
+#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+	struct i915_request *request, *target = NULL;
+	long ret;
+
+	/* ABI: return -EIO if already wedged */
+	ret = i915_terminally_wedged(to_i915(dev));
+	if (ret)
+		return ret;
+
+	spin_lock(&file_priv->mm.lock);
+	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
+		if (time_after_eq(request->emitted_jiffies, recent_enough))
+			break;
+
+		if (target) {
+			list_del(&target->client_link);
+			target->file_priv = NULL;
+		}
+
+		target = request;
+	}
+	if (target)
+		i915_request_get(target);
+	spin_unlock(&file_priv->mm.lock);
+
+	if (!target)
+		return 0;
+
+	ret = i915_request_wait(target,
+				I915_WAIT_INTERRUPTIBLE,
+				MAX_SCHEDULE_TIMEOUT);
+	i915_request_put(target);
+
+	return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index a9b5329dae3b..ca0c2f451742 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -1,34 +1,17 @@
 /*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2008 Intel Corporation
  */
 
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
 
 /**
  * DOC: buffer object tiling
@@ -296,7 +279,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 	i915_gem_object_unlock(obj);
 
 	/* Force the fence to be reacquired for GTT access */
-	i915_gem_release_mmap(obj);
+	i915_gem_object_release_mmap(obj);
 
 	/* Try to preallocate memory required to save swizzling on put-pages */
 	if (i915_gem_object_needs_bit17_swizzle(obj)) {
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 8079ea3af103..528b61678334 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -1,37 +1,23 @@
 /*
- * Copyright © 2012-2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2012-2014 Intel Corporation
  */
 
-#include <drm/i915_drm.h>
-#include "i915_drv.h"
-#include "i915_trace.h"
-#include "intel_drv.h"
 #include <linux/mmu_context.h>
 #include <linux/mmu_notifier.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/sched/mm.h>
 
+#include <drm/i915_drm.h>
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+
 struct i915_mm_struct {
 	struct mm_struct *mm;
 	struct drm_i915_private *i915;
@@ -782,14 +768,14 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 		return -EFAULT;
 
 	if (args->flags & I915_USERPTR_READ_ONLY) {
-		struct i915_hw_ppgtt *ppgtt;
+		struct i915_address_space *vm;
 
 		/*
 		 * On almost all of the older hw, we cannot tell the GPU that
 		 * a page is readonly.
 		 */
-		ppgtt = dev_priv->kernel_context->ppgtt;
-		if (!ppgtt || !ppgtt->vm.has_read_only)
+		vm = dev_priv->kernel_context->vm;
+		if (!vm || !vm->has_read_only)
 			return -ENODEV;
 	}
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index 000000000000..26ec6579b7cd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,278 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+#include <linux/jiffies.h>
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+			   unsigned int flags,
+			   long timeout)
+{
+	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return timeout;
+
+	if (dma_fence_is_i915(fence))
+		return i915_request_wait(to_request(fence), flags, timeout);
+
+	return dma_fence_wait_timeout(fence,
+				      flags & I915_WAIT_INTERRUPTIBLE,
+				      timeout);
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+				 unsigned int flags,
+				 long timeout)
+{
+	unsigned int seq = __read_seqcount_begin(&resv->seq);
+	struct dma_fence *excl;
+	bool prune_fences = false;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			timeout = i915_gem_object_wait_fence(shared[i],
+							     flags, timeout);
+			if (timeout < 0)
+				break;
+
+			dma_fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			dma_fence_put(shared[i]);
+		kfree(shared);
+
+		/*
+		 * If both shared fences and an exclusive fence exist,
+		 * then by construction the shared fences must be later
+		 * than the exclusive fence. If we successfully wait for
+		 * all the shared fences, we know that the exclusive fence
+		 * must all be signaled. If all the shared fences are
+		 * signaled, we can prune the array and recover the
+		 * floating references on the fences/requests.
+		 */
+		prune_fences = count && timeout >= 0;
+	} else {
+		excl = reservation_object_get_excl_rcu(resv);
+	}
+
+	if (excl && timeout >= 0)
+		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
+
+	dma_fence_put(excl);
+
+	/*
+	 * Opportunistically prune the fences iff we know they have *all* been
+	 * signaled and that the reservation object has not been changed (i.e.
+	 * no new fences have been added).
+	 */
+	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
+		if (reservation_object_trylock(resv)) {
+			if (!__read_seqcount_retry(&resv->seq, seq))
+				reservation_object_add_excl_fence(resv, NULL);
+			reservation_object_unlock(resv);
+		}
+	}
+
+	return timeout;
+}
+
+static void __fence_set_priority(struct dma_fence *fence,
+				 const struct i915_sched_attr *attr)
+{
+	struct i915_request *rq;
+	struct intel_engine_cs *engine;
+
+	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
+		return;
+
+	rq = to_request(fence);
+	engine = rq->engine;
+
+	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+	if (engine->schedule)
+		engine->schedule(rq, attr);
+	rcu_read_unlock();
+	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
+}
+
+static void fence_set_priority(struct dma_fence *fence,
+			       const struct i915_sched_attr *attr)
+{
+	/* Recurse once into a fence-array */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+		int i;
+
+		for (i = 0; i < array->num_fences; i++)
+			__fence_set_priority(array->fences[i], attr);
+	} else {
+		__fence_set_priority(fence, attr);
+	}
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+			      unsigned int flags,
+			      const struct i915_sched_attr *attr)
+{
+	struct dma_fence *excl;
+
+	if (flags & I915_WAIT_ALL) {
+		struct dma_fence **shared;
+		unsigned int count, i;
+		int ret;
+
+		ret = reservation_object_get_fences_rcu(obj->base.resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			fence_set_priority(shared[i], attr);
+			dma_fence_put(shared[i]);
+		}
+
+		kfree(shared);
+	} else {
+		excl = reservation_object_get_excl_rcu(obj->base.resv);
+	}
+
+	if (excl) {
+		fence_set_priority(excl, attr);
+		dma_fence_put(excl);
+	}
+	return 0;
+}
+
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+		     unsigned int flags,
+		     long timeout)
+{
+	might_sleep();
+	GEM_BUG_ON(timeout < 0);
+
+	timeout = i915_gem_object_wait_reservation(obj->base.resv,
+						   flags, timeout);
+	return timeout < 0 ? timeout : 0;
+}
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+	/* nsecs_to_jiffies64() does not guard against overflow */
+	if (NSEC_PER_SEC % HZ &&
+	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+		return MAX_JIFFY_OFFSET;
+
+	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+	if (timeout_ns < 0)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	if (timeout_ns == 0)
+		return 0;
+
+	return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ *  -ETIME: object is still busy after timeout
+ *  -ERESTARTSYS: signal interrupted the wait
+ *  -ENONENT: object doesn't exist
+ * Also possible, but rare:
+ *  -EAGAIN: incomplete, restart syscall
+ *  -ENOMEM: damn
+ *  -ENODEV: Internal IRQ fail
+ *  -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex the object may become re-busied before this
+ * function completes. A similar but shorter * race condition exists in the busy
+ * ioctl
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_gem_wait *args = data;
+	struct drm_i915_gem_object *obj;
+	ktime_t start;
+	long ret;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	obj = i915_gem_object_lookup(file, args->bo_handle);
+	if (!obj)
+		return -ENOENT;
+
+	start = ktime_get();
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   I915_WAIT_ALL,
+				   to_wait_timeout(args->timeout_ns));
+
+	if (args->timeout_ns > 0) {
+		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+		if (args->timeout_ns < 0)
+			args->timeout_ns = 0;
+
+		/*
+		 * Apparently ktime isn't accurate enough and occasionally has a
+		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+		 * things up to make the test happy. We allow up to 1 jiffy.
+		 *
+		 * This is a regression from the timespec->ktime conversion.
+		 */
+		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+			args->timeout_ns = 0;
+
+		/* Asked to wait beyond the jiffie/scheduler precision? */
+		if (ret == -ETIME && args->timeout_ns)
+			ret = -EAGAIN;
+	}
+
+	i915_gem_object_put(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
index 888b7d3f04c3..099f3397aada 100644
--- a/drivers/gpu/drm/i915/i915_gemfs.c
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/fs.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 000000000000..2a1e59af3e4a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+int i915_gemfs_init(struct drm_i915_private *i915);
+
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
index 419fd4d6a8f0..3c5d17b2b670 100644
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -1,27 +1,11 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
+#include "i915_scatterlist.h"
+
 #include "huge_gem_object.h"
 
 static void huge_free_pages(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 000000000000..549c1394bcdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+		phys_addr_t phys_size,
+		dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+	return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+	return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 90721b54e7ae..b74729b6f353 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1,33 +1,21 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-
 #include <linux/prime_numbers.h>
 
-#include "mock_drm.h"
-#include "i915_random.h"
+#include "i915_selftest.h"
+
+#include "gem/i915_gem_pm.h"
+
+#include "igt_gem_utils.h"
+#include "mock_context.h"
+
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+#include "selftests/i915_random.h"
 
 static const unsigned int page_sizes[] = {
 	I915_GTT_PAGE_SIZE_2M,
@@ -380,7 +368,7 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 
 static int igt_mock_exhaust_device_supported_pages(void *arg)
 {
-	struct i915_hw_ppgtt *ppgtt = arg;
+	struct i915_ppgtt *ppgtt = arg;
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
 	unsigned int saved_mask = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj;
@@ -459,7 +447,7 @@ out_device:
 
 static int igt_mock_ppgtt_misaligned_dma(void *arg)
 {
-	struct i915_hw_ppgtt *ppgtt = arg;
+	struct i915_ppgtt *ppgtt = arg;
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
 	unsigned long supported = INTEL_INFO(i915)->page_sizes;
 	struct drm_i915_gem_object *obj;
@@ -587,7 +575,7 @@ out_put:
 }
 
 static void close_object_list(struct list_head *objects,
-			      struct i915_hw_ppgtt *ppgtt)
+			      struct i915_ppgtt *ppgtt)
 {
 	struct drm_i915_gem_object *obj, *on;
 
@@ -607,7 +595,7 @@ static void close_object_list(struct list_head *objects,
 
 static int igt_mock_ppgtt_huge_fill(void *arg)
 {
-	struct i915_hw_ppgtt *ppgtt = arg;
+	struct i915_ppgtt *ppgtt = arg;
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
 	unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
 	unsigned long page_num;
@@ -728,7 +716,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
 
 static int igt_mock_ppgtt_64K(void *arg)
 {
-	struct i915_hw_ppgtt *ppgtt = arg;
+	struct i915_ppgtt *ppgtt = arg;
 	struct drm_i915_private *i915 = ppgtt->vm.i915;
 	struct drm_i915_gem_object *obj;
 	const struct object_info {
@@ -972,27 +960,27 @@ static int gpu_write(struct i915_vma *vma,
 
 	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
 
-	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
-	if (err)
-		return err;
-
 	batch = gpu_write_dw(vma, dword * sizeof(u32), value);
 	if (IS_ERR(batch))
 		return PTR_ERR(batch);
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
 	}
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto err_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
-
-	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_lock(vma);
+	err = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+	if (err == 0)
+		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_request;
 
@@ -1006,6 +994,7 @@ err_request:
 err_batch:
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	return err;
 }
@@ -1016,7 +1005,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 	unsigned long n;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -1037,7 +1026,7 @@ static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
 		kunmap_atomic(ptr);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return err;
 }
@@ -1049,8 +1038,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx,
 			    u32 dword, u32 val)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
 	struct i915_vma *vma;
 	int err;
@@ -1103,8 +1091,7 @@ static int igt_write_huge(struct i915_gem_context *ctx,
 			  struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	static struct intel_engine_cs *engines[I915_NUM_ENGINES];
 	struct intel_engine_cs *engine;
 	I915_RND_STATE(prng);
@@ -1389,7 +1376,7 @@ static int igt_ppgtt_gemfs_huge(void *arg)
 	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
 		unsigned int size = sizes[i];
 
-		obj = i915_gem_object_create(i915, size);
+		obj = i915_gem_object_create_shmem(i915, size);
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
@@ -1430,7 +1417,7 @@ static int igt_ppgtt_pin_update(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *dev_priv = ctx->i915;
 	unsigned long supported = INTEL_INFO(dev_priv)->page_sizes;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+	struct i915_address_space *vm = ctx->vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
@@ -1445,7 +1432,7 @@ static int igt_ppgtt_pin_update(void *arg)
 	 * huge-gtt-pages.
 	 */
 
-	if (!ppgtt || !i915_vm_is_4lvl(&ppgtt->vm)) {
+	if (!vm || !i915_vm_is_4lvl(vm)) {
 		pr_info("48b PPGTT not supported, skipping\n");
 		return 0;
 	}
@@ -1460,7 +1447,7 @@ static int igt_ppgtt_pin_update(void *arg)
 		if (IS_ERR(obj))
 			return PTR_ERR(obj);
 
-		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+		vma = i915_vma_instance(obj, vm, NULL);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			goto out_put;
@@ -1514,7 +1501,7 @@ static int igt_ppgtt_pin_update(void *arg)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto out_put;
@@ -1552,8 +1539,7 @@ static int igt_tmpfs_fallback(void *arg)
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
 	struct vfsmount *gemfs = i915->mm.gemfs;
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	u32 *vaddr;
@@ -1567,7 +1553,7 @@ static int igt_tmpfs_fallback(void *arg)
 
 	i915->mm.gemfs = NULL;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto out_restore;
@@ -1610,8 +1596,7 @@ static int igt_shrink_thp(void *arg)
 {
 	struct i915_gem_context *ctx = arg;
 	struct drm_i915_private *i915 = ctx->i915;
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	unsigned int flags = PIN_USER;
@@ -1627,7 +1612,7 @@ static int igt_shrink_thp(void *arg)
 		return 0;
 	}
 
-	obj = i915_gem_object_create(i915, SZ_2M);
+	obj = i915_gem_object_create_shmem(i915, SZ_2M);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1698,7 +1683,7 @@ int i915_gem_huge_page_mock_selftests(void)
 		SUBTEST(igt_mock_ppgtt_64K),
 	};
 	struct drm_i915_private *dev_priv;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	int err;
 
 	dev_priv = mock_gem_device();
@@ -1732,7 +1717,7 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -1769,7 +1754,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 		return PTR_ERR(file);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	ctx = live_context(dev_priv, file);
 	if (IS_ERR(ctx)) {
@@ -1777,13 +1762,13 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv)
 		goto out_unlock;
 	}
 
-	if (ctx->ppgtt)
-		ctx->ppgtt->vm.scrub_64K = true;
+	if (ctx->vm)
+		ctx->vm->scrub_64K = true;
 
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	mock_file_free(dev_priv, file);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
new file mode 100644
index 000000000000..f3a5eb807c1c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+#include "mock_context.h"
+
+static int igt_client_fill(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		obj = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put;
+		}
+
+		/*
+		 * XXX: The goal is move this to get_pages, so try to dirty the
+		 * CPU cache first to check that we do the required clflush
+		 * before scheduling the blt for !llc platforms. This matches
+		 * some version of reality where at get_pages the pages
+		 * themselves may not yet be coherent with the GPU(swap-in). If
+		 * we are missing the flush then we should see the stale cache
+		 * values after we do the set_to_cpu_domain and pick it up as a
+		 * test failure.
+		 */
+		memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+
+		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			obj->cache_dirty = true;
+
+		err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
+						       &obj->mm.page_sizes,
+						       val);
+		if (err)
+			goto err_unpin;
+
+		/*
+		 * XXX: For now do the wait without the object resv lock to
+		 * ensure we don't deadlock.
+		 */
+		err = i915_gem_object_wait(obj,
+					   I915_WAIT_INTERRUPTIBLE |
+					   I915_WAIT_ALL,
+					   MAX_SCHEDULE_TIMEOUT);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		i915_gem_object_unlock(obj);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+		i915_gem_object_put(obj);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
+int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_client_fill),
+	};
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index e43630b40fce..8f22d3f18422 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -1,31 +1,13 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
 
 static int cpu_set(struct drm_i915_gem_object *obj,
 		   unsigned long offset,
@@ -37,7 +19,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
+	err = i915_gem_object_prepare_write(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -54,7 +36,7 @@ static int cpu_set(struct drm_i915_gem_object *obj,
 		drm_clflush_virt_range(cpu, sizeof(*cpu));
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
@@ -69,7 +51,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	u32 *cpu;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
+	err = i915_gem_object_prepare_read(obj, &needs_clflush);
 	if (err)
 		return err;
 
@@ -83,7 +65,7 @@ static int cpu_get(struct drm_i915_gem_object *obj,
 	*v = *cpu;
 
 	kunmap_atomic(map);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 
 	return 0;
 }
@@ -96,7 +78,9 @@ static int gtt_set(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -123,7 +107,9 @@ static int gtt_get(struct drm_i915_gem_object *obj,
 	u32 __iomem *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -149,7 +135,9 @@ static int wc_set(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -170,7 +158,9 @@ static int wc_get(struct drm_i915_gem_object *obj,
 	u32 *map;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_wc_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -194,7 +184,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	u32 *cs;
 	int err;
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -202,7 +194,7 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
@@ -233,7 +225,9 @@ static int gpu_set(struct drm_i915_gem_object *obj,
 	}
 	intel_ring_advance(rq, cs);
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	i915_vma_unpin(vma);
 
 	i915_request_add(rq);
@@ -299,7 +293,7 @@ static int igt_gem_coherency(void *arg)
 	values = offsets + ncachelines;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for (over = igt_coherency_mode; over->name; over++) {
 		if (!over->set)
 			continue;
@@ -371,19 +365,19 @@ static int igt_gem_coherency(void *arg)
 						}
 					}
 
-					__i915_gem_object_release_unless_active(obj);
+					i915_gem_object_put(obj);
 				}
 			}
 		}
 	}
 unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	kfree(offsets);
 	return err;
 
 put_object:
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 	goto unlock;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 4e1b6efc6b22..eaa2b16574c7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -1,40 +1,26 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017 Intel Corporation
  */
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
 
-#include "mock_drm.h"
-#include "mock_gem_device.h"
 #include "huge_gem_object.h"
+#include "igt_gem_utils.h"
 
 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
 
@@ -67,7 +53,7 @@ static int live_nop_switch(void *arg)
 		return PTR_ERR(file);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx) {
@@ -90,16 +76,14 @@ static int live_nop_switch(void *arg)
 
 		times[0] = ktime_get_raw();
 		for (n = 0; n < nctx; n++) {
-			rq = i915_request_alloc(engine, ctx[n]);
+			rq = igt_request_alloc(ctx[n], engine);
 			if (IS_ERR(rq)) {
 				err = PTR_ERR(rq);
 				goto out_unlock;
 			}
 			i915_request_add(rq);
 		}
-		if (i915_request_wait(rq,
-				      I915_WAIT_LOCKED,
-				      HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("Failed to populated %d contexts\n", nctx);
 			i915_gem_set_wedged(i915);
 			err = -EIO;
@@ -120,7 +104,7 @@ static int live_nop_switch(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				rq = i915_request_alloc(engine, ctx[n % nctx]);
+				rq = igt_request_alloc(ctx[n % nctx], engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					goto out_unlock;
@@ -142,9 +126,7 @@ static int live_nop_switch(void *arg)
 
 				i915_request_add(rq);
 			}
-			if (i915_request_wait(rq,
-					      I915_WAIT_LOCKED,
-					      HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Switching between %ld contexts timed out\n",
 				       prime);
 				i915_gem_set_wedged(i915);
@@ -170,7 +152,7 @@ static int live_nop_switch(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	mock_file_free(i915, file);
 	return err;
@@ -223,10 +205,6 @@ gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
-	err = i915_gem_object_set_to_gtt_domain(obj, false);
-	if (err)
-		goto err;
-
 	vma = i915_vma_instance(obj, vma->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
@@ -260,8 +238,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 		    unsigned int dw)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	struct i915_vma *batch;
@@ -275,7 +252,9 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -300,7 +279,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
@@ -316,22 +295,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
+	i915_request_add(rq);
+
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
-	i915_request_add(rq);
-
 	return 0;
 
 skip_request:
@@ -352,7 +335,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	unsigned int n, m, need_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_write(obj, &need_flush);
+	err = i915_gem_object_prepare_write(obj, &need_flush);
 	if (err)
 		return err;
 
@@ -367,7 +350,7 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 		kunmap_atomic(map);
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
 	obj->write_domain = 0;
 	return 0;
@@ -379,7 +362,7 @@ static noinline int cpu_check(struct drm_i915_gem_object *obj,
 	unsigned int n, m, needs_flush;
 	int err;
 
-	err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+	err = i915_gem_object_prepare_read(obj, &needs_flush);
 	if (err)
 		return err;
 
@@ -417,7 +400,7 @@ out_unmap:
 			break;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_finish_access(obj);
 	return err;
 }
 
@@ -444,8 +427,7 @@ create_test_object(struct i915_gem_context *ctx,
 		   struct list_head *objects)
 {
 	struct drm_i915_gem_object *obj;
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &ctx->i915->ggtt.vm;
 	u64 size;
 	int err;
 
@@ -541,13 +523,13 @@ static int igt_ctx_exec(void *arg)
 				}
 			}
 
-			with_intel_runtime_pm(i915, wakeref)
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->ppgtt), err);
+				       yesno(!!ctx->vm), err);
 				goto out_unlock;
 			}
 
@@ -618,7 +600,7 @@ static int igt_shared_ctx_exec(void *arg)
 		goto out_unlock;
 	}
 
-	if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */
+	if (!parent->vm) { /* not full-ppgtt; nothing to share */
 		err = 0;
 		goto out_unlock;
 	}
@@ -649,7 +631,7 @@ static int igt_shared_ctx_exec(void *arg)
 				goto out_test;
 			}
 
-			__assign_ppgtt(ctx, parent->ppgtt);
+			__assign_ppgtt(ctx, parent->vm);
 
 			if (!obj) {
 				obj = create_test_object(parent, file, &objects);
@@ -661,13 +643,13 @@ static int igt_shared_ctx_exec(void *arg)
 			}
 
 			err = 0;
-			with_intel_runtime_pm(i915, wakeref)
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->ppgtt), err);
+				       yesno(!!ctx->vm), err);
 				kernel_context_close(ctx);
 				goto out_test;
 			}
@@ -754,8 +736,7 @@ err:
 
 static int
 emit_rpcs_query(struct drm_i915_gem_object *obj,
-		struct i915_gem_context *ctx,
-		struct intel_engine_cs *engine,
+		struct intel_context *ce,
 		struct i915_request **rq_out)
 {
 	struct i915_request *rq;
@@ -763,13 +744,15 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int err;
 
-	GEM_BUG_ON(!intel_engine_can_store_dword(engine));
+	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, ce->gem_context->vm, NULL);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_gtt_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		return err;
 
@@ -783,27 +766,33 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
 		goto err_vma;
 	}
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = i915_request_create(ce);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_batch;
 	}
 
-	err = engine->emit_bb_start(rq, batch->node.start, batch->node.size, 0);
+	err = rq->engine->emit_bb_start(rq,
+					batch->node.start, batch->node.size,
+					0);
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(batch);
 	err = i915_vma_move_to_active(batch, rq, 0);
+	i915_vma_unlock(batch);
 	if (err)
 		goto skip_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(batch->obj);
 	i915_vma_unpin(batch);
 	i915_vma_close(batch);
+	i915_vma_put(batch);
 
 	i915_vma_unpin(vma);
 
@@ -819,6 +808,7 @@ err_request:
 	i915_request_add(rq);
 err_batch:
 	i915_vma_unpin(batch);
+	i915_vma_put(batch);
 err_vma:
 	i915_vma_unpin(vma);
 
@@ -833,8 +823,7 @@ static int
 __sseu_prepare(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags,
-	       struct i915_gem_context *ctx,
-	       struct intel_engine_cs *engine,
+	       struct intel_context *ce,
 	       struct igt_spinner **spin)
 {
 	struct i915_request *rq;
@@ -852,7 +841,10 @@ __sseu_prepare(struct drm_i915_private *i915,
 	if (ret)
 		goto err_free;
 
-	rq = igt_spinner_create_request(*spin, ctx, engine, MI_NOOP);
+	rq = igt_spinner_create_request(*spin,
+					ce->gem_context,
+					ce->engine,
+					MI_NOOP);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 		goto err_fini;
@@ -879,8 +871,7 @@ err_free:
 
 static int
 __read_slice_count(struct drm_i915_private *i915,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine,
+		   struct intel_context *ce,
 		   struct drm_i915_gem_object *obj,
 		   struct igt_spinner *spin,
 		   u32 *rpcs)
@@ -891,14 +882,14 @@ __read_slice_count(struct drm_i915_private *i915,
 	u32 *buf, val;
 	long ret;
 
-	ret = emit_rpcs_query(obj, ctx, engine, &rq);
+	ret = emit_rpcs_query(obj, ce, &rq);
 	if (ret)
 		return ret;
 
 	if (spin)
 		igt_spinner_end(spin);
 
-	ret = i915_request_wait(rq, I915_WAIT_LOCKED, MAX_SCHEDULE_TIMEOUT);
+	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 	i915_request_put(rq);
 	if (ret < 0)
 		return ret;
@@ -955,31 +946,29 @@ static int
 __sseu_finish(struct drm_i915_private *i915,
 	      const char *name,
 	      unsigned int flags,
-	      struct i915_gem_context *ctx,
-	      struct i915_gem_context *kctx,
-	      struct intel_engine_cs *engine,
+	      struct intel_context *ce,
 	      struct drm_i915_gem_object *obj,
 	      unsigned int expected,
 	      struct igt_spinner *spin)
 {
-	unsigned int slices =
-		hweight32(intel_device_default_sseu(i915).slice_mask);
+	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
 	u32 rpcs = 0;
 	int ret = 0;
 
 	if (flags & TEST_RESET) {
-		ret = i915_reset_engine(engine, "sseu");
+		ret = i915_reset_engine(ce->engine, "sseu");
 		if (ret)
 			goto out;
 	}
 
-	ret = __read_slice_count(i915, ctx, engine, obj,
+	ret = __read_slice_count(i915, ce, obj,
 				 flags & TEST_RESET ? NULL : spin, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
 	if (ret)
 		goto out;
 
-	ret = __read_slice_count(i915, kctx, engine, obj, NULL, &rpcs);
+	ret = __read_slice_count(i915, ce->engine->kernel_context, obj,
+				 NULL, &rpcs);
 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
 
 out:
@@ -987,13 +976,11 @@ out:
 		igt_spinner_end(spin);
 
 	if ((flags & TEST_IDLE) && ret == 0) {
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_LOCKED,
-					     MAX_SCHEDULE_TIMEOUT);
+		ret = i915_gem_wait_for_idle(i915, 0, MAX_SCHEDULE_TIMEOUT);
 		if (ret)
 			return ret;
 
-		ret = __read_slice_count(i915, ctx, engine, obj, NULL, &rpcs);
+		ret = __read_slice_count(i915, ce, obj, NULL, &rpcs);
 		ret = __check_rpcs(name, rpcs, ret, expected,
 				   "Context", " after idle!");
 	}
@@ -1005,28 +992,22 @@ static int
 __sseu_test(struct drm_i915_private *i915,
 	    const char *name,
 	    unsigned int flags,
-	    struct i915_gem_context *ctx,
-	    struct intel_engine_cs *engine,
+	    struct intel_context *ce,
 	    struct drm_i915_gem_object *obj,
 	    struct intel_sseu sseu)
 {
 	struct igt_spinner *spin = NULL;
-	struct i915_gem_context *kctx;
 	int ret;
 
-	kctx = kernel_context(i915);
-	if (IS_ERR(kctx))
-		return PTR_ERR(kctx);
-
-	ret = __sseu_prepare(i915, name, flags, ctx, engine, &spin);
+	ret = __sseu_prepare(i915, name, flags, ce, &spin);
 	if (ret)
-		goto out_context;
+		return ret;
 
-	ret = __i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+	ret = __intel_context_reconfigure_sseu(ce, sseu);
 	if (ret)
 		goto out_spin;
 
-	ret = __sseu_finish(i915, name, flags, ctx, kctx, engine, obj,
+	ret = __sseu_finish(i915, name, flags, ce, obj,
 			    hweight32(sseu.slice_mask), spin);
 
 out_spin:
@@ -1035,10 +1016,6 @@ out_spin:
 		igt_spinner_fini(spin);
 		kfree(spin);
 	}
-
-out_context:
-	kernel_context_close(kctx);
-
 	return ret;
 }
 
@@ -1047,10 +1024,11 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 	       const char *name,
 	       unsigned int flags)
 {
-	struct intel_sseu default_sseu = intel_device_default_sseu(i915);
 	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct intel_sseu default_sseu = engine->sseu;
 	struct drm_i915_gem_object *obj;
 	struct i915_gem_context *ctx;
+	struct intel_context *ce;
 	struct intel_sseu pg_sseu;
 	intel_wakeref_t wakeref;
 	struct drm_file *file;
@@ -1100,25 +1078,35 @@ __igt_ctx_sseu(struct drm_i915_private *i915,
 		goto out_unlock;
 	}
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	ce = i915_gem_context_get_engine(ctx, RCS0);
+	if (IS_ERR(ce)) {
+		ret = PTR_ERR(ce);
+		goto out_rpm;
+	}
+
+	ret = intel_context_pin(ce);
+	if (ret)
+		goto out_context;
 
 	/* First set the default mask. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Then set a power-gated configuration. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* Back to defaults. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, default_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, default_sseu);
 	if (ret)
 		goto out_fail;
 
 	/* One last power-gated configuration for the road. */
-	ret = __sseu_test(i915, name, flags, ctx, engine, obj, pg_sseu);
+	ret = __sseu_test(i915, name, flags, ce, obj, pg_sseu);
 	if (ret)
 		goto out_fail;
 
@@ -1126,10 +1114,13 @@ out_fail:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		ret = -EIO;
 
+	intel_context_unpin(ce);
+out_context:
+	intel_context_put(ce);
+out_rpm:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	i915_gem_object_put(obj);
 
-	intel_runtime_pm_put(i915, wakeref);
-
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -1171,8 +1162,8 @@ static int igt_ctx_readonly(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct drm_i915_gem_object *obj = NULL;
+	struct i915_address_space *vm;
 	struct i915_gem_context *ctx;
-	struct i915_hw_ppgtt *ppgtt;
 	unsigned long idx, ndwords, dw;
 	struct igt_live_test t;
 	struct drm_file *file;
@@ -1203,8 +1194,8 @@ static int igt_ctx_readonly(void *arg)
 		goto out_unlock;
 	}
 
-	ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt;
-	if (!ppgtt || !ppgtt->vm.has_read_only) {
+	vm = ctx->vm ?: &i915->mm.aliasing_ppgtt->vm;
+	if (!vm || !vm->has_read_only) {
 		err = 0;
 		goto out_unlock;
 	}
@@ -1233,13 +1224,13 @@ static int igt_ctx_readonly(void *arg)
 			}
 
 			err = 0;
-			with_intel_runtime_pm(i915, wakeref)
+			with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 				err = gpu_fill(obj, ctx, engine, dw);
 			if (err) {
 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
-				       yesno(!!ctx->ppgtt), err);
+				       yesno(!!ctx->vm), err);
 				goto out_unlock;
 			}
 
@@ -1283,7 +1274,7 @@ out_unlock:
 static int check_scratch(struct i915_gem_context *ctx, u64 offset)
 {
 	struct drm_mm_node *node =
-		__drm_mm_interval_first(&ctx->ppgtt->vm.mm,
+		__drm_mm_interval_first(&ctx->vm->mm,
 					offset, offset + sizeof(u32) - 1);
 	if (!node || node->start > offset)
 		return 0;
@@ -1331,7 +1322,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	__i915_gem_object_flush_map(obj, 0, 64);
 	i915_gem_object_unpin_map(obj);
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1345,7 +1336,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1355,13 +1346,15 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, 0);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
-	i915_gem_object_set_active_reference(obj);
 	i915_vma_unpin(vma);
 	i915_vma_close(vma);
+	i915_vma_put(vma);
 
 	i915_request_add(rq);
 
@@ -1426,7 +1419,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
@@ -1440,7 +1433,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_unpin;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_unpin;
@@ -1450,7 +1443,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 	if (err)
 		goto err_request;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto skip_request;
 
@@ -1459,7 +1454,9 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
 	i915_request_add(rq);
 
+	i915_gem_object_lock(obj);
 	err = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (err)
 		goto err;
 
@@ -1531,14 +1528,14 @@ static int igt_vm_isolation(void *arg)
 	}
 
 	/* We can only test vm isolation, if the vm are distinct */
-	if (ctx_a->ppgtt == ctx_b->ppgtt)
+	if (ctx_a->vm == ctx_b->vm)
 		goto out_unlock;
 
-	vm_total = ctx_a->ppgtt->vm.total;
-	GEM_BUG_ON(ctx_b->ppgtt->vm.total != vm_total);
+	vm_total = ctx_a->vm->total;
+	GEM_BUG_ON(ctx_b->vm->total != vm_total);
 	vm_total -= I915_GTT_PAGE_SIZE;
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
 	for_each_engine(engine, i915, id) {
@@ -1583,7 +1580,7 @@ static int igt_vm_isolation(void *arg)
 		count, RUNTIME_INFO(i915)->num_engines);
 
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 out_unlock:
 	if (igt_live_test_end(&t))
 		err = -EIO;
@@ -1608,111 +1605,9 @@ __engine_name(struct drm_i915_private *i915, intel_engine_mask_t engines)
 	return "none";
 }
 
-static int __igt_switch_to_kernel_context(struct drm_i915_private *i915,
-					  struct i915_gem_context *ctx,
-					  intel_engine_mask_t engines)
+static bool skip_unused_engines(struct intel_context *ce, void *data)
 {
-	struct intel_engine_cs *engine;
-	intel_engine_mask_t tmp;
-	int pass;
-
-	GEM_TRACE("Testing %s\n", __engine_name(i915, engines));
-	for (pass = 0; pass < 4; pass++) { /* Once busy; once idle; repeat */
-		bool from_idle = pass & 1;
-		int err;
-
-		if (!from_idle) {
-			for_each_engine_masked(engine, i915, engines, tmp) {
-				struct i915_request *rq;
-
-				rq = i915_request_alloc(engine, ctx);
-				if (IS_ERR(rq))
-					return PTR_ERR(rq);
-
-				i915_request_add(rq);
-			}
-		}
-
-		err = i915_gem_switch_to_kernel_context(i915,
-							i915->gt.active_engines);
-		if (err)
-			return err;
-
-		if (!from_idle) {
-			err = i915_gem_wait_for_idle(i915,
-						     I915_WAIT_LOCKED,
-						     MAX_SCHEDULE_TIMEOUT);
-			if (err)
-				return err;
-		}
-
-		if (i915->gt.active_requests) {
-			pr_err("%d active requests remain after switching to kernel context, pass %d (%s) on %s engine%s\n",
-			       i915->gt.active_requests,
-			       pass, from_idle ? "idle" : "busy",
-			       __engine_name(i915, engines),
-			       is_power_of_2(engines) ? "" : "s");
-			return -EINVAL;
-		}
-
-		/* XXX Bonus points for proving we are the kernel context! */
-
-		mutex_unlock(&i915->drm.struct_mutex);
-		drain_delayed_work(&i915->gt.idle_work);
-		mutex_lock(&i915->drm.struct_mutex);
-	}
-
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		return -EIO;
-
-	return 0;
-}
-
-static int igt_switch_to_kernel_context(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-	int err;
-
-	/*
-	 * A core premise of switching to the kernel context is that
-	 * if an engine is already idling in the kernel context, we
-	 * do not emit another request and wake it up. The other being
-	 * that we do indeed end up idling in the kernel context.
-	 */
-
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	ctx = kernel_context(i915);
-	if (IS_ERR(ctx)) {
-		mutex_unlock(&i915->drm.struct_mutex);
-		return PTR_ERR(ctx);
-	}
-
-	/* First check idling each individual engine */
-	for_each_engine(engine, i915, id) {
-		err = __igt_switch_to_kernel_context(i915, ctx, BIT(id));
-		if (err)
-			goto out_unlock;
-	}
-
-	/* Now en masse */
-	err = __igt_switch_to_kernel_context(i915, ctx, ALL_ENGINES);
-	if (err)
-		goto out_unlock;
-
-out_unlock:
-	GEM_TRACE_DUMP_ON(err);
-
-	intel_runtime_pm_put(i915, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	kernel_context_close(ctx);
-	return err;
+	return !ce->state;
 }
 
 static void mock_barrier_task(void *data)
@@ -1729,7 +1624,6 @@ static int mock_context_barrier(void *arg)
 	struct drm_i915_private *i915 = arg;
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
-	intel_wakeref_t wakeref;
 	unsigned int counter;
 	int err;
 
@@ -1748,7 +1642,7 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	err = context_barrier_task(ctx, 0,
-				   NULL, mock_barrier_task, &counter);
+				   NULL, NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1761,31 +1655,31 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	err = context_barrier_task(ctx, ALL_ENGINES,
-				   NULL, mock_barrier_task, &counter);
+				   skip_unused_engines,
+				   NULL,
+				   mock_barrier_task,
+				   &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
 	}
 	if (counter == 0) {
-		pr_err("Did not retire immediately for all inactive engines\n");
+		pr_err("Did not retire immediately for all unused engines\n");
 		err = -EINVAL;
 		goto out;
 	}
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(i915, wakeref)
-		rq = i915_request_alloc(i915->engine[RCS0], ctx);
+	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
 	if (IS_ERR(rq)) {
 		pr_err("Request allocation failed!\n");
 		goto out;
 	}
 	i915_request_add(rq);
-	GEM_BUG_ON(list_empty(&ctx->active_engines));
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
 	err = context_barrier_task(ctx, ALL_ENGINES,
-				   NULL, mock_barrier_task, &counter);
+				   NULL, NULL, mock_barrier_task, &counter);
 	context_barrier_inject_fault = 0;
 	if (err == -ENXIO)
 		err = 0;
@@ -1800,7 +1694,10 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	err = context_barrier_task(ctx, ALL_ENGINES,
-				   NULL, mock_barrier_task, &counter);
+				   skip_unused_engines,
+				   NULL,
+				   mock_barrier_task,
+				   &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1824,7 +1721,6 @@ unlock:
 int i915_gem_context_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(mock_context_barrier),
 	};
 	struct drm_i915_private *i915;
@@ -1843,7 +1739,6 @@ int i915_gem_context_mock_selftests(void)
 int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_switch_to_kernel_context),
 		SUBTEST(live_nop_switch),
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 2b943ee246c9..e3a64edef918 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -1,31 +1,14 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "i915_drv.h"
+#include "i915_selftest.h"
 
-#include "mock_gem_device.h"
 #include "mock_dmabuf.h"
+#include "selftests/mock_gem_device.h"
 
 static int igt_dmabuf_export(void *arg)
 {
@@ -33,7 +16,7 @@ static int igt_dmabuf_export(void *arg)
 	struct drm_i915_gem_object *obj;
 	struct dma_buf *dmabuf;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -57,7 +40,7 @@ static int igt_dmabuf_import_self(void *arg)
 	struct dma_buf *dmabuf;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -232,7 +215,7 @@ static int igt_dmabuf_export_vmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -279,7 +262,7 @@ static int igt_dmabuf_export_kmap(void *arg)
 	void *ptr;
 	int err;
 
-	obj = i915_gem_object_create(i915, 2*PAGE_SIZE);
+	obj = i915_gem_object_create_shmem(i915, 2 * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 971148fbe6f5..5c81f4b4813a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -1,145 +1,15 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include <linux/prime_numbers.h>
 
-#include "mock_gem_device.h"
+#include "gt/intel_gt_pm.h"
 #include "huge_gem_object.h"
-
-static int igt_gem_object(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	int err = -ENOMEM;
-
-	/* Basic test to ensure we can create an object */
-
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		pr_err("i915_gem_object_create failed, err=%d\n", err);
-		goto out;
-	}
-
-	err = 0;
-	i915_gem_object_put(obj);
-out:
-	return err;
-}
-
-static int igt_phys_object(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	int err;
-
-	/* Create an object and bind it to a contiguous set of physical pages,
-	 * i.e. exercise the i915_gem_object_phys API.
-	 */
-
-	obj = i915_gem_object_create(i915, PAGE_SIZE);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		pr_err("i915_gem_object_create failed, err=%d\n", err);
-		goto out;
-	}
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
-		goto out_obj;
-	}
-
-	if (obj->ops != &i915_gem_phys_ops) {
-		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	if (!atomic_read(&obj->mm.pages_pin_count)) {
-		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
-		err = -EINVAL;
-		goto out_obj;
-	}
-
-	/* Make the object dirty so that put_pages must do copy back the data */
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_object_set_to_gtt_domain(obj, true);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (err) {
-		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
-		       err);
-		goto out_obj;
-	}
-
-out_obj:
-	i915_gem_object_put(obj);
-out:
-	return err;
-}
-
-static int igt_gem_huge(void *arg)
-{
-	const unsigned int nreal = 509; /* just to be awkward */
-	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj;
-	unsigned int n;
-	int err;
-
-	/* Basic sanitycheck of our huge fake object allocation */
-
-	obj = huge_gem_object(i915,
-			      nreal * PAGE_SIZE,
-			      i915->ggtt.vm.total + PAGE_SIZE);
-	if (IS_ERR(obj))
-		return PTR_ERR(obj);
-
-	err = i915_gem_object_pin_pages(obj);
-	if (err) {
-		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
-		       nreal, obj->base.size / PAGE_SIZE, err);
-		goto out;
-	}
-
-	for (n = 0; n < obj->base.size / PAGE_SIZE; n++) {
-		if (i915_gem_object_get_page(obj, n) !=
-		    i915_gem_object_get_page(obj, n % nreal)) {
-			pr_err("Page lookup mismatch at index %u [%u]\n",
-			       n, n % nreal);
-			err = -EINVAL;
-			goto out_unpin;
-		}
-	}
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
 
 struct tile {
 	unsigned int width;
@@ -228,6 +98,14 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
 	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
 
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err) {
+		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+		return err;
+	}
+
 	for_each_prime_number_from(page, 1, npages) {
 		struct i915_ggtt_view view =
 			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
@@ -240,13 +118,6 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(view.partial.size > nreal);
 		cond_resched();
 
-		err = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (err) {
-			pr_err("Failed to flush to GTT write domain; err=%d\n",
-			       err);
-			return err;
-		}
-
 		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 		if (IS_ERR(vma)) {
 			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
@@ -265,14 +136,14 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 			return PTR_ERR(io);
 		}
 
-		iowrite32(page, io + n * PAGE_SIZE/sizeof(*io));
+		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
 		i915_vma_unpin_iomap(vma);
 
 		offset = tiled_offset(tile, page << PAGE_SHIFT);
 		if (offset >= obj->base.size)
 			continue;
 
-		flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+		i915_gem_flush_ggtt_writes(to_i915(obj->base.dev));
 
 		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
 		cpu = kmap(p) + offset_in_page(offset);
@@ -334,7 +205,7 @@ static int igt_partial_tiling(void *arg)
 	}
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (1) {
 		IGT_TIMEOUT(end);
@@ -445,7 +316,7 @@ next_tiling: ;
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 out:
@@ -468,18 +339,20 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
 	if (err)
 		return err;
 
-	rq = i915_request_alloc(i915->engine[RCS0], i915->kernel_context);
+	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq)) {
 		i915_vma_unpin(vma);
 		return PTR_ERR(rq);
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 
 	i915_request_add(rq);
 
-	__i915_gem_object_release_unless_active(obj);
 	i915_vma_unpin(vma);
+	i915_gem_object_put(obj); /* leave it only alive via its active ref */
 
 	return err;
 }
@@ -495,7 +368,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	err = i915_gem_object_create_mmap_offset(obj);
+	err = create_mmap_offset(obj);
 	i915_gem_object_put(obj);
 
 	return err == expected;
@@ -505,17 +378,21 @@ static void disable_retire_worker(struct drm_i915_private *i915)
 {
 	i915_gem_shrinker_unregister(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	if (!i915->gt.active_requests++) {
-		intel_wakeref_t wakeref;
+	intel_gt_pm_get(i915);
 
-		with_intel_runtime_pm(i915, wakeref)
-			i915_gem_unpark(i915);
-	}
+	cancel_delayed_work_sync(&i915->gem.retire_work);
+	flush_work(&i915->gem.idle_work);
+}
+
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+	intel_gt_pm_put(i915);
+
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	cancel_delayed_work_sync(&i915->gt.retire_work);
-	cancel_delayed_work_sync(&i915->gt.idle_work);
+	i915_gem_shrinker_register(i915);
 }
 
 static int igt_mmap_offset_exhaustion(void *arg)
@@ -552,7 +429,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 	}
 
 	/* Too large */
-	if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) {
+	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
 		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
 		err = -EINVAL;
 		goto out;
@@ -565,7 +442,7 @@ static int igt_mmap_offset_exhaustion(void *arg)
 		goto out;
 	}
 
-	err = i915_gem_object_create_mmap_offset(obj);
+	err = create_mmap_offset(obj);
 	if (err) {
 		pr_err("Unable to insert object into reclaimed hole\n");
 		goto err_obj;
@@ -581,8 +458,6 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 	/* Now fill with busy dead objects that we expect to reap */
 	for (loop = 0; loop < 3; loop++) {
-		intel_wakeref_t wakeref;
-
 		if (i915_terminally_wedged(i915))
 			break;
 
@@ -592,10 +467,8 @@ static int igt_mmap_offset_exhaustion(void *arg)
 			goto out;
 		}
 
-		err = 0;
 		mutex_lock(&i915->drm.struct_mutex);
-		with_intel_runtime_pm(i915, wakeref)
-			err = make_obj_busy(obj);
+		err = make_obj_busy(obj);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err) {
 			pr_err("[loop %d] Failed to busy the object\n", loop);
@@ -604,9 +477,9 @@ static int igt_mmap_offset_exhaustion(void *arg)
 
 		/* NB we rely on the _active_ reference to access obj now */
 		GEM_BUG_ON(!i915_gem_object_is_active(obj));
-		err = i915_gem_object_create_mmap_offset(obj);
+		err = create_mmap_offset(obj);
 		if (err) {
-			pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n",
+			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
 			       loop, err);
 			goto out;
 		}
@@ -615,42 +488,16 @@ static int igt_mmap_offset_exhaustion(void *arg)
 out:
 	drm_mm_remove_node(&resv);
 out_park:
-	mutex_lock(&i915->drm.struct_mutex);
-	if (--i915->gt.active_requests)
-		queue_delayed_work(i915->wq, &i915->gt.retire_work, 0);
-	else
-		queue_delayed_work(i915->wq, &i915->gt.idle_work, 0);
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_gem_shrinker_register(i915);
+	restore_retire_worker(i915);
 	return err;
 err_obj:
 	i915_gem_object_put(obj);
 	goto out;
 }
 
-int i915_gem_object_mock_selftests(void)
-{
-	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_gem_object),
-		SUBTEST(igt_phys_object),
-	};
-	struct drm_i915_private *i915;
-	int err;
-
-	i915 = mock_gem_device();
-	if (!i915)
-		return -ENOMEM;
-
-	err = i915_subtests(tests, i915);
-
-	drm_dev_put(&i915->drm);
-	return err;
-}
-
-int i915_gem_object_live_selftests(struct drm_i915_private *i915)
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_gem_huge),
 		SUBTEST(igt_partial_tiling),
 		SUBTEST(igt_mmap_offset_exhaustion),
 	};
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
new file mode 100644
index 000000000000..2b6db6f799de
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_gem_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err = -ENOMEM;
+
+	/* Basic test to ensure we can create an object */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	err = 0;
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+static int igt_gem_huge(void *arg)
+{
+	const unsigned int nreal = 509; /* just to be awkward */
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	unsigned int n;
+	int err;
+
+	/* Basic sanitycheck of our huge fake object allocation */
+
+	obj = huge_gem_object(i915,
+			      nreal * PAGE_SIZE,
+			      i915->ggtt.vm.total + PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	err = i915_gem_object_pin_pages(obj);
+	if (err) {
+		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+		       nreal, obj->base.size / PAGE_SIZE, err);
+		goto out;
+	}
+
+	for (n = 0; n < obj->base.size / PAGE_SIZE; n++) {
+		if (i915_gem_object_get_page(obj, n) !=
+		    i915_gem_object_get_page(obj, n % nreal)) {
+			pr_err("Page lookup mismatch at index %u [%u]\n",
+			       n, n % nreal);
+			err = -EINVAL;
+			goto out_unpin;
+		}
+	}
+
+out_unpin:
+	i915_gem_object_unpin_pages(obj);
+out:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+int i915_gem_object_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_gem_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
+
+int i915_gem_object_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_gem_huge),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
new file mode 100644
index 000000000000..e23d8c9e9298
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+#include "mock_context.h"
+
+static int igt_fill_blt(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *obj;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		obj = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put;
+		}
+
+		/*
+		 * Make sure the potentially async clflush does its job, if
+		 * required.
+		 */
+		memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+
+		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			obj->cache_dirty = true;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_fill_blt(obj, ce, val);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(obj);
+		err = i915_gem_object_set_to_cpu_domain(obj, false);
+		i915_gem_object_unlock(obj);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(obj);
+		i915_gem_object_put(obj);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(obj);
+err_put:
+	i915_gem_object_put(obj);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
+int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_fill_blt),
+	};
+
+	if (i915_terminally_wedged(i915))
+		return 0;
+
+	if (!HAS_ENGINE(i915, BCS0))
+		return 0;
+
+	return i915_subtests(tests, i915->engine[BCS0]->kernel_context);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 000000000000..94a15e3f6db8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct drm_i915_gem_object *obj;
+	int err;
+
+	/* Create an object and bind it to a contiguous set of physical pages,
+	 * i.e. exercise the i915_gem_object_phys API.
+	 */
+
+	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		pr_err("i915_gem_object_create failed, err=%d\n", err);
+		goto out;
+	}
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err) {
+		pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+		goto out_obj;
+	}
+
+	if (obj->ops != &i915_gem_phys_ops) {
+		pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	if (!atomic_read(&obj->mm.pages_pin_count)) {
+		pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+		err = -EINVAL;
+		goto out_obj;
+	}
+
+	/* Make the object dirty so that put_pages must do copy back the data */
+	i915_gem_object_lock(obj);
+	err = i915_gem_object_set_to_gtt_domain(obj, true);
+	i915_gem_object_unlock(obj);
+	if (err) {
+		pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+		       err);
+		goto out_obj;
+	}
+
+out_obj:
+	i915_gem_object_put(obj);
+out:
+	return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(mock_phys_object),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	err = i915_subtests(tests, i915);
+
+	drm_dev_put(&i915->drm);
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
new file mode 100644
index 000000000000..b232e6d2cd92
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_gem_utils.h"
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
+#include "i915_request.h"
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	/*
+	 * Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ce = i915_gem_context_get_engine(ctx, engine->id);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = intel_context_create_request(ce);
+	intel_context_put(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
new file mode 100644
index 000000000000..0f17251cf75d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __IGT_GEM_UTILS_H__
+#define __IGT_GEM_UTILS_H__
+
+struct i915_request;
+struct i915_gem_context;
+struct intel_engine_cs;
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 0426093bf1d9..be8974ccff24 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -1,35 +1,18 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_context.h"
-#include "mock_gtt.h"
+#include "selftests/mock_gtt.h"
 
 struct i915_gem_context *
 mock_context(struct drm_i915_private *i915,
 	     const char *name)
 {
 	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	int ret;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
@@ -40,21 +23,22 @@ mock_context(struct drm_i915_private *i915,
 	INIT_LIST_HEAD(&ctx->link);
 	ctx->i915 = i915;
 
-	ctx->hw_contexts = RB_ROOT;
-	spin_lock_init(&ctx->hw_contexts_lock);
+	mutex_init(&ctx->engines_mutex);
+	e = default_engines(ctx);
+	if (IS_ERR(e))
+		goto err_free;
+	RCU_INIT_POINTER(ctx->engines, e);
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
-	INIT_LIST_HEAD(&ctx->handles_list);
 	INIT_LIST_HEAD(&ctx->hw_id_link);
-	INIT_LIST_HEAD(&ctx->active_engines);
 	mutex_init(&ctx->mutex);
 
 	ret = i915_gem_context_pin_hw_id(ctx);
 	if (ret < 0)
-		goto err_handles;
+		goto err_engines;
 
 	if (name) {
-		struct i915_hw_ppgtt *ppgtt;
+		struct i915_ppgtt *ppgtt;
 
 		ctx->name = kstrdup(name, GFP_KERNEL);
 		if (!ctx->name)
@@ -64,12 +48,15 @@ mock_context(struct drm_i915_private *i915,
 		if (!ppgtt)
 			goto err_put;
 
-		__set_ppgtt(ctx, ppgtt);
+		__set_ppgtt(ctx, &ppgtt->vm);
+		i915_vm_put(&ppgtt->vm);
 	}
 
 	return ctx;
 
-err_handles:
+err_engines:
+	free_engines(rcu_access_pointer(ctx->engines));
+err_free:
 	kfree(ctx);
 	return NULL;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 000000000000..0b926653914f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+	     const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct drm_file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
index ca682caf1062..b9e059d4328a 100644
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -1,25 +1,7 @@
 /*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2016 Intel Corporation
  */
 
 #include "mock_dmabuf.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 000000000000..f0f8bbd82dfc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+	int npages;
+	struct page *pages[];
+};
+
+static struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+	return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
index 20acdbee7bd0..370360b4a148 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -1,4 +1,9 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
 #ifndef __MOCK_GEM_OBJECT_H__
 #define __MOCK_GEM_OBJECT_H__
 
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
new file mode 100644
index 000000000000..1c75b5c9790c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/Makefile
@@ -0,0 +1,2 @@
+# Extra header tests
+include $(src)/Makefile.header-test
diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
new file mode 100644
index 000000000000..61e06cbb4b32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/Makefile.header-test
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: MIT
+# Copyright © 2019 Intel Corporation
+
+# Test the headers are compilable as standalone units
+header_test := $(notdir $(wildcard $(src)/*.h))
+
+quiet_cmd_header_test = HDRTEST $@
+      cmd_header_test = echo "\#include \"$(<F)\"" > $@
+
+header_test_%.c: %.h
+	$(call cmd,header_test)
+
+extra-$(CONFIG_DRM_I915_WERROR) += \
+	$(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
+
+clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 832cb6b1e9bd..c092bdf5f0bf 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -81,6 +81,22 @@ static inline bool __request_completed(const struct i915_request *rq)
 	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
 }
 
+__maybe_unused static bool
+check_signal_order(struct intel_context *ce, struct i915_request *rq)
+{
+	if (!list_is_last(&rq->signal_link, &ce->signals) &&
+	    i915_seqno_passed(rq->fence.seqno,
+			      list_next_entry(rq, signal_link)->fence.seqno))
+		return false;
+
+	if (!list_is_first(&rq->signal_link, &ce->signals) &&
+	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
+			      rq->fence.seqno))
+		return false;
+
+	return true;
+}
+
 static bool
 __dma_fence_signal(struct dma_fence *fence)
 {
@@ -130,6 +146,8 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
 			struct i915_request *rq =
 				list_entry(pos, typeof(*rq), signal_link);
 
+			GEM_BUG_ON(!check_signal_order(ce, rq));
+
 			if (!__request_completed(rq))
 				break;
 
@@ -312,6 +330,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 		list_add(&rq->signal_link, pos);
 		if (pos == &ce->signals) /* catch transitions from empty list */
 			list_move_tail(&ce->signal_link, &b->signalers);
+		GEM_BUG_ON(!check_signal_order(ce, rq));
 
 		set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
 		spin_unlock(&b->irq_lock);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
new file mode 100644
index 000000000000..2c454f227c2e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -0,0 +1,241 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+
+#include "i915_drv.h"
+#include "i915_globals.h"
+
+#include "intel_context.h"
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+
+static struct i915_global_context {
+	struct i915_global base;
+	struct kmem_cache *slab_ce;
+} global;
+
+static struct intel_context *intel_context_alloc(void)
+{
+	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
+}
+
+void intel_context_free(struct intel_context *ce)
+{
+	kmem_cache_free(global.slab_ce, ce);
+}
+
+struct intel_context *
+intel_context_create(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+
+	ce = intel_context_alloc();
+	if (!ce)
+		return ERR_PTR(-ENOMEM);
+
+	intel_context_init(ce, ctx, engine);
+	return ce;
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+	int err;
+
+	if (mutex_lock_interruptible(&ce->pin_mutex))
+		return -EINTR;
+
+	if (likely(!atomic_read(&ce->pin_count))) {
+		intel_wakeref_t wakeref;
+
+		err = 0;
+		with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
+			err = ce->ops->pin(ce);
+		if (err)
+			goto err;
+
+		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
+
+		smp_mb__before_atomic(); /* flush pin before it is visible */
+	}
+
+	atomic_inc(&ce->pin_count);
+	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+
+	mutex_unlock(&ce->pin_mutex);
+	return 0;
+
+err:
+	mutex_unlock(&ce->pin_mutex);
+	return err;
+}
+
+void intel_context_unpin(struct intel_context *ce)
+{
+	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+		return;
+
+	/* We may be called from inside intel_context_pin() to evict another */
+	intel_context_get(ce);
+	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
+
+	if (likely(atomic_dec_and_test(&ce->pin_count))) {
+		ce->ops->unpin(ce);
+
+		i915_gem_context_put(ce->gem_context);
+		intel_context_active_release(ce);
+	}
+
+	mutex_unlock(&ce->pin_mutex);
+	intel_context_put(ce);
+}
+
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
+{
+	int err;
+
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	vma->obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+static void intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_context_put(ce);
+}
+
+void
+intel_context_init(struct intel_context *ce,
+		   struct i915_gem_context *ctx,
+		   struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!engine->cops);
+
+	kref_init(&ce->ref);
+
+	ce->gem_context = ctx;
+	ce->engine = engine;
+	ce->ops = engine->cops;
+	ce->sseu = engine->sseu;
+
+	INIT_LIST_HEAD(&ce->signal_link);
+	INIT_LIST_HEAD(&ce->signals);
+
+	mutex_init(&ce->pin_mutex);
+
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
+}
+
+static void i915_global_context_shrink(void)
+{
+	kmem_cache_shrink(global.slab_ce);
+}
+
+static void i915_global_context_exit(void)
+{
+	kmem_cache_destroy(global.slab_ce);
+}
+
+static struct i915_global_context global = { {
+	.shrink = i915_global_context_shrink,
+	.exit = i915_global_context_exit,
+} };
+
+int __init i915_global_context_init(void)
+{
+	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
+	if (!global.slab_ce)
+		return -ENOMEM;
+
+	i915_global_register(&global.base);
+	return 0;
+}
+
+void intel_context_enter_engine(struct intel_context *ce)
+{
+	intel_engine_pm_get(ce->engine);
+}
+
+void intel_context_exit_engine(struct intel_context *ce)
+{
+	intel_engine_pm_put(ce->engine);
+}
+
+struct i915_request *intel_context_create_request(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_pin(ce);
+	if (unlikely(err))
+		return ERR_PTR(err);
+
+	rq = i915_request_create(ce);
+	intel_context_unpin(ce);
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
new file mode 100644
index 000000000000..a47275bc4f01
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -0,0 +1,134 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_CONTEXT_H__
+#define __INTEL_CONTEXT_H__
+
+#include <linux/lockdep.h>
+
+#include "intel_context_types.h"
+#include "intel_engine_types.h"
+
+void intel_context_init(struct intel_context *ce,
+			struct i915_gem_context *ctx,
+			struct intel_engine_cs *engine);
+
+struct intel_context *
+intel_context_create(struct i915_gem_context *ctx,
+		     struct intel_engine_cs *engine);
+
+void intel_context_free(struct intel_context *ce);
+
+/**
+ * intel_context_lock_pinned - Stablises the 'pinned' status of the HW context
+ * @ce - the context
+ *
+ * Acquire a lock on the pinned status of the HW context, such that the context
+ * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
+ * intel_context_is_pinned() remains stable.
+ */
+static inline int intel_context_lock_pinned(struct intel_context *ce)
+	__acquires(ce->pin_mutex)
+{
+	return mutex_lock_interruptible(&ce->pin_mutex);
+}
+
+/**
+ * intel_context_is_pinned - Reports the 'pinned' status
+ * @ce - the context
+ *
+ * While in use by the GPU, the context, along with its ring and page
+ * tables is pinned into memory and the GTT.
+ *
+ * Returns: true if the context is currently pinned for use by the GPU.
+ */
+static inline bool
+intel_context_is_pinned(struct intel_context *ce)
+{
+	return atomic_read(&ce->pin_count);
+}
+
+/**
+ * intel_context_unlock_pinned - Releases the earlier locking of 'pinned' status
+ * @ce - the context
+ *
+ * Releases the lock earlier acquired by intel_context_unlock_pinned().
+ */
+static inline void intel_context_unlock_pinned(struct intel_context *ce)
+	__releases(ce->pin_mutex)
+{
+	mutex_unlock(&ce->pin_mutex);
+}
+
+int __intel_context_do_pin(struct intel_context *ce);
+
+static inline int intel_context_pin(struct intel_context *ce)
+{
+	if (likely(atomic_inc_not_zero(&ce->pin_count)))
+		return 0;
+
+	return __intel_context_do_pin(ce);
+}
+
+static inline void __intel_context_pin(struct intel_context *ce)
+{
+	GEM_BUG_ON(!intel_context_is_pinned(ce));
+	atomic_inc(&ce->pin_count);
+}
+
+void intel_context_unpin(struct intel_context *ce);
+
+void intel_context_enter_engine(struct intel_context *ce);
+void intel_context_exit_engine(struct intel_context *ce);
+
+static inline void intel_context_enter(struct intel_context *ce)
+{
+	if (!ce->active_count++)
+		ce->ops->enter(ce);
+}
+
+static inline void intel_context_mark_active(struct intel_context *ce)
+{
+	++ce->active_count;
+}
+
+static inline void intel_context_exit(struct intel_context *ce)
+{
+	GEM_BUG_ON(!ce->active_count);
+	if (!--ce->active_count)
+		ce->ops->exit(ce);
+}
+
+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
+void intel_context_active_release(struct intel_context *ce);
+
+static inline struct intel_context *intel_context_get(struct intel_context *ce)
+{
+	kref_get(&ce->ref);
+	return ce;
+}
+
+static inline void intel_context_put(struct intel_context *ce)
+{
+	kref_put(&ce->ref, ce->ops->destroy);
+}
+
+static inline int __must_check
+intel_context_timeline_lock(struct intel_context *ce)
+	__acquires(&ce->ring->timeline->mutex)
+{
+	return mutex_lock_interruptible(&ce->ring->timeline->mutex);
+}
+
+static inline void intel_context_timeline_unlock(struct intel_context *ce)
+	__releases(&ce->ring->timeline->mutex)
+{
+	mutex_unlock(&ce->ring->timeline->mutex);
+}
+
+struct i915_request *intel_context_create_request(struct intel_context *ce);
+
+#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 339c7437fe82..08049ee91cee 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -10,11 +10,11 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
 #include "intel_engine_types.h"
+#include "intel_sseu.h"
 
 struct i915_gem_context;
 struct i915_vma;
@@ -25,28 +25,20 @@ struct intel_context_ops {
 	int (*pin)(struct intel_context *ce);
 	void (*unpin)(struct intel_context *ce);
 
+	void (*enter)(struct intel_context *ce);
+	void (*exit)(struct intel_context *ce);
+
 	void (*reset)(struct intel_context *ce);
 	void (*destroy)(struct kref *kref);
 };
 
-/*
- * Powergating configuration for a particular (context,engine).
- */
-struct intel_sseu {
-	u8 slice_mask;
-	u8 subslice_mask;
-	u8 min_eus_per_subslice;
-	u8 max_eus_per_subslice;
-};
-
 struct intel_context {
 	struct kref ref;
 
 	struct i915_gem_context *gem_context;
 	struct intel_engine_cs *engine;
-	struct intel_engine_cs *active;
+	struct intel_engine_cs *inflight;
 
-	struct list_head active_link;
 	struct list_head signal_link;
 	struct list_head signals;
 
@@ -56,19 +48,18 @@ struct intel_context {
 	u32 *lrc_reg_state;
 	u64 lrc_desc;
 
+	unsigned int active_count; /* notionally protected by timeline->mutex */
+
 	atomic_t pin_count;
 	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
 
-	intel_engine_mask_t saturated; /* submitting semaphores too late? */
-
 	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
 	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;
 
 	const struct intel_context_ops *ops;
-	struct rb_node node;
 
 	/** sseu: Control eu/slice partitioning */
 	struct intel_sseu sseu;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 72c7c337ace9..2f1c6871ee95 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -52,6 +52,7 @@ struct drm_printer;
 #define ENGINE_READ(...)	__ENGINE_READ_OP(read, __VA_ARGS__)
 #define ENGINE_READ_FW(...)	__ENGINE_READ_OP(read_fw, __VA_ARGS__)
 #define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)
+#define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__)
 
 #define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
 	__ENGINE_REG_OP(read64_2x32, (engine__), \
@@ -68,6 +69,24 @@ struct drm_printer;
 #define ENGINE_WRITE(...)	__ENGINE_WRITE_OP(write, __VA_ARGS__)
 #define ENGINE_WRITE_FW(...)	__ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
 
+#define GEN6_RING_FAULT_REG_READ(engine__) \
+	intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__))
+
+#define GEN6_RING_FAULT_REG_POSTING_READ(engine__) \
+	intel_uncore_posting_read((engine__)->uncore, RING_FAULT_REG(engine__))
+
+#define GEN6_RING_FAULT_REG_RMW(engine__, clear__, set__) \
+({ \
+	u32 __val; \
+\
+	__val = intel_uncore_read((engine__)->uncore, \
+				  RING_FAULT_REG(engine__)); \
+	__val &= ~(clear__); \
+	__val |= (set__); \
+	intel_uncore_write((engine__)->uncore, RING_FAULT_REG(engine__), \
+			   __val); \
+})
+
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
@@ -106,24 +125,6 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
-static inline bool __execlists_need_preempt(int prio, int last)
-{
-	/*
-	 * Allow preemption of low -> normal -> high, but we do
-	 * not allow low priority tasks to preempt other low priority
-	 * tasks under the impression that latency for low priority
-	 * tasks does not matter (as much as background throughput),
-	 * so kiss.
-	 *
-	 * More naturally we would write
-	 *	prio >= max(0, last);
-	 * except that we wish to prevent triggering preemption at the same
-	 * priority level: the task that is running should remain running
-	 * to preserve FIFO ordering of dependencies.
-	 */
-	return prio > max(I915_PRIORITY_NORMAL - 1, last);
-}
-
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
@@ -233,8 +234,6 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  */
 #define I915_GEM_HWS_PREEMPT		0x32
 #define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
-#define I915_GEM_HWS_HANGCHECK		0x34
-#define I915_GEM_HWS_HANGCHECK_ADDR	(I915_GEM_HWS_HANGCHECK * sizeof(u32))
 #define I915_GEM_HWS_SEQNO		0x40
 #define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
 #define I915_GEM_HWS_SCRATCH		0x80
@@ -362,14 +361,16 @@ __intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
 	return (head - tail - CACHELINE_BYTES) & (size - 1);
 }
 
-int intel_engine_setup_common(struct intel_engine_cs *engine);
+int intel_engines_init_mmio(struct drm_i915_private *i915);
+int intel_engines_setup(struct drm_i915_private *i915);
+int intel_engines_init(struct drm_i915_private *i915);
+void intel_engines_cleanup(struct drm_i915_private *i915);
+
 int intel_engine_init_common(struct intel_engine_cs *engine);
 void intel_engine_cleanup_common(struct intel_engine_cs *engine);
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
+int intel_ring_submission_setup(struct intel_engine_cs *engine);
+int intel_ring_submission_init(struct intel_engine_cs *engine);
 
 int intel_engine_stop_cs(struct intel_engine_cs *engine);
 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
@@ -382,6 +383,8 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
+void intel_engine_init_execlists(struct intel_engine_cs *engine);
+
 void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -458,19 +461,12 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
 {
 	if (engine->reset.reset)
 		engine->reset.reset(engine, stalled);
+	engine->serial++; /* contexts lost */
 }
 
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
-void intel_gt_resume(struct drm_i915_private *i915);
-
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
-void intel_engines_park(struct drm_i915_private *i915);
-void intel_engines_unpark(struct drm_i915_private *i915);
-
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
 unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
@@ -547,6 +543,8 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
 struct i915_request *
 intel_engine_find_active_request(struct intel_engine_cs *engine);
 
+u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 
 static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
@@ -567,17 +565,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
 
 #endif
 
-static inline u32
-intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
-{
-	return engine->hangcheck.next_seqno =
-		next_pseudo_random32(engine->hangcheck.next_seqno);
-}
-
-static inline u32
-intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
-{
-	return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
-}
+void intel_engine_init_active(struct intel_engine_cs *engine,
+			      unsigned int subclass);
+#define ENGINE_PHYSICAL	0
+#define ENGINE_MOCK	1
+#define ENGINE_VIRTUAL	2
 
 #endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index eea9bec04f1b..7fd33e81c2d9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -24,10 +24,15 @@
 
 #include <drm/drm_print.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
-#include "i915_reset.h"
-#include "intel_ringbuffer.h"
+
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+#include "intel_context.h"
 #include "intel_lrc.h"
+#include "intel_reset.h"
 
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
@@ -48,35 +53,24 @@
 
 struct engine_class_info {
 	const char *name;
-	int (*init_legacy)(struct intel_engine_cs *engine);
-	int (*init_execlists)(struct intel_engine_cs *engine);
-
 	u8 uabi_class;
 };
 
 static const struct engine_class_info intel_engine_classes[] = {
 	[RENDER_CLASS] = {
 		.name = "rcs",
-		.init_execlists = logical_render_ring_init,
-		.init_legacy = intel_init_render_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_RENDER,
 	},
 	[COPY_ENGINE_CLASS] = {
 		.name = "bcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_blt_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_COPY,
 	},
 	[VIDEO_DECODE_CLASS] = {
 		.name = "vcs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_bsd_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO,
 	},
 	[VIDEO_ENHANCEMENT_CLASS] = {
 		.name = "vecs",
-		.init_execlists = logical_xcs_ring_init,
-		.init_legacy = intel_init_vebox_ring_buffer,
 		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
 	},
 };
@@ -165,7 +159,7 @@ static const struct engine_info intel_engines[] = {
 };
 
 /**
- * ___intel_engine_context_size() - return the size of the context for an engine
+ * intel_engine_context_size() - return the size of the context for an engine
  * @dev_priv: i915 device private
  * @class: engine class
  *
@@ -178,8 +172,7 @@ static const struct engine_info intel_engines[] = {
  * in LRC mode, but does not include the "shared data page" used with
  * GuC submission. The caller should account for this if using the GuC.
  */
-static u32
-__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 {
 	u32 cxt_size;
 
@@ -212,6 +205,22 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
 					PAGE_SIZE);
 		case 5:
 		case 4:
+			/*
+			 * There is a discrepancy here between the size reported
+			 * by the register and the size of the context layout
+			 * in the docs. Both are described as authorative!
+			 *
+			 * The discrepancy is on the order of a few cachelines,
+			 * but the total is under one page (4k), which is our
+			 * minimum allocation anyway so it should all come
+			 * out in the wash.
+			 */
+			cxt_size = I915_READ(CXT_SIZE) + 1;
+			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
+					 INTEL_GEN(dev_priv),
+					 cxt_size * 64,
+					 cxt_size - 1);
+			return round_up(cxt_size * 64, PAGE_SIZE);
 		case 3:
 		case 2:
 		/* For the special day when i810 gets merged. */
@@ -312,10 +321,16 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->class = info->class;
 	engine->instance = info->instance;
 
+	/*
+	 * To be overridden by the backend on setup. However to facilitate
+	 * cleanup on error during setup, we always provide the destroy vfunc.
+	 */
+	engine->destroy = (typeof(engine->destroy))kfree;
+
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
-	engine->context_size = __intel_engine_context_size(dev_priv,
-							   engine->class);
+	engine->context_size = intel_engine_context_size(dev_priv,
+							 engine->class);
 	if (WARN_ON(engine->context_size > BIT(20)))
 		engine->context_size = 0;
 	if (engine->context_size)
@@ -336,18 +351,70 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	return 0;
 }
 
+static void __setup_engine_capabilities(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (engine->class == VIDEO_DECODE_CLASS) {
+		/*
+		 * HEVC support is present on first engine instance
+		 * before Gen11 and on all instances afterwards.
+		 */
+		if (INTEL_GEN(i915) >= 11 ||
+		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
+			engine->uabi_capabilities |=
+				I915_VIDEO_CLASS_CAPABILITY_HEVC;
+
+		/*
+		 * SFC block is present only on even logical engine
+		 * instances.
+		 */
+		if ((INTEL_GEN(i915) >= 11 &&
+		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
+		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
+			engine->uabi_capabilities |=
+				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
+	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
+		if (INTEL_GEN(i915) >= 9)
+			engine->uabi_capabilities |=
+				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
+	}
+}
+
+static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id)
+		__setup_engine_capabilities(engine);
+}
+
+/**
+ * intel_engines_cleanup() - free the resources allocated for Command Streamers
+ * @i915: the i915 devic
+ */
+void intel_engines_cleanup(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		engine->destroy(engine);
+		i915->engine[id] = NULL;
+	}
+}
+
 /**
  * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @dev_priv: i915 device private
+ * @i915: the i915 device
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
+int intel_engines_init_mmio(struct drm_i915_private *i915)
 {
-	struct intel_device_info *device_info = mkwrite_device_info(dev_priv);
-	const unsigned int engine_mask = INTEL_INFO(dev_priv)->engine_mask;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct intel_device_info *device_info = mkwrite_device_info(i915);
+	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
 	unsigned int mask = 0;
 	unsigned int i;
 	int err;
@@ -360,10 +427,10 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		return -ENODEV;
 
 	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
-		if (!HAS_ENGINE(dev_priv, i))
+		if (!HAS_ENGINE(i915, i))
 			continue;
 
-		err = intel_engine_setup(dev_priv, i);
+		err = intel_engine_setup(i915, i);
 		if (err)
 			goto cleanup;
 
@@ -379,69 +446,52 @@ int intel_engines_init_mmio(struct drm_i915_private *dev_priv)
 		device_info->engine_mask = mask;
 
 	/* We always presume we have at least RCS available for later probing */
-	if (WARN_ON(!HAS_ENGINE(dev_priv, RCS0))) {
+	if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
 		err = -ENODEV;
 		goto cleanup;
 	}
 
-	RUNTIME_INFO(dev_priv)->num_engines = hweight32(mask);
+	RUNTIME_INFO(i915)->num_engines = hweight32(mask);
 
-	i915_check_and_clear_faults(dev_priv);
+	i915_check_and_clear_faults(i915);
+
+	intel_setup_engine_capabilities(i915);
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id)
-		kfree(engine);
+	intel_engines_cleanup(i915);
 	return err;
 }
 
 /**
  * intel_engines_init() - init the Engine Command Streamers
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Return: non-zero if the initialization failed.
  */
-int intel_engines_init(struct drm_i915_private *dev_priv)
+int intel_engines_init(struct drm_i915_private *i915)
 {
+	int (*init)(struct intel_engine_cs *engine);
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id, err_id;
+	enum intel_engine_id id;
 	int err;
 
-	for_each_engine(engine, dev_priv, id) {
-		const struct engine_class_info *class_info =
-			&intel_engine_classes[engine->class];
-		int (*init)(struct intel_engine_cs *engine);
-
-		if (HAS_EXECLISTS(dev_priv))
-			init = class_info->init_execlists;
-		else
-			init = class_info->init_legacy;
-
-		err = -EINVAL;
-		err_id = id;
-
-		if (GEM_DEBUG_WARN_ON(!init))
-			goto cleanup;
+	if (HAS_EXECLISTS(i915))
+		init = intel_execlists_submission_init;
+	else
+		init = intel_ring_submission_init;
 
+	for_each_engine(engine, i915, id) {
 		err = init(engine);
 		if (err)
 			goto cleanup;
-
-		GEM_BUG_ON(!engine->submit_request);
 	}
 
 	return 0;
 
 cleanup:
-	for_each_engine(engine, dev_priv, id) {
-		if (id >= err_id) {
-			kfree(engine);
-			dev_priv->engine[id] = NULL;
-		} else {
-			dev_priv->gt.cleanup_engine(engine);
-		}
-	}
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -450,7 +500,7 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
-static void intel_engine_init_execlist(struct intel_engine_cs *engine)
+void intel_engine_init_execlists(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 
@@ -477,7 +527,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
 		i915_vma_unpin(vma);
 
 	i915_gem_object_unpin_map(vma->obj);
-	__i915_gem_object_release_unless_active(vma->obj);
+	i915_gem_object_put(vma->obj);
 }
 
 static int pin_ggtt_status_page(struct intel_engine_cs *engine,
@@ -557,41 +607,71 @@ err:
 	return ret;
 }
 
+static int intel_engine_setup_common(struct intel_engine_cs *engine)
+{
+	int err;
+
+	init_llist_head(&engine->barrier_tasks);
+
+	err = init_status_page(engine);
+	if (err)
+		return err;
+
+	intel_engine_init_active(engine, ENGINE_PHYSICAL);
+	intel_engine_init_breadcrumbs(engine);
+	intel_engine_init_execlists(engine);
+	intel_engine_init_hangcheck(engine);
+	intel_engine_init_batch_pool(engine);
+	intel_engine_init_cmd_parser(engine);
+	intel_engine_init__pm(engine);
+
+	/* Use the whole device by default */
+	engine->sseu =
+		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
+
+	return 0;
+}
+
 /**
- * intel_engines_setup_common - setup engine state not requiring hw access
- * @engine: Engine to setup.
+ * intel_engines_setup- setup engine state not requiring hw access
+ * @i915: Device to setup.
  *
- * Initializes @engine@ structure members shared between legacy and execlists
+ * Initializes engine structure members shared between legacy and execlists
  * submission modes which do not require hardware access.
  *
  * Typically done early in the submission mode specific engine setup stage.
  */
-int intel_engine_setup_common(struct intel_engine_cs *engine)
+int intel_engines_setup(struct drm_i915_private *i915)
 {
+	int (*setup)(struct intel_engine_cs *engine);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err;
 
-	err = init_status_page(engine);
-	if (err)
-		return err;
+	if (HAS_EXECLISTS(i915))
+		setup = intel_execlists_submission_setup;
+	else
+		setup = intel_ring_submission_setup;
 
-	err = i915_timeline_init(engine->i915,
-				 &engine->timeline,
-				 engine->status_page.vma);
-	if (err)
-		goto err_hwsp;
+	for_each_engine(engine, i915, id) {
+		err = intel_engine_setup_common(engine);
+		if (err)
+			goto cleanup;
 
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
+		err = setup(engine);
+		if (err)
+			goto cleanup;
 
-	intel_engine_init_breadcrumbs(engine);
-	intel_engine_init_execlist(engine);
-	intel_engine_init_hangcheck(engine);
-	intel_engine_init_batch_pool(engine);
-	intel_engine_init_cmd_parser(engine);
+		/* We expect the backend to take control over its state */
+		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
+
+		GEM_BUG_ON(!engine->cops);
+	}
 
 	return 0;
 
-err_hwsp:
-	cleanup_status_page(engine);
+cleanup:
+	intel_engines_cleanup(i915);
 	return err;
 }
 
@@ -675,6 +755,7 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
 		goto out_timeline;
 
 	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
 
 	i915_timeline_unpin(&frame->timeline);
 
@@ -690,15 +771,42 @@ static int pin_context(struct i915_gem_context *ctx,
 		       struct intel_context **out)
 {
 	struct intel_context *ce;
+	int err;
 
-	ce = intel_context_pin(ctx, engine);
+	ce = i915_gem_context_get_engine(ctx, engine->id);
 	if (IS_ERR(ce))
 		return PTR_ERR(ce);
 
+	err = intel_context_pin(ce);
+	intel_context_put(ce);
+	if (err)
+		return err;
+
 	*out = ce;
 	return 0;
 }
 
+void
+intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
+{
+	INIT_LIST_HEAD(&engine->active.requests);
+
+	spin_lock_init(&engine->active.lock);
+	lockdep_set_subclass(&engine->active.lock, subclass);
+
+	/*
+	 * Due to an interesting quirk in lockdep's internal debug tracking,
+	 * after setting a subclass we must ensure the lock is used. Otherwise,
+	 * nr_unused_locks is incremented once too often.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	local_irq_disable();
+	lock_map_acquire(&engine->active.lock.dep_map);
+	lock_map_release(&engine->active.lock.dep_map);
+	local_irq_enable();
+#endif
+}
+
 /**
  * intel_engines_init_common - initialize cengine state which might require hw access
  * @engine: Engine to initialize.
@@ -753,30 +861,6 @@ err_unpin:
 	return ret;
 }
 
-void intel_gt_resume(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	/*
-	 * After resume, we may need to poke into the pinned kernel
-	 * contexts to paper over any damage caused by the sudden suspend.
-	 * Only the kernel contexts should remain pinned over suspend,
-	 * allowing us to fixup the user contexts on their first pin.
-	 */
-	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-
-		ce = engine->kernel_context;
-		if (ce)
-			ce->ops->reset(ce);
-
-		ce = engine->preempt_context;
-		if (ce)
-			ce->ops->reset(ce);
-	}
-}
-
 /**
  * intel_engines_cleanup_common - cleans up the engine state created by
  *                                the common initiailizers.
@@ -786,6 +870,8 @@ void intel_gt_resume(struct drm_i915_private *i915)
  */
 void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 {
+	GEM_BUG_ON(!list_empty(&engine->active.requests));
+
 	cleanup_status_page(engine);
 
 	intel_engine_fini_breadcrumbs(engine);
@@ -798,8 +884,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	if (engine->preempt_context)
 		intel_context_unpin(engine->preempt_context);
 	intel_context_unpin(engine->kernel_context);
-
-	i915_timeline_fini(&engine->timeline);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
 
 	intel_wa_list_free(&engine->ctx_wa_list);
 	intel_wa_list_free(&engine->wa_list);
@@ -900,11 +985,12 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
 	return mcr_s_ss_select;
 }
 
-static inline u32
-read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
-		  int subslice, i915_reg_t reg)
+static u32
+read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
+		  i915_reg_t reg)
 {
-	struct intel_uncore *uncore = &dev_priv->uncore;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
 	u32 mcr_slice_subslice_mask;
 	u32 mcr_slice_subslice_select;
 	u32 default_mcr_s_ss_select;
@@ -912,7 +998,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 	u32 ret;
 	enum forcewake_domains fw_domains;
 
-	if (INTEL_GEN(dev_priv) >= 11) {
+	if (INTEL_GEN(i915) >= 11) {
 		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
 					  GEN11_MCR_SUBSLICE_MASK;
 		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
@@ -924,7 +1010,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 					    GEN8_MCR_SUBSLICE(subslice);
 	}
 
-	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(i915);
 
 	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
 						    FW_REG_READ);
@@ -961,7 +1047,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
 	struct intel_uncore *uncore = engine->uncore;
 	u32 mmio_base = engine->mmio_base;
 	int slice;
@@ -969,7 +1055,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
 
 	memset(instdone, 0, sizeof(*instdone));
 
-	switch (INTEL_GEN(dev_priv)) {
+	switch (INTEL_GEN(i915)) {
 	default:
 		instdone->instdone =
 			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
@@ -979,12 +1065,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
 
 		instdone->slice_common =
 			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
-		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+		for_each_instdone_slice_subslice(i915, slice, subslice) {
 			instdone->sampler[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
+				read_subslice_reg(engine, slice, subslice,
 						  GEN7_SAMPLER_INSTDONE);
 			instdone->row[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
+				read_subslice_reg(engine, slice, subslice,
 						  GEN7_ROW_INSTDONE);
 		}
 		break;
@@ -1030,7 +1116,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 		return true;
 
 	/* If the whole device is asleep, the engine must be idle */
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
 	if (!wakeref)
 		return true;
 
@@ -1044,7 +1130,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
 	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
 		idle = false;
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return idle;
 }
@@ -1062,10 +1148,15 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
 	if (i915_reset_failed(engine->i915))
 		return true;
 
+	if (!intel_wakeref_active(&engine->wakeref))
+		return true;
+
 	/* Waiting to drain ELSP? */
 	if (READ_ONCE(engine->execlists.active)) {
 		struct tasklet_struct *t = &engine->execlists.tasklet;
 
+		synchronize_hardirq(engine->i915->drm.irq);
+
 		local_bh_disable();
 		if (tasklet_trylock(t)) {
 			/* Must wait for any GPU reset in progress. */
@@ -1123,137 +1214,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 		engine->set_default_submission(engine);
 }
 
-static bool reset_engines(struct drm_i915_private *i915)
-{
-	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
-		return false;
-
-	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
-}
-
-/**
- * intel_engines_sanitize: called after the GPU has lost power
- * @i915: the i915 device
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_engines_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	GEM_TRACE("\n");
-
-	if (!reset_engines(i915) && !force)
-		return;
-
-	for_each_engine(engine, i915, id)
-		intel_engine_reset(engine, false);
-}
-
-/**
- * intel_engines_park: called when the GT is transitioning from busy->idle
- * @i915: the i915 device
- *
- * The GT is now idle and about to go to sleep (maybe never to wake again?).
- * Time for us to tidy and put away our toys (release resources back to the
- * system).
- */
-void intel_engines_park(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, i915, id) {
-		/* Flush the residual irq tasklets first. */
-		intel_engine_disarm_breadcrumbs(engine);
-		tasklet_kill(&engine->execlists.tasklet);
-
-		/*
-		 * We are committed now to parking the engines, make sure there
-		 * will be no more interrupts arriving later and the engines
-		 * are truly idle.
-		 */
-		if (wait_for(intel_engine_is_idle(engine), 10)) {
-			struct drm_printer p = drm_debug_printer(__func__);
-
-			dev_err(i915->drm.dev,
-				"%s is not idle before parking\n",
-				engine->name);
-			intel_engine_dump(engine, &p, NULL);
-		}
-
-		/* Must be reset upon idling, or we may miss the busy wakeup. */
-		GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
-
-		if (engine->park)
-			engine->park(engine);
-
-		if (engine->pinned_default_state) {
-			i915_gem_object_unpin_map(engine->default_state);
-			engine->pinned_default_state = NULL;
-		}
-
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-		engine->execlists.no_priolist = false;
-	}
-
-	i915->gt.active_engines = 0;
-}
-
-/**
- * intel_engines_unpark: called when the GT is transitioning from idle->busy
- * @i915: the i915 device
- *
- * The GT was idle and now about to fire up with some new user requests.
- */
-void intel_engines_unpark(struct drm_i915_private *i915)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, i915, id) {
-		void *map;
-
-		/* Pin the default state for fast resets from atomic context. */
-		map = NULL;
-		if (engine->default_state)
-			map = i915_gem_object_pin_map(engine->default_state,
-						      I915_MAP_WB);
-		if (!IS_ERR_OR_NULL(map))
-			engine->pinned_default_state = map;
-
-		if (engine->unpark)
-			engine->unpark(engine);
-
-		intel_engine_init_hangcheck(engine);
-	}
-}
-
-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 {
 	switch (INTEL_GEN(engine->i915)) {
@@ -1312,8 +1272,11 @@ static void print_request(struct drm_printer *m,
 		   i915_request_completed(rq) ? "!" :
 		   i915_request_started(rq) ? "*" :
 		   "",
+		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+			    &rq->fence.flags) ? "+" :
 		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
-			    &rq->fence.flags) ?  "+" : "",
+			    &rq->fence.flags) ? "-" :
+		   "",
 		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
 		   name);
@@ -1348,12 +1311,13 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
 	}
 }
 
-static void intel_engine_print_registers(const struct intel_engine_cs *engine,
+static void intel_engine_print_registers(struct intel_engine_cs *engine,
 					 struct drm_printer *m)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	const struct intel_engine_execlists * const execlists =
 		&engine->execlists;
+	unsigned long flags;
 	u64 addr;
 
 	if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1434,15 +1398,16 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 				   idx, hws[idx * 2], hws[idx * 2 + 1]);
 		}
 
-		rcu_read_lock();
+		spin_lock_irqsave(&engine->active.lock, flags);
 		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
 			struct i915_request *rq;
 			unsigned int count;
+			char hdr[80];
 
 			rq = port_unpack(&execlists->port[idx], &count);
-			if (rq) {
-				char hdr[80];
-
+			if (!rq) {
+				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
+			} else if (!i915_request_signaled(rq)) {
 				snprintf(hdr, sizeof(hdr),
 					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
 					 idx, count,
@@ -1451,11 +1416,11 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
 					 hwsp_seqno(rq));
 				print_request(m, rq, hdr);
 			} else {
-				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
+				print_request(m, rq, "\t\tELSP[%d] rq: ");
 			}
 		}
 		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
-		rcu_read_unlock();
+		spin_unlock_irqrestore(&engine->active.lock, flags);
 	} else if (INTEL_GEN(dev_priv) > 6) {
 		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
 			   ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1518,9 +1483,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	if (i915_reset_failed(engine->i915))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tHangcheck %x:%x [%d ms]\n",
-		   engine->hangcheck.last_seqno,
-		   engine->hangcheck.next_seqno,
+	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
+	drm_printf(m, "\tHangcheck: %d ms ago\n",
 		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
@@ -1530,16 +1494,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	drm_printf(m, "\tRequests:\n");
 
-	rq = list_first_entry(&engine->timeline.requests,
-			      struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tfirst  ");
-
-	rq = list_last_entry(&engine->timeline.requests,
-			     struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tlast   ");
-
 	rq = intel_engine_find_active_request(engine);
 	if (rq) {
 		print_request(m, rq, "\t\tactive ");
@@ -1562,10 +1516,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 
 	rcu_read_unlock();
 
-	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
+	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
 	if (wakeref) {
 		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put(engine->i915, wakeref);
+		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
 	} else {
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
@@ -1620,7 +1574,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 	if (!intel_engine_supports_stats(engine))
 		return -ENODEV;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	write_seqlock(&engine->stats.lock);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1646,7 +1600,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
 
 unlock:
 	write_sequnlock(&engine->stats.lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return err;
 }
@@ -1731,27 +1685,26 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
 	 * At all other times, we must assume the GPU is still running, but
 	 * we only care about the snapshot of this moment.
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (i915_request_completed(request))
 			continue;
 
 		if (!i915_request_started(request))
-			break;
+			continue;
 
 		/* More than one preemptible request may match! */
 		if (!match_ring(request))
-			break;
+			continue;
 
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	return active;
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_engine.c"
-#include "selftests/intel_engine_cs.c"
+#include "selftest_engine_cs.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
new file mode 100644
index 000000000000..2ce00d3dc42a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -0,0 +1,168 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "intel_engine.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_pm.h"
+
+static int __engine_unpark(struct intel_wakeref *wf)
+{
+	struct intel_engine_cs *engine =
+		container_of(wf, typeof(*engine), wakeref);
+	void *map;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	intel_gt_pm_get(engine->i915);
+
+	/* Pin the default state for fast resets from atomic context. */
+	map = NULL;
+	if (engine->default_state)
+		map = i915_gem_object_pin_map(engine->default_state,
+					      I915_MAP_WB);
+	if (!IS_ERR_OR_NULL(map))
+		engine->pinned_default_state = map;
+
+	if (engine->unpark)
+		engine->unpark(engine);
+
+	intel_engine_init_hangcheck(engine);
+	return 0;
+}
+
+void intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+	intel_wakeref_get(&engine->i915->runtime_pm, &engine->wakeref, __engine_unpark);
+}
+
+void intel_engine_park(struct intel_engine_cs *engine)
+{
+	/*
+	 * We are committed now to parking this engine, make sure there
+	 * will be no more interrupts arriving later and the engine
+	 * is truly idle.
+	 */
+	if (wait_for(intel_engine_is_idle(engine), 10)) {
+		struct drm_printer p = drm_debug_printer(__func__);
+
+		dev_err(engine->i915->drm.dev,
+			"%s is not idle before parking\n",
+			engine->name);
+		intel_engine_dump(engine, &p, NULL);
+	}
+}
+
+static bool switch_to_kernel_context(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	/* Already inside the kernel context, safe to power down. */
+	if (engine->wakeref_serial == engine->serial)
+		return true;
+
+	/* GPU is pointing to the void, as good as in the kernel context. */
+	if (i915_reset_failed(engine->i915))
+		return true;
+
+	/*
+	 * Note, we do this without taking the timeline->mutex. We cannot
+	 * as we may be called while retiring the kernel context and so
+	 * already underneath the timeline->mutex. Instead we rely on the
+	 * exclusive property of the __engine_park that prevents anyone
+	 * else from creating a request on this engine. This also requires
+	 * that the ring is empty and we avoid any waits while constructing
+	 * the context, as they assume protection by the timeline->mutex.
+	 * This should hold true as we can only park the engine after
+	 * retiring the last request, thus all rings should be empty and
+	 * all timelines idle.
+	 */
+	rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+	if (IS_ERR(rq))
+		/* Context switch failed, hope for the best! Maybe reset? */
+		return true;
+
+	/* Check again on the next retirement. */
+	engine->wakeref_serial = engine->serial + 1;
+
+	i915_request_add_barriers(rq);
+	__i915_request_commit(rq);
+
+	return false;
+}
+
+static int __engine_park(struct intel_wakeref *wf)
+{
+	struct intel_engine_cs *engine =
+		container_of(wf, typeof(*engine), wakeref);
+
+	engine->saturated = 0;
+
+	/*
+	 * If one and only one request is completed between pm events,
+	 * we know that we are inside the kernel context and it is
+	 * safe to power down. (We are paranoid in case that runtime
+	 * suspend causes corruption to the active context image, and
+	 * want to avoid that impacting userspace.)
+	 */
+	if (!switch_to_kernel_context(engine))
+		return -EBUSY;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	intel_engine_disarm_breadcrumbs(engine);
+
+	/* Must be reset upon idling, or we may miss the busy wakeup. */
+	GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
+
+	if (engine->park)
+		engine->park(engine);
+
+	if (engine->pinned_default_state) {
+		i915_gem_object_unpin_map(engine->default_state);
+		engine->pinned_default_state = NULL;
+	}
+
+	engine->execlists.no_priolist = false;
+
+	intel_gt_pm_put(engine->i915);
+	return 0;
+}
+
+void intel_engine_pm_put(struct intel_engine_cs *engine)
+{
+	intel_wakeref_put(&engine->i915->runtime_pm, &engine->wakeref, __engine_park);
+}
+
+void intel_engine_init__pm(struct intel_engine_cs *engine)
+{
+	intel_wakeref_init(&engine->wakeref);
+}
+
+int intel_engines_resume(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	intel_gt_pm_get(i915);
+	for_each_engine(engine, i915, id) {
+		intel_engine_pm_get(engine);
+		engine->serial++; /* kernel context lost */
+		err = engine->resume(engine);
+		intel_engine_pm_put(engine);
+		if (err) {
+			dev_err(i915->drm.dev,
+				"Failed to restart %s (%d)\n",
+				engine->name, err);
+			break;
+		}
+	}
+	intel_gt_pm_put(i915);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
new file mode 100644
index 000000000000..b326cd993d60
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_PM_H
+#define INTEL_ENGINE_PM_H
+
+struct drm_i915_private;
+struct intel_engine_cs;
+
+void intel_engine_pm_get(struct intel_engine_cs *engine);
+void intel_engine_pm_put(struct intel_engine_cs *engine);
+
+void intel_engine_park(struct intel_engine_cs *engine);
+
+void intel_engine_init__pm(struct intel_engine_cs *engine);
+
+int intel_engines_resume(struct drm_i915_private *i915);
+
+#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 1f970c76b6a6..868b220214f8 100644
--- a/drivers/gpu/drm/i915/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -11,23 +11,26 @@
 #include <linux/irq_work.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/llist.h>
 #include <linux/types.h>
 
 #include "i915_gem.h"
+#include "i915_gem_batch_pool.h"
+#include "i915_pmu.h"
 #include "i915_priolist_types.h"
 #include "i915_selftest.h"
 #include "i915_timeline_types.h"
+#include "intel_sseu.h"
+#include "intel_wakeref.h"
 #include "intel_workarounds_types.h"
 
-#include "i915_gem_batch_pool.h"
-#include "i915_pmu.h"
-
 #define I915_MAX_SLICES	3
 #define I915_MAX_SUBSLICES 8
 
 #define I915_CMD_HASH_ORDER 9
 
 struct dma_fence;
+struct drm_i915_gem_object;
 struct drm_i915_reg_table;
 struct i915_gem_context;
 struct i915_request;
@@ -52,8 +55,8 @@ struct intel_instdone {
 
 struct intel_engine_hangcheck {
 	u64 acthd;
-	u32 last_seqno;
-	u32 next_seqno;
+	u32 last_ring;
+	u32 last_head;
 	unsigned long action_timestamp;
 	struct intel_instdone instdone;
 };
@@ -226,6 +229,7 @@ struct intel_engine_execlists {
 	 * @queue: queue of requests, in priority lists
 	 */
 	struct rb_root_cached queue;
+	struct rb_root_cached virtual;
 
 	/**
 	 * @csb_write: control register for Context Switch buffer
@@ -278,13 +282,28 @@ struct intel_engine_cs {
 	u32 context_size;
 	u32 mmio_base;
 
+	u32 uabi_capabilities;
+
+	struct intel_sseu sseu;
+
 	struct intel_ring *buffer;
 
-	struct i915_timeline timeline;
+	struct {
+		spinlock_t lock;
+		struct list_head requests;
+	} active;
+
+	struct llist_head barrier_tasks;
 
 	struct intel_context *kernel_context; /* pinned */
 	struct intel_context *preempt_context; /* pinned; optional */
 
+	intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
+	unsigned long serial;
+
+	unsigned long wakeref_serial;
+	struct intel_wakeref wakeref;
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
 
@@ -357,7 +376,7 @@ struct intel_engine_cs {
 	void		(*irq_enable)(struct intel_engine_cs *engine);
 	void		(*irq_disable)(struct intel_engine_cs *engine);
 
-	int		(*init_hw)(struct intel_engine_cs *engine);
+	int		(*resume)(struct intel_engine_cs *engine);
 
 	struct {
 		void (*prepare)(struct intel_engine_cs *engine);
@@ -398,6 +417,13 @@ struct intel_engine_cs {
 	void		(*submit_request)(struct i915_request *rq);
 
 	/*
+	 * Called on signaling of a SUBMIT_FENCE, passing along the signaling
+	 * request down to the bonded pairs.
+	 */
+	void            (*bond_execute)(struct i915_request *rq,
+					struct dma_fence *signal);
+
+	/*
 	 * Call when the priority on a request has changed and it and its
 	 * dependencies may need rescheduling. Note the request itself may
 	 * not be ready to run!
@@ -413,21 +439,10 @@ struct intel_engine_cs {
 	 */
 	void		(*cancel_requests)(struct intel_engine_cs *engine);
 
-	void		(*cleanup)(struct intel_engine_cs *engine);
+	void		(*destroy)(struct intel_engine_cs *engine);
 
 	struct intel_engine_execlists execlists;
 
-	/* Contexts are pinned whilst they are active on the GPU. The last
-	 * context executed remains active whilst the GPU is idle - the
-	 * switch away and write to the context object only occurs on the
-	 * next execution.  Contexts are only unpinned on retirement of the
-	 * following request ensuring that we can always write to the object
-	 * on the context switch even after idling. Across suspend, we switch
-	 * to the kernel context and trash it as the save may not happen
-	 * before the hardware is powered down.
-	 */
-	struct intel_context *last_retired_context;
-
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
@@ -438,6 +453,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
+#define I915_ENGINE_IS_VIRTUAL       BIT(5)
 	unsigned int flags;
 
 	/*
@@ -527,6 +543,12 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
 
+static inline bool
+intel_engine_is_virtual(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_IS_VIRTUAL;
+}
+
 #define instdone_slice_mask(dev_priv__) \
 	(IS_GEN(dev_priv__, 7) ? \
 	 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..eec31e36aca7 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -180,6 +180,7 @@
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
 
 #define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
+#define XY_COLOR_BLT_CMD		(2 << 29 | 0x50 << 22)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
 #define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
 #define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
new file mode 100644
index 000000000000..7b5967751762
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -0,0 +1,143 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt_pm.h"
+#include "intel_pm.h"
+#include "intel_wakeref.h"
+
+static void pm_notify(struct drm_i915_private *i915, int state)
+{
+	blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+}
+
+static int intel_gt_unpark(struct intel_wakeref *wf)
+{
+	struct drm_i915_private *i915 =
+		container_of(wf, typeof(*i915), gt.wakeref);
+
+	GEM_TRACE("\n");
+
+	/*
+	 * It seems that the DMC likes to transition between the DC states a lot
+	 * when there are no connected displays (no active power domains) during
+	 * command submission.
+	 *
+	 * This activity has negative impact on the performance of the chip with
+	 * huge latencies observed in the interrupt handler and elsewhere.
+	 *
+	 * Work around it by grabbing a GT IRQ power domain whilst there is any
+	 * GT activity, preventing any DC state transitions.
+	 */
+	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+	GEM_BUG_ON(!i915->gt.awake);
+
+	intel_enable_gt_powersave(i915);
+
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+
+	i915_pmu_gt_unparked(i915);
+
+	i915_queue_hangcheck(i915);
+
+	pm_notify(i915, INTEL_GT_UNPARK);
+
+	return 0;
+}
+
+void intel_gt_pm_get(struct drm_i915_private *i915)
+{
+	intel_wakeref_get(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_unpark);
+}
+
+static int intel_gt_park(struct intel_wakeref *wf)
+{
+	struct drm_i915_private *i915 =
+		container_of(wf, typeof(*i915), gt.wakeref);
+	intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+
+	GEM_TRACE("\n");
+
+	pm_notify(i915, INTEL_GT_PARK);
+
+	i915_pmu_gt_parked(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_idle(i915);
+
+	GEM_BUG_ON(!wakeref);
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+
+	return 0;
+}
+
+void intel_gt_pm_put(struct drm_i915_private *i915)
+{
+	intel_wakeref_put(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_park);
+}
+
+void intel_gt_pm_init(struct drm_i915_private *i915)
+{
+	intel_wakeref_init(&i915->gt.wakeref);
+	BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications);
+}
+
+static bool reset_engines(struct drm_i915_private *i915)
+{
+	if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+		return false;
+
+	return intel_gpu_reset(i915, ALL_ENGINES) == 0;
+}
+
+/**
+ * intel_gt_sanitize: called after the GPU has lost power
+ * @i915: the i915 device
+ * @force: ignore a failed reset and sanitize engine state anyway
+ *
+ * Anytime we reset the GPU, either with an explicit GPU reset or through a
+ * PCI power cycle, the GPU loses state and we must reset our state tracking
+ * to match. Note that calling intel_gt_sanitize() if the GPU has not
+ * been reset results in much confusion!
+ */
+void intel_gt_sanitize(struct drm_i915_private *i915, bool force)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	GEM_TRACE("\n");
+
+	if (!reset_engines(i915) && !force)
+		return;
+
+	for_each_engine(engine, i915, id)
+		intel_engine_reset(engine, false);
+}
+
+void intel_gt_resume(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/*
+	 * After resume, we may need to poke into the pinned kernel
+	 * contexts to paper over any damage caused by the sudden suspend.
+	 * Only the kernel contexts should remain pinned over suspend,
+	 * allowing us to fixup the user contexts on their first pin.
+	 */
+	for_each_engine(engine, i915, id) {
+		struct intel_context *ce;
+
+		ce = engine->kernel_context;
+		if (ce)
+			ce->ops->reset(ce);
+
+		ce = engine->preempt_context;
+		if (ce)
+			ce->ops->reset(ce);
+	}
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
new file mode 100644
index 000000000000..7dd1130a19a4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PM_H
+#define INTEL_GT_PM_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+
+enum {
+	INTEL_GT_UNPARK,
+	INTEL_GT_PARK,
+};
+
+void intel_gt_pm_get(struct drm_i915_private *i915);
+void intel_gt_pm_put(struct drm_i915_private *i915);
+
+void intel_gt_pm_init(struct drm_i915_private *i915);
+
+void intel_gt_sanitize(struct drm_i915_private *i915, bool force);
+void intel_gt_resume(struct drm_i915_private *i915);
+
+#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 3d51ed1428d4..6bcfa6456c45 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -22,12 +22,13 @@
  *
  */
 
+#include "intel_reset.h"
 #include "i915_drv.h"
-#include "i915_reset.h"
 
 struct hangcheck {
 	u64 acthd;
-	u32 seqno;
+	u32 ring;
+	u32 head;
 	enum intel_engine_hangcheck_action action;
 	unsigned long action_timestamp;
 	int deadlock;
@@ -133,26 +134,31 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct hangcheck *hc)
 {
 	hc->acthd = intel_engine_get_active_head(engine);
-	hc->seqno = intel_engine_get_hangcheck_seqno(engine);
+	hc->ring = ENGINE_READ(engine, RING_START);
+	hc->head = ENGINE_READ(engine, RING_HEAD);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
 				   const struct hangcheck *hc)
 {
 	engine->hangcheck.acthd = hc->acthd;
-	engine->hangcheck.last_seqno = hc->seqno;
+	engine->hangcheck.last_ring = hc->ring;
+	engine->hangcheck.last_head = hc->head;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
 		     const struct hangcheck *hc)
 {
-	if (engine->hangcheck.last_seqno != hc->seqno)
-		return ENGINE_ACTIVE_SEQNO;
-
 	if (intel_engine_is_idle(engine))
 		return ENGINE_IDLE;
 
+	if (engine->hangcheck.last_ring != hc->ring)
+		return ENGINE_ACTIVE_SEQNO;
+
+	if (engine->hangcheck.last_head != hc->head)
+		return ENGINE_ACTIVE_SEQNO;
+
 	return engine_stuck(engine, hc->acthd);
 }
 
@@ -217,8 +223,8 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
 }
 
 static void hangcheck_declare_hang(struct drm_i915_private *i915,
-				   unsigned int hung,
-				   unsigned int stuck)
+				   intel_engine_mask_t hung,
+				   intel_engine_mask_t stuck)
 {
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t tmp;
@@ -253,9 +259,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv),
 			     gpu_error.hangcheck_work.work);
+	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	unsigned int hung = 0, stuck = 0, wedged = 0;
+	intel_wakeref_t wakeref;
 
 	if (!i915_modparams.enable_hangcheck)
 		return;
@@ -266,6 +273,10 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	if (i915_terminally_wedged(dev_priv))
 		return;
 
+	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
+	if (!wakeref)
+		return;
+
 	/* As enabling the GPU requires fairly extensive mmio access,
 	 * periodically arm the mmio checker to see if we are triggering
 	 * any invalid access.
@@ -313,6 +324,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	if (hung)
 		hangcheck_declare_hang(dev_priv, hung, stuck);
 
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+
 	/* Reset timer in case GPU hangs without another request being added */
 	i915_queue_hangcheck(dev_priv);
 }
@@ -330,5 +343,5 @@ void intel_hangcheck_init(struct drm_i915_private *i915)
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_hangcheck.c"
+#include "selftest_hangcheck.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 11e5a86610bf..b42b5f158295 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,13 +133,15 @@
  */
 #include <linux/interrupt.h>
 
-#include <drm/i915_drm.h>
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
-#include "i915_reset.h"
 #include "i915_vgpu.h"
+#include "intel_engine_pm.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_reset.h"
 #include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -164,7 +166,53 @@
 #define WA_TAIL_DWORDS 2
 #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
 
-#define ACTIVE_PRIORITY (I915_PRIORITY_NOSEMAPHORE)
+struct virtual_engine {
+	struct intel_engine_cs base;
+	struct intel_context context;
+
+	/*
+	 * We allow only a single request through the virtual engine at a time
+	 * (each request in the timeline waits for the completion fence of
+	 * the previous before being submitted). By restricting ourselves to
+	 * only submitting a single request, each request is placed on to a
+	 * physical to maximise load spreading (by virtue of the late greedy
+	 * scheduling -- each real engine takes the next available request
+	 * upon idling).
+	 */
+	struct i915_request *request;
+
+	/*
+	 * We keep a rbtree of available virtual engines inside each physical
+	 * engine, sorted by priority. Here we preallocate the nodes we need
+	 * for the virtual engine, indexed by physical_engine->id.
+	 */
+	struct ve_node {
+		struct rb_node rb;
+		int prio;
+	} nodes[I915_NUM_ENGINES];
+
+	/*
+	 * Keep track of bonded pairs -- restrictions upon on our selection
+	 * of physical engines any particular request may be submitted to.
+	 * If we receive a submit-fence from a master engine, we will only
+	 * use one of sibling_mask physical engines.
+	 */
+	struct ve_bond {
+		const struct intel_engine_cs *master;
+		intel_engine_mask_t sibling_mask;
+	} *bonds;
+	unsigned int num_bonds;
+
+	/* And finally, which physical engines this virtual engine maps onto. */
+	unsigned int num_siblings;
+	struct intel_engine_cs *siblings[0];
+};
+
+static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
+{
+	GEM_BUG_ON(!intel_engine_is_virtual(engine));
+	return container_of(engine, struct virtual_engine, base);
+}
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
 					    struct intel_engine_cs *engine);
@@ -189,23 +237,12 @@ static int effective_prio(const struct i915_request *rq)
 
 	/*
 	 * On unwinding the active request, we give it a priority bump
-	 * equivalent to a freshly submitted request. This protects it from
-	 * being gazumped again, but it would be preferable if we didn't
-	 * let it be gazumped in the first place!
-	 *
-	 * See __unwind_incomplete_requests()
+	 * if it has completed waiting on any semaphore. If we know that
+	 * the request has already started, we can prevent an unwanted
+	 * preempt-to-idle cycle by taking that into account now.
 	 */
-	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(rq)) {
-		/*
-		 * After preemption, we insert the active request at the
-		 * end of the new priority level. This means that we will be
-		 * _lower_ priority than the preemptee all things equal (and
-		 * so the preemption is valid), so adjust our comparison
-		 * accordingly.
-		 */
-		prio |= ACTIVE_PRIORITY;
-		prio--;
-	}
+	if (__i915_request_has_started(rq))
+		prio |= I915_PRIORITY_NOSEMAPHORE;
 
 	/* Restrict mere WAIT boosts from triggering preemption */
 	return prio | __NO_PREEMPTION;
@@ -229,7 +266,8 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
-				const struct i915_request *rq)
+				const struct i915_request *rq,
+				struct rb_node *rb)
 {
 	int last_prio;
 
@@ -252,18 +290,37 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 	 * ourselves, ignore the request.
 	 */
 	last_prio = effective_prio(rq);
-	if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
-				      last_prio))
+	if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
+					 last_prio))
 		return false;
 
 	/*
 	 * Check against the first request in ELSP[1], it will, thanks to the
 	 * power of PI, be the highest priority of that context.
 	 */
-	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
-	    rq_prio(list_next_entry(rq, link)) > last_prio)
+	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
+	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
 		return true;
 
+	if (rb) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		bool preempt = false;
+
+		if (engine == ve->siblings[0]) { /* only preempt one sibling */
+			struct i915_request *next;
+
+			rcu_read_lock();
+			next = READ_ONCE(ve->request);
+			if (next)
+				preempt = rq_prio(next) > last_prio;
+			rcu_read_unlock();
+		}
+
+		if (preempt)
+			return preempt;
+	}
+
 	/*
 	 * If the inflight context did not trigger the preemption, then maybe
 	 * it was the set of queued requests? Pick the highest priority in
@@ -375,55 +432,46 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
 	struct i915_request *rq, *rn, *active = NULL;
 	struct list_head *uninitialized_var(pl);
-	int prio = I915_PRIORITY_INVALID | ACTIVE_PRIORITY;
+	int prio = I915_PRIORITY_INVALID;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline.requests,
-					 link) {
+					 &engine->active.requests,
+					 sched.link) {
+		struct intel_engine_cs *owner;
+
 		if (i915_request_completed(rq))
 			break;
 
 		__i915_request_unsubmit(rq);
 		unwind_wa_tail(rq);
 
-		GEM_BUG_ON(rq->hw_context->active);
-
-		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
-		if (rq_prio(rq) != prio) {
-			prio = rq_prio(rq);
-			pl = i915_sched_lookup_priolist(engine, prio);
-		}
-		GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+		GEM_BUG_ON(rq->hw_context->inflight);
 
-		list_add(&rq->sched.link, pl);
-
-		active = rq;
-	}
+		/*
+		 * Push the request back into the queue for later resubmission.
+		 * If this request is not native to this physical engine (i.e.
+		 * it came from a virtual source), push it back onto the virtual
+		 * engine so that it can be moved across onto another physical
+		 * engine as load dictates.
+		 */
+		owner = rq->hw_context->engine;
+		if (likely(owner == engine)) {
+			GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
+			if (rq_prio(rq) != prio) {
+				prio = rq_prio(rq);
+				pl = i915_sched_lookup_priolist(engine, prio);
+			}
+			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
 
-	/*
-	 * The active request is now effectively the start of a new client
-	 * stream, so give it the equivalent small priority bump to prevent
-	 * it being gazumped a second time by another peer.
-	 *
-	 * Note we have to be careful not to apply a priority boost to a request
-	 * still spinning on its semaphores. If the request hasn't started, that
-	 * means it is still waiting for its dependencies to be signaled, and
-	 * if we apply a priority boost to this request, we will boost it past
-	 * its signalers and so break PI.
-	 *
-	 * One consequence of this preemption boost is that we may jump
-	 * over lesser priorities (such as I915_PRIORITY_WAIT), effectively
-	 * making those priorities non-preemptible. They will be moved forward
-	 * in the priority queue, but they will not gain immediate access to
-	 * the GPU.
-	 */
-	if (~prio & ACTIVE_PRIORITY && __i915_request_has_started(active)) {
-		prio |= ACTIVE_PRIORITY;
-		active->sched.attr.priority = prio;
-		list_move_tail(&active->sched.link,
-			       i915_sched_lookup_priolist(engine, prio));
+			list_move(&rq->sched.link, pl);
+			active = rq;
+		} else {
+			rq->engine = owner;
+			owner->submit_request(rq);
+			active = NULL;
+		}
 	}
 
 	return active;
@@ -468,20 +516,41 @@ execlists_user_end(struct intel_engine_execlists *execlists)
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
-	GEM_BUG_ON(rq->hw_context->active);
+	GEM_BUG_ON(rq->hw_context->inflight);
 
 	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
 	intel_engine_context_in(rq->engine);
-	rq->hw_context->active = rq->engine;
+	rq->hw_context->inflight = rq->engine;
+}
+
+static void kick_siblings(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+	struct i915_request *next = READ_ONCE(ve->request);
+
+	if (next && next->execution_mask & ~rq->execution_mask)
+		tasklet_schedule(&ve->base.execlists.tasklet);
 }
 
 static inline void
 execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
-	rq->hw_context->active = NULL;
+	rq->hw_context->inflight = NULL;
 	intel_engine_context_out(rq->engine);
 	execlists_context_status_change(rq, status);
 	trace_i915_request_out(rq);
+
+	/*
+	 * If this is part of a virtual engine, its next request may have
+	 * been blocked waiting for access to the active context. We have
+	 * to kick all the siblings again in case we need to switch (e.g.
+	 * the next request is not runnable on this engine). Hopefully,
+	 * we will already have submitted the next request before the
+	 * tasklet runs and do not need to rebuild each virtual tree
+	 * and kick everyone again.
+	 */
+	if (rq->engine != rq->hw_context->engine)
+		kick_siblings(rq);
 }
 
 static u64 execlists_update_context(struct i915_request *rq)
@@ -535,7 +604,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 	 * that all ELSP are drained i.e. we have processed the CSB,
 	 * before allowing ourselves to idle and calling intel_runtime_pm_put().
 	 */
-	GEM_BUG_ON(!engine->i915->gt.awake);
+	GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref));
 
 	/*
 	 * ELSQ note: the submit queue is not cleared after being submitted
@@ -659,6 +728,93 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
 						  execlists));
 }
 
+static void virtual_update_register_offsets(u32 *regs,
+					    struct intel_engine_cs *engine)
+{
+	u32 base = engine->mmio_base;
+
+	/* Must match execlists_init_reg_state()! */
+
+	regs[CTX_CONTEXT_CONTROL] =
+		i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
+	regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
+	regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
+	regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
+	regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
+
+	regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
+	regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
+	regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
+	regs[CTX_SECOND_BB_HEAD_U] =
+		i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
+	regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
+	regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
+
+	regs[CTX_CTX_TIMESTAMP] =
+		i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
+	regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
+	regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
+	regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
+	regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
+	regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
+	regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
+	regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
+	regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
+
+	if (engine->class == RENDER_CLASS) {
+		regs[CTX_RCS_INDIRECT_CTX] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
+		regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
+			i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
+		regs[CTX_BB_PER_CTX_PTR] =
+			i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
+
+		regs[CTX_R_PWR_CLK_STATE] =
+			i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
+	}
+}
+
+static bool virtual_matches(const struct virtual_engine *ve,
+			    const struct i915_request *rq,
+			    const struct intel_engine_cs *engine)
+{
+	const struct intel_engine_cs *inflight;
+
+	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+		return false;
+
+	/*
+	 * We track when the HW has completed saving the context image
+	 * (i.e. when we have seen the final CS event switching out of
+	 * the context) and must not overwrite the context image before
+	 * then. This restricts us to only using the active engine
+	 * while the previous virtualized request is inflight (so
+	 * we reuse the register offsets). This is a very small
+	 * hystersis on the greedy seelction algorithm.
+	 */
+	inflight = READ_ONCE(ve->context.inflight);
+	if (inflight && inflight != engine)
+		return false;
+
+	return true;
+}
+
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
+				     struct intel_engine_cs *engine)
+{
+	struct intel_engine_cs *old = ve->siblings[0];
+
+	/* All unattached (rq->engine == old) must already be completed */
+
+	spin_lock(&old->breadcrumbs.irq_lock);
+	if (!list_empty(&ve->context.signal_link)) {
+		list_move_tail(&ve->context.signal_link,
+			       &engine->breadcrumbs.signalers);
+		intel_engine_queue_breadcrumbs(engine);
+	}
+	spin_unlock(&old->breadcrumbs.irq_lock);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -691,6 +847,26 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
+	for (rb = rb_first_cached(&execlists->virtual); rb; ) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (!rq) { /* lazily cleanup after another engine handled rq */
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		if (!virtual_matches(ve, rq, engine)) {
+			rb = rb_next(rb);
+			continue;
+		}
+
+		break;
+	}
+
 	if (last) {
 		/*
 		 * Don't resubmit or switch until all outstanding
@@ -712,7 +888,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
 			return;
 
-		if (need_preempt(engine, last)) {
+		if (need_preempt(engine, last, rb)) {
 			inject_preempt_context(engine);
 			return;
 		}
@@ -752,6 +928,93 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		last->tail = last->wa_tail;
 	}
 
+	while (rb) { /* XXX virtual is always taking precedence */
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq;
+
+		spin_lock(&ve->base.active.lock);
+
+		rq = ve->request;
+		if (unlikely(!rq)) { /* lost the race to a sibling */
+			spin_unlock(&ve->base.active.lock);
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+			rb = rb_first_cached(&execlists->virtual);
+			continue;
+		}
+
+		GEM_BUG_ON(rq != ve->request);
+		GEM_BUG_ON(rq->engine != &ve->base);
+		GEM_BUG_ON(rq->hw_context != &ve->context);
+
+		if (rq_prio(rq) >= queue_prio(execlists)) {
+			if (!virtual_matches(ve, rq, engine)) {
+				spin_unlock(&ve->base.active.lock);
+				rb = rb_next(rb);
+				continue;
+			}
+
+			if (last && !can_merge_rq(last, rq)) {
+				spin_unlock(&ve->base.active.lock);
+				return; /* leave this rq for another engine */
+			}
+
+			GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
+				  engine->name,
+				  rq->fence.context,
+				  rq->fence.seqno,
+				  i915_request_completed(rq) ? "!" :
+				  i915_request_started(rq) ? "*" :
+				  "",
+				  yesno(engine != ve->siblings[0]));
+
+			ve->request = NULL;
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			rb_erase_cached(rb, &execlists->virtual);
+			RB_CLEAR_NODE(rb);
+
+			GEM_BUG_ON(!(rq->execution_mask & engine->mask));
+			rq->engine = engine;
+
+			if (engine != ve->siblings[0]) {
+				u32 *regs = ve->context.lrc_reg_state;
+				unsigned int n;
+
+				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
+				virtual_update_register_offsets(regs, engine);
+
+				if (!list_empty(&ve->context.signals))
+					virtual_xfer_breadcrumbs(ve, engine);
+
+				/*
+				 * Move the bound engine to the top of the list
+				 * for future execution. We then kick this
+				 * tasklet first before checking others, so that
+				 * we preferentially reuse this set of bound
+				 * registers.
+				 */
+				for (n = 1; n < ve->num_siblings; n++) {
+					if (ve->siblings[n] == engine) {
+						swap(ve->siblings[n],
+						     ve->siblings[0]);
+						break;
+					}
+				}
+
+				GEM_BUG_ON(ve->siblings[0] != engine);
+			}
+
+			__i915_request_submit(rq);
+			trace_i915_request_in(rq, port_index(port, execlists));
+			submit = true;
+			last = rq;
+		}
+
+		spin_unlock(&ve->base.active.lock);
+		break;
+	}
+
 	while ((rb = rb_first_cached(&execlists->queue))) {
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
@@ -805,8 +1068,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			list_del_init(&rq->sched.link);
-
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 
@@ -907,7 +1168,8 @@ static void process_csb(struct intel_engine_cs *engine)
 	const u8 num_entries = execlists->csb_size;
 	u8 head, tail;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
 
 	/*
 	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1067,7 +1329,7 @@ static void process_csb(struct intel_engine_cs *engine)
 
 static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
 {
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	process_csb(engine);
 	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
@@ -1085,18 +1347,19 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	GEM_TRACE("%s awake?=%d, active=%x\n",
 		  engine->name,
-		  !!engine->i915->gt.awake,
+		  !!intel_wakeref_active(&engine->wakeref),
 		  engine->execlists.active);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
 			  struct i915_sched_node *node,
 			  int prio)
 {
+	GEM_BUG_ON(!list_empty(&node->link));
 	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
 }
 
@@ -1127,7 +1390,7 @@ static void execlists_submit_request(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	queue_request(engine, &request->sched, rq_prio(request));
 
@@ -1136,7 +1399,7 @@ static void execlists_submit_request(struct i915_request *request)
 
 	submit_queue(engine, rq_prio(request));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void __execlists_context_fini(struct intel_context *ce)
@@ -1159,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
 	intel_context_free(ce);
 }
 
-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	err = i915_vma_pin(vma, 0, 0, flags);
-	if (err)
-		return err;
-
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
-	vma->obj->pin_global--;
-	__i915_vma_unpin(vma);
-}
-
 static void execlists_context_unpin(struct intel_context *ce)
 {
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->active);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->active));
-	}
-
 	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
-
 	i915_gem_object_unpin_map(ce->state->obj);
-	__context_unpin(ce->state);
+	intel_ring_unpin(ce->ring);
 }
 
 static void
@@ -1232,7 +1446,7 @@ __execlists_update_reg_state(struct intel_context *ce,
 	/* RPCS */
 	if (engine->class == RENDER_CLASS)
 		regs[CTX_R_PWR_CLK_STATE + 1] =
-			gen8_make_rpcs(engine->i915, &ce->sseu);
+			intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 }
 
 static int
@@ -1242,14 +1456,17 @@ __execlists_context_pin(struct intel_context *ce,
 	void *vaddr;
 	int ret;
 
-	GEM_BUG_ON(!ce->gem_context->ppgtt);
+	GEM_BUG_ON(!ce->gem_context->vm);
 
 	ret = execlists_context_deferred_alloc(ce, engine);
 	if (ret)
 		goto err;
 	GEM_BUG_ON(!ce->state);
 
-	ret = __context_pin(ce->state);
+	ret = intel_context_active_acquire(ce,
+					   engine->i915->ggtt.pin_bias |
+					   PIN_OFFSET_BIAS |
+					   PIN_HIGH);
 	if (ret)
 		goto err;
 
@@ -1258,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
 					I915_MAP_OVERRIDE);
 	if (IS_ERR(vaddr)) {
 		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
 	}
 
 	ret = intel_ring_pin(ce->ring);
@@ -1279,8 +1496,8 @@ unpin_ring:
 	intel_ring_unpin(ce->ring);
 unpin_map:
 	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__context_unpin(ce->state);
+unpin_active:
+	intel_context_active_release(ce);
 err:
 	return ret;
 }
@@ -1316,6 +1533,9 @@ static const struct intel_context_ops execlists_context_ops = {
 	.pin = execlists_context_pin,
 	.unpin = execlists_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.reset = execlists_context_reset,
 	.destroy = execlists_context_destroy,
 };
@@ -1355,7 +1575,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
 static int emit_pdps(struct i915_request *rq)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+	struct i915_ppgtt * const ppgtt =
+		i915_vm_to_ppgtt(rq->gem_context->vm);
 	int err, i;
 	u32 *cs;
 
@@ -1428,7 +1649,7 @@ static int execlists_request_alloc(struct i915_request *request)
 	 */
 
 	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm))
+	if (i915_vm_is_4lvl(request->gem_context->vm))
 		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	else
 		ret = emit_pdps(request);
@@ -1655,7 +1876,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1695,8 +1916,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	unsigned int i;
 	int ret;
 
-	if (GEM_DEBUG_WARN_ON(engine->id != RCS0))
-		return -EINVAL;
+	if (engine->class != RENDER_CLASS)
+		return 0;
 
 	switch (INTEL_GEN(engine->i915)) {
 	case 11:
@@ -1755,31 +1976,30 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 
 static void enable_execlists(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
 	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
 
-	if (INTEL_GEN(dev_priv) >= 11)
-		I915_WRITE(RING_MODE_GEN7(engine),
-			   _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	if (INTEL_GEN(engine->i915) >= 11)
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
 	else
-		I915_WRITE(RING_MODE_GEN7(engine),
-			   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
 
-	I915_WRITE(RING_MI_MODE(engine->mmio_base),
-		   _MASKED_BIT_DISABLE(STOP_RING));
+	ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 
-	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-		   i915_ggtt_offset(engine->status_page.vma));
-	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
+	ENGINE_WRITE(engine,
+		     RING_HWS_PGA,
+		     i915_ggtt_offset(engine->status_page.vma));
+	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
 }
 
 static bool unexpected_starting_state(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
 	bool unexpected = false;
 
-	if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) {
+	if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
 		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
 		unexpected = true;
 	}
@@ -1787,7 +2007,7 @@ static bool unexpected_starting_state(struct intel_engine_cs *engine)
 	return unexpected;
 }
 
-static int gen8_init_common_ring(struct intel_engine_cs *engine)
+static int execlists_resume(struct intel_engine_cs *engine)
 {
 	intel_engine_apply_workarounds(engine);
 	intel_engine_apply_whitelist(engine);
@@ -1820,7 +2040,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	 * completed the reset in i915_gem_reset_finish(). If a request
 	 * is completed by one engine, it may then queue a request
 	 * to a second via its execlists->tasklet *just* as we are
-	 * calling engine->init_hw() and also writing the ELSP.
+	 * calling engine->resume() and also writing the ELSP.
 	 * Turning off the execlists->tasklet until the reset is over
 	 * prevents the race.
 	 */
@@ -1830,8 +2050,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
 	intel_engine_stop_cs(engine);
 
 	/* And flush any current direct submission. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static bool lrc_regs_ok(const struct i915_request *rq)
@@ -1872,6 +2092,25 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
 			       &execlists->csb_status[reset_value]);
 }
 
+static struct i915_request *active_request(struct i915_request *rq)
+{
+	const struct list_head * const list = &rq->engine->active.requests;
+	const struct intel_context * const context = rq->hw_context;
+	struct i915_request *active = NULL;
+
+	list_for_each_entry_from_reverse(rq, list, sched.link) {
+		if (i915_request_completed(rq))
+			break;
+
+		if (rq->hw_context != context)
+			break;
+
+		active = rq;
+	}
+
+	return active;
+}
+
 static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -1892,7 +2131,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	if (!port_isset(execlists->port))
 		goto out_clear;
 
-	ce = port_request(execlists->port)->hw_context;
+	rq = port_request(execlists->port);
+	ce = rq->hw_context;
 
 	/*
 	 * Catch up with any missed context-switch interrupts.
@@ -1905,16 +2145,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	execlists_cancel_port_requests(execlists);
 
-	/* Push back any incomplete requests for replay after the reset. */
-	rq = __unwind_incomplete_requests(engine);
+	rq = active_request(rq);
 	if (!rq)
 		goto out_replay;
 
-	if (rq->hw_context != ce) { /* caught just before a CS event */
-		rq = NULL;
-		goto out_replay;
-	}
-
 	/*
 	 * If this request hasn't started yet, e.g. it is waiting on a
 	 * semaphore, we need to avoid skipping the request or else we
@@ -1961,13 +2195,16 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	}
 	execlists_init_reg_state(regs, ce, engine, ce->ring);
 
-	/* Rerun the request; its payload has been neutered (if guilty). */
 out_replay:
+	/* Rerun the request; its payload has been neutered (if guilty). */
 	ce->ring->head =
 		rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
 	intel_ring_update_space(ce->ring);
 	__execlists_update_reg_state(ce, engine);
 
+	/* Push back any incomplete requests for replay after the reset. */
+	__unwind_incomplete_requests(engine);
+
 out_clear:
 	execlists_clear_all_active(execlists);
 }
@@ -1978,11 +2215,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 
 	GEM_TRACE("%s\n", engine->name);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, stalled);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void nop_submission_tasklet(unsigned long data)
@@ -2013,12 +2250,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__execlists_reset(engine, true);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -2041,6 +2278,26 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 		i915_priolist_free(p);
 	}
 
+	/* Cancel all attached virtual engines */
+	while ((rb = rb_first_cached(&execlists->virtual))) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+
+		rb_erase_cached(rb, &execlists->virtual);
+		RB_CLEAR_NODE(rb);
+
+		spin_lock(&ve->base.active.lock);
+		if (ve->request) {
+			ve->request->engine = engine;
+			__i915_request_submit(ve->request);
+			dma_fence_set_error(&ve->request->fence, -EIO);
+			i915_request_mark_complete(ve->request);
+			ve->base.execlists.queue_priority_hint = INT_MIN;
+			ve->request = NULL;
+		}
+		spin_unlock(&ve->base.active.lock);
+	}
+
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
 	execlists->queue_priority_hint = INT_MIN;
@@ -2050,7 +2307,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
 	execlists->tasklet.func = nop_submission_tasklet;
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -2270,12 +2527,6 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 				  request->timeline->hwsp_offset,
 				  0);
 
-	cs = gen8_emit_ggtt_write(cs,
-				  intel_engine_next_hangcheck_seqno(request->engine),
-				  I915_GEM_HWS_HANGCHECK_ADDR,
-				  MI_FLUSH_DW_STORE_INDEX);
-
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
@@ -2287,19 +2538,17 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
 
 static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 {
+	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
 	cs = gen8_emit_ggtt_write_rcs(cs,
 				      request->fence.seqno,
 				      request->timeline->hwsp_offset,
 				      PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
 				      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-				      PIPE_CONTROL_DC_FLUSH_ENABLE |
-				      PIPE_CONTROL_FLUSH_ENABLE |
-				      PIPE_CONTROL_CS_STALL);
-
-	cs = gen8_emit_ggtt_write_rcs(cs,
-				      intel_engine_next_hangcheck_seqno(request->engine),
-				      I915_GEM_HWS_HANGCHECK_ADDR,
-				      PIPE_CONTROL_STORE_DATA_INDEX);
+				      PIPE_CONTROL_DC_FLUSH_ENABLE);
+	cs = gen8_emit_pipe_control(cs,
+				    PIPE_CONTROL_FLUSH_ENABLE |
+				    PIPE_CONTROL_CS_STALL,
+				    0);
 
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2329,38 +2578,9 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 	return i915_gem_render_state_emit(rq);
 }
 
-/**
- * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
- * @engine: Engine Command Streamer.
- */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
+static void execlists_park(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv;
-
-	/*
-	 * Tasklet cannot be active at this point due intel_mark_active/idle
-	 * so this is just for documentation.
-	 */
-	if (WARN_ON(test_bit(TASKLET_STATE_SCHED,
-			     &engine->execlists.tasklet.state)))
-		tasklet_kill(&engine->execlists.tasklet);
-
-	dev_priv = engine->i915;
-
-	if (engine->buffer) {
-		WARN_ON((ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-	}
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	lrc_destroy_wa_ctx(engine);
-
-	engine->i915 = NULL;
-	dev_priv->engine[engine->id] = NULL;
-	kfree(engine);
+	intel_engine_park(engine);
 }
 
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -2374,7 +2594,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->reset.reset = execlists_reset;
 	engine->reset.finish = execlists_reset_finish;
 
-	engine->park = NULL;
+	engine->park = execlists_park;
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
@@ -2385,11 +2605,20 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
+static void execlists_destroy(struct intel_engine_cs *engine)
+{
+	intel_engine_cleanup_common(engine);
+	lrc_destroy_wa_ctx(engine);
+	kfree(engine);
+}
+
 static void
 logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 {
 	/* Default vfuncs which can be overriden by each engine. */
-	engine->init_hw = gen8_init_common_ring;
+
+	engine->destroy = execlists_destroy;
+	engine->resume = execlists_resume;
 
 	engine->reset.prepare = execlists_reset_prepare;
 	engine->reset.reset = execlists_reset;
@@ -2442,15 +2671,8 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
 
-static int
-logical_ring_setup(struct intel_engine_cs *engine)
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 {
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
 	/* Intentionally left blank. */
 	engine->buffer = NULL;
 
@@ -2460,13 +2682,20 @@ logical_ring_setup(struct intel_engine_cs *engine)
 	logical_ring_default_vfuncs(engine);
 	logical_ring_default_irqs(engine);
 
+	if (engine->class == RENDER_CLASS) {
+		engine->init_context = gen8_init_rcs_context;
+		engine->emit_flush = gen8_emit_flush_render;
+		engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+	}
+
 	return 0;
 }
 
-static int logical_ring_init(struct intel_engine_cs *engine)
+int intel_execlists_submission_init(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *i915 = engine->i915;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
 	u32 base = engine->mmio_base;
 	int ret;
 
@@ -2475,14 +2704,23 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 		return ret;
 
 	intel_engine_init_workarounds(engine);
+	intel_engine_init_whitelist(engine);
+
+	if (intel_init_workaround_bb(engine))
+		/*
+		 * We continue even if we fail to initialize WA batch
+		 * because we only expect rare glitches but nothing
+		 * critical to prevent us from using GPU
+		 */
+		DRM_ERROR("WA batch buffer initialization failed\n");
 
 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
-		execlists->submit_reg = i915->uncore.regs +
+		execlists->submit_reg = uncore->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
-		execlists->ctrl_reg = i915->uncore.regs +
+		execlists->ctrl_reg = uncore->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
 	} else {
-		execlists->submit_reg = i915->uncore.regs +
+		execlists->submit_reg = uncore->regs +
 			i915_mmio_reg_offset(RING_ELSP(base));
 	}
 
@@ -2497,7 +2735,7 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	execlists->csb_write =
 		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
 
-	if (INTEL_GEN(engine->i915) < 11)
+	if (INTEL_GEN(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
 	else
 		execlists->csb_size = GEN11_CSB_ENTRIES;
@@ -2507,182 +2745,6 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	return 0;
 }
 
-int logical_render_ring_init(struct intel_engine_cs *engine)
-{
-	int ret;
-
-	ret = logical_ring_setup(engine);
-	if (ret)
-		return ret;
-
-	/* Override some for render ring. */
-	engine->init_context = gen8_init_rcs_context;
-	engine->emit_flush = gen8_emit_flush_render;
-	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
-
-	ret = logical_ring_init(engine);
-	if (ret)
-		return ret;
-
-	ret = intel_init_workaround_bb(engine);
-	if (ret) {
-		/*
-		 * We continue even if we fail to initialize WA batch
-		 * because we only expect rare glitches but nothing
-		 * critical to prevent us from using GPU
-		 */
-		DRM_ERROR("WA batch buffer initialization failed: %d\n",
-			  ret);
-	}
-
-	intel_engine_init_whitelist(engine);
-
-	return 0;
-}
-
-int logical_xcs_ring_init(struct intel_engine_cs *engine)
-{
-	int err;
-
-	err = logical_ring_setup(engine);
-	if (err)
-		return err;
-
-	return logical_ring_init(engine);
-}
-
-u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
-	bool subslice_pg = sseu->has_subslice_pg;
-	struct intel_sseu ctx_sseu;
-	u8 slices, subslices;
-	u32 rpcs = 0;
-
-	/*
-	 * No explicit RPCS request is needed to ensure full
-	 * slice/subslice/EU enablement prior to Gen9.
-	*/
-	if (INTEL_GEN(i915) < 9)
-		return 0;
-
-	/*
-	 * If i915/perf is active, we want a stable powergating configuration
-	 * on the system.
-	 *
-	 * We could choose full enablement, but on ICL we know there are use
-	 * cases which disable slices for functional, apart for performance
-	 * reasons. So in this case we select a known stable subset.
-	 */
-	if (!i915->perf.oa.exclusive_stream) {
-		ctx_sseu = *req_sseu;
-	} else {
-		ctx_sseu = intel_device_default_sseu(i915);
-
-		if (IS_GEN(i915, 11)) {
-			/*
-			 * We only need subslice count so it doesn't matter
-			 * which ones we select - just turn off low bits in the
-			 * amount of half of all available subslices per slice.
-			 */
-			ctx_sseu.subslice_mask =
-				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
-			ctx_sseu.slice_mask = 0x1;
-		}
-	}
-
-	slices = hweight8(ctx_sseu.slice_mask);
-	subslices = hweight8(ctx_sseu.subslice_mask);
-
-	/*
-	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
-	 * wide and Icelake has up to eight subslices, specfial programming is
-	 * needed in order to correctly enable all subslices.
-	 *
-	 * According to documentation software must consider the configuration
-	 * as 2x4x8 and hardware will translate this to 1x8x8.
-	 *
-	 * Furthemore, even though SScount is three bits, maximum documented
-	 * value for it is four. From this some rules/restrictions follow:
-	 *
-	 * 1.
-	 * If enabled subslice count is greater than four, two whole slices must
-	 * be enabled instead.
-	 *
-	 * 2.
-	 * When more than one slice is enabled, hardware ignores the subslice
-	 * count altogether.
-	 *
-	 * From these restrictions it follows that it is not possible to enable
-	 * a count of subslices between the SScount maximum of four restriction,
-	 * and the maximum available number on a particular SKU. Either all
-	 * subslices are enabled, or a count between one and four on the first
-	 * slice.
-	 */
-	if (IS_GEN(i915, 11) &&
-	    slices == 1 &&
-	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
-		GEM_BUG_ON(subslices & 1);
-
-		subslice_pg = false;
-		slices *= 2;
-	}
-
-	/*
-	 * Starting in Gen9, render power gating can leave
-	 * slice/subslice/EU in a partially enabled state. We
-	 * must make an explicit request through RPCS for full
-	 * enablement.
-	*/
-	if (sseu->has_slice_pg) {
-		u32 mask, val = slices;
-
-		if (INTEL_GEN(i915) >= 11) {
-			mask = GEN11_RPCS_S_CNT_MASK;
-			val <<= GEN11_RPCS_S_CNT_SHIFT;
-		} else {
-			mask = GEN8_RPCS_S_CNT_MASK;
-			val <<= GEN8_RPCS_S_CNT_SHIFT;
-		}
-
-		GEM_BUG_ON(val & ~mask);
-		val &= mask;
-
-		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
-	}
-
-	if (subslice_pg) {
-		u32 val = subslices;
-
-		val <<= GEN8_RPCS_SS_CNT_SHIFT;
-
-		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
-		val &= GEN8_RPCS_SS_CNT_MASK;
-
-		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
-	}
-
-	if (sseu->has_eu_pg) {
-		u32 val;
-
-		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
-		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
-		val &= GEN8_RPCS_EU_MIN_MASK;
-
-		rpcs |= val;
-
-		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
-		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
-		val &= GEN8_RPCS_EU_MAX_MASK;
-
-		rpcs |= val;
-
-		rpcs |= GEN8_RPCS_ENABLE;
-	}
-
-	return rpcs;
-}
-
 static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
 {
 	u32 indirect_ctx_offset;
@@ -2717,16 +2779,19 @@ static void execlists_init_reg_state(u32 *regs,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring)
 {
-	struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
 	bool rcs = engine->class == RENDER_CLASS;
 	u32 base = engine->mmio_base;
 
-	/* A context is actually a big batch buffer with several
+	/*
+	 * A context is actually a big batch buffer with several
 	 * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
 	 * values we are setting here are only for the first context restore:
 	 * on a subsequent save, the GPU will recreate this batchbuffer with new
 	 * values (including all the missing MI_LOAD_REGISTER_IMM commands that
 	 * we are not initializing here).
+	 *
+	 * Must keep consistent with virtual_update_register_offsets().
 	 */
 	regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
 				 MI_LRI_FORCE_POSTED;
@@ -2902,7 +2967,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
 	 */
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
-	ctx_obj = i915_gem_object_create(engine->i915, context_size);
+	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
 	if (IS_ERR(ctx_obj))
 		return PTR_ERR(ctx_obj);
 
@@ -2945,6 +3010,468 @@ error_deref_obj:
 	return ret;
 }
 
+static struct list_head *virtual_queue(struct virtual_engine *ve)
+{
+	return &ve->base.execlists.default_priolist.requests[0];
+}
+
+static void virtual_context_destroy(struct kref *kref)
+{
+	struct virtual_engine *ve =
+		container_of(kref, typeof(*ve), context.ref);
+	unsigned int n;
+
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(ve->context.inflight);
+
+	for (n = 0; n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct rb_node *node = &ve->nodes[sibling->id].rb;
+
+		if (RB_EMPTY_NODE(node))
+			continue;
+
+		spin_lock_irq(&sibling->active.lock);
+
+		/* Detachment is lazily performed in the execlists tasklet */
+		if (!RB_EMPTY_NODE(node))
+			rb_erase_cached(node, &sibling->execlists.virtual);
+
+		spin_unlock_irq(&sibling->active.lock);
+	}
+	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
+
+	if (ve->context.state)
+		__execlists_context_fini(&ve->context);
+
+	kfree(ve->bonds);
+	kfree(ve);
+}
+
+static void virtual_engine_initial_hint(struct virtual_engine *ve)
+{
+	int swp;
+
+	/*
+	 * Pick a random sibling on starting to help spread the load around.
+	 *
+	 * New contexts are typically created with exactly the same order
+	 * of siblings, and often started in batches. Due to the way we iterate
+	 * the array of sibling when submitting requests, sibling[0] is
+	 * prioritised for dequeuing. If we make sure that sibling[0] is fairly
+	 * randomised across the system, we also help spread the load by the
+	 * first engine we inspect being different each time.
+	 *
+	 * NB This does not force us to execute on this engine, it will just
+	 * typically be the first we inspect for submission.
+	 */
+	swp = prandom_u32_max(ve->num_siblings);
+	if (!swp)
+		return;
+
+	swap(ve->siblings[swp], ve->siblings[0]);
+	virtual_update_register_offsets(ve->context.lrc_reg_state,
+					ve->siblings[0]);
+}
+
+static int virtual_context_pin(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	int err;
+
+	/* Note: we must use a real engine class for setting up reg state */
+	err = __execlists_context_pin(ce, ve->siblings[0]);
+	if (err)
+		return err;
+
+	virtual_engine_initial_hint(ve);
+	return 0;
+}
+
+static void virtual_context_enter(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_get(ve->siblings[n]);
+}
+
+static void virtual_context_exit(struct intel_context *ce)
+{
+	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+	unsigned int n;
+
+	for (n = 0; n < ve->num_siblings; n++)
+		intel_engine_pm_put(ve->siblings[n]);
+}
+
+static const struct intel_context_ops virtual_context_ops = {
+	.pin = virtual_context_pin,
+	.unpin = execlists_context_unpin,
+
+	.enter = virtual_context_enter,
+	.exit = virtual_context_exit,
+
+	.destroy = virtual_context_destroy,
+};
+
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+	struct i915_request *rq;
+	intel_engine_mask_t mask;
+
+	rq = READ_ONCE(ve->request);
+	if (!rq)
+		return 0;
+
+	/* The rq is ready for submission; rq->execution_mask is now stable. */
+	mask = rq->execution_mask;
+	if (unlikely(!mask)) {
+		/* Invalid selection, submit to a random engine in error */
+		i915_request_skip(rq, -ENODEV);
+		mask = ve->siblings[0]->mask;
+	}
+
+	GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+		  ve->base.name,
+		  rq->fence.context, rq->fence.seqno,
+		  mask, ve->base.execlists.queue_priority_hint);
+
+	return mask;
+}
+
+static void virtual_submission_tasklet(unsigned long data)
+{
+	struct virtual_engine * const ve = (struct virtual_engine *)data;
+	const int prio = ve->base.execlists.queue_priority_hint;
+	intel_engine_mask_t mask;
+	unsigned int n;
+
+	rcu_read_lock();
+	mask = virtual_submission_mask(ve);
+	rcu_read_unlock();
+	if (unlikely(!mask))
+		return;
+
+	local_irq_disable();
+	for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
+		struct intel_engine_cs *sibling = ve->siblings[n];
+		struct ve_node * const node = &ve->nodes[sibling->id];
+		struct rb_node **parent, *rb;
+		bool first;
+
+		if (unlikely(!(mask & sibling->mask))) {
+			if (!RB_EMPTY_NODE(&node->rb)) {
+				spin_lock(&sibling->active.lock);
+				rb_erase_cached(&node->rb,
+						&sibling->execlists.virtual);
+				RB_CLEAR_NODE(&node->rb);
+				spin_unlock(&sibling->active.lock);
+			}
+			continue;
+		}
+
+		spin_lock(&sibling->active.lock);
+
+		if (!RB_EMPTY_NODE(&node->rb)) {
+			/*
+			 * Cheat and avoid rebalancing the tree if we can
+			 * reuse this node in situ.
+			 */
+			first = rb_first_cached(&sibling->execlists.virtual) ==
+				&node->rb;
+			if (prio == node->prio || (prio > node->prio && first))
+				goto submit_engine;
+
+			rb_erase_cached(&node->rb, &sibling->execlists.virtual);
+		}
+
+		rb = NULL;
+		first = true;
+		parent = &sibling->execlists.virtual.rb_root.rb_node;
+		while (*parent) {
+			struct ve_node *other;
+
+			rb = *parent;
+			other = rb_entry(rb, typeof(*other), rb);
+			if (prio > other->prio) {
+				parent = &rb->rb_left;
+			} else {
+				parent = &rb->rb_right;
+				first = false;
+			}
+		}
+
+		rb_link_node(&node->rb, rb, parent);
+		rb_insert_color_cached(&node->rb,
+				       &sibling->execlists.virtual,
+				       first);
+
+submit_engine:
+		GEM_BUG_ON(RB_EMPTY_NODE(&node->rb));
+		node->prio = prio;
+		if (first && prio > sibling->execlists.queue_priority_hint) {
+			sibling->execlists.queue_priority_hint = prio;
+			tasklet_hi_schedule(&sibling->execlists.tasklet);
+		}
+
+		spin_unlock(&sibling->active.lock);
+	}
+	local_irq_enable();
+}
+
+static void virtual_submit_request(struct i915_request *rq)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+	GEM_TRACE("%s: rq=%llx:%lld\n",
+		  ve->base.name,
+		  rq->fence.context,
+		  rq->fence.seqno);
+
+	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
+
+	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+
+	ve->base.execlists.queue_priority_hint = rq_prio(rq);
+	WRITE_ONCE(ve->request, rq);
+
+	list_move_tail(&rq->sched.link, virtual_queue(ve));
+
+	tasklet_schedule(&ve->base.execlists.tasklet);
+}
+
+static struct ve_bond *
+virtual_find_bond(struct virtual_engine *ve,
+		  const struct intel_engine_cs *master)
+{
+	int i;
+
+	for (i = 0; i < ve->num_bonds; i++) {
+		if (ve->bonds[i].master == master)
+			return &ve->bonds[i];
+	}
+
+	return NULL;
+}
+
+static void
+virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
+{
+	struct virtual_engine *ve = to_virtual_engine(rq->engine);
+	struct ve_bond *bond;
+
+	bond = virtual_find_bond(ve, to_request(signal)->engine);
+	if (bond) {
+		intel_engine_mask_t old, new, cmp;
+
+		cmp = READ_ONCE(rq->execution_mask);
+		do {
+			old = cmp;
+			new = cmp & bond->sibling_mask;
+		} while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
+	}
+}
+
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count)
+{
+	struct virtual_engine *ve;
+	unsigned int n;
+	int err;
+
+	if (count == 0)
+		return ERR_PTR(-EINVAL);
+
+	if (count == 1)
+		return intel_context_create(ctx, siblings[0]);
+
+	ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
+	if (!ve)
+		return ERR_PTR(-ENOMEM);
+
+	ve->base.i915 = ctx->i915;
+	ve->base.id = -1;
+	ve->base.class = OTHER_CLASS;
+	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
+	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
+	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+
+	/*
+	 * The decision on whether to submit a request using semaphores
+	 * depends on the saturated state of the engine. We only compute
+	 * this during HW submission of the request, and we need for this
+	 * state to be globally applied to all requests being submitted
+	 * to this engine. Virtual engines encompass more than one physical
+	 * engine and so we cannot accurately tell in advance if one of those
+	 * engines is already saturated and so cannot afford to use a semaphore
+	 * and be pessimized in priority for doing so -- if we are the only
+	 * context using semaphores after all other clients have stopped, we
+	 * will be starved on the saturated system. Such a global switch for
+	 * semaphores is less than ideal, but alas is the current compromise.
+	 */
+	ve->base.saturated = ALL_ENGINES;
+
+	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
+
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
+
+	intel_engine_init_execlists(&ve->base);
+
+	ve->base.cops = &virtual_context_ops;
+	ve->base.request_alloc = execlists_request_alloc;
+
+	ve->base.schedule = i915_schedule;
+	ve->base.submit_request = virtual_submit_request;
+	ve->base.bond_execute = virtual_bond_execute;
+
+	INIT_LIST_HEAD(virtual_queue(ve));
+	ve->base.execlists.queue_priority_hint = INT_MIN;
+	tasklet_init(&ve->base.execlists.tasklet,
+		     virtual_submission_tasklet,
+		     (unsigned long)ve);
+
+	intel_context_init(&ve->context, ctx, &ve->base);
+
+	for (n = 0; n < count; n++) {
+		struct intel_engine_cs *sibling = siblings[n];
+
+		GEM_BUG_ON(!is_power_of_2(sibling->mask));
+		if (sibling->mask & ve->base.mask) {
+			DRM_DEBUG("duplicate %s entry in load balancer\n",
+				  sibling->name);
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		/*
+		 * The virtual engine implementation is tightly coupled to
+		 * the execlists backend -- we push out request directly
+		 * into a tree inside each physical engine. We could support
+		 * layering if we handle cloning of the requests and
+		 * submitting a copy into each backend.
+		 */
+		if (sibling->execlists.tasklet.func !=
+		    execlists_submission_tasklet) {
+			err = -ENODEV;
+			goto err_put;
+		}
+
+		GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb));
+		RB_CLEAR_NODE(&ve->nodes[sibling->id].rb);
+
+		ve->siblings[ve->num_siblings++] = sibling;
+		ve->base.mask |= sibling->mask;
+
+		/*
+		 * All physical engines must be compatible for their emission
+		 * functions (as we build the instructions during request
+		 * construction and do not alter them before submission
+		 * on the physical engine). We use the engine class as a guide
+		 * here, although that could be refined.
+		 */
+		if (ve->base.class != OTHER_CLASS) {
+			if (ve->base.class != sibling->class) {
+				DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
+					  sibling->class, ve->base.class);
+				err = -EINVAL;
+				goto err_put;
+			}
+			continue;
+		}
+
+		ve->base.class = sibling->class;
+		ve->base.uabi_class = sibling->uabi_class;
+		snprintf(ve->base.name, sizeof(ve->base.name),
+			 "v%dx%d", ve->base.class, count);
+		ve->base.context_size = sibling->context_size;
+
+		ve->base.emit_bb_start = sibling->emit_bb_start;
+		ve->base.emit_flush = sibling->emit_flush;
+		ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb;
+		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
+		ve->base.emit_fini_breadcrumb_dw =
+			sibling->emit_fini_breadcrumb_dw;
+	}
+
+	return &ve->context;
+
+err_put:
+	intel_context_put(&ve->context);
+	return ERR_PTR(err);
+}
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src)
+{
+	struct virtual_engine *se = to_virtual_engine(src);
+	struct intel_context *dst;
+
+	dst = intel_execlists_create_virtual(ctx,
+					     se->siblings,
+					     se->num_siblings);
+	if (IS_ERR(dst))
+		return dst;
+
+	if (se->num_bonds) {
+		struct virtual_engine *de = to_virtual_engine(dst->engine);
+
+		de->bonds = kmemdup(se->bonds,
+				    sizeof(*se->bonds) * se->num_bonds,
+				    GFP_KERNEL);
+		if (!de->bonds) {
+			intel_context_put(dst);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		de->num_bonds = se->num_bonds;
+	}
+
+	return dst;
+}
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling)
+{
+	struct virtual_engine *ve = to_virtual_engine(engine);
+	struct ve_bond *bond;
+	int n;
+
+	/* Sanity check the sibling is part of the virtual engine */
+	for (n = 0; n < ve->num_siblings; n++)
+		if (sibling == ve->siblings[n])
+			break;
+	if (n == ve->num_siblings)
+		return -EINVAL;
+
+	bond = virtual_find_bond(ve, master);
+	if (bond) {
+		bond->sibling_mask |= sibling->mask;
+		return 0;
+	}
+
+	bond = krealloc(ve->bonds,
+			sizeof(*bond) * (ve->num_bonds + 1),
+			GFP_KERNEL);
+	if (!bond)
+		return -ENOMEM;
+
+	bond[ve->num_bonds].master = master;
+	bond[ve->num_bonds].sibling_mask = sibling->mask;
+
+	ve->bonds = bond;
+	ve->num_bonds++;
+
+	return 0;
+}
+
 void intel_execlists_show_requests(struct intel_engine_cs *engine,
 				   struct drm_printer *m,
 				   void (*show_request)(struct drm_printer *m,
@@ -2958,11 +3485,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 	unsigned int count;
 	struct rb_node *rb;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	last = NULL;
 	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (count++ < max - 1)
 			show_request(m, rq, "\t\tE ");
 		else
@@ -3002,7 +3529,30 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 		show_request(m, last, "\t\tQ ");
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	last = NULL;
+	count = 0;
+	for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) {
+		struct virtual_engine *ve =
+			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
+		struct i915_request *rq = READ_ONCE(ve->request);
+
+		if (rq) {
+			if (count++ < max - 1)
+				show_request(m, rq, "\t\tV ");
+			else
+				last = rq;
+		}
+	}
+	if (last) {
+		if (count > max) {
+			drm_printf(m,
+				   "\t\t...skipping %d virtual requests...\n",
+				   count - max);
+		}
+		show_request(m, last, "\t\tV ");
+	}
+
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,
@@ -3037,5 +3587,5 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_lrc.c"
+#include "selftest_lrc.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 84aa230ea27b..c2bba82bcc16 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -24,8 +24,15 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
-#include "intel_ringbuffer.h"
-#include "i915_gem_context.h"
+#include <linux/types.h>
+
+struct drm_printer;
+
+struct drm_i915_private;
+struct i915_gem_context;
+struct i915_request;
+struct intel_context;
+struct intel_engine_cs;
 
 /* Execlists regs */
 #define RING_ELSP(base)				_MMIO((base) + 0x230)
@@ -67,8 +74,9 @@ enum {
 
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
-int logical_render_ring_init(struct intel_engine_cs *engine);
-int logical_xcs_ring_init(struct intel_engine_cs *engine);
+
+int intel_execlists_submission_setup(struct intel_engine_cs *engine);
+int intel_execlists_submission_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
 
@@ -96,11 +104,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine);
  */
 #define LRC_HEADER_PAGES LRC_PPHWSP_PN
 
-struct drm_printer;
-
-struct drm_i915_private;
-struct i915_gem_context;
-
 void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
 
 void intel_lr_context_reset(struct intel_engine_cs *engine,
@@ -115,6 +118,17 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
 							const char *prefix),
 				   unsigned int max);
 
-u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
+struct intel_context *
+intel_execlists_create_virtual(struct i915_gem_context *ctx,
+			       struct intel_engine_cs **siblings,
+			       unsigned int count);
+
+struct intel_context *
+intel_execlists_clone_virtual(struct i915_gem_context *ctx,
+			      struct intel_engine_cs *src);
+
+int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
+				     const struct intel_engine_cs *master,
+				     const struct intel_engine_cs *sibling);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 5ef932d810a7..6bf34738b4e5 100644
--- a/drivers/gpu/drm/i915/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -55,7 +55,7 @@
 
 #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
 	u32 *reg_state__ = (reg_state); \
-	const u64 addr__ = px_dma(&ppgtt->pml4); \
+	const u64 addr__ = px_dma(ppgtt->pd); \
 	(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
 	(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
 } while (0)
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 274ba78500c0..1f9db50b1869 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -20,9 +20,11 @@
  * SOFTWARE.
  */
 
+#include "i915_drv.h"
+
+#include "intel_engine.h"
 #include "intel_mocs.h"
 #include "intel_lrc.h"
-#include "intel_ringbuffer.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
@@ -198,6 +200,14 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	MOCS_ENTRY(15, \
 		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
 		   L3_3_WB), \
+	/* Bypass LLC - Uncached (EHL+) */ \
+	MOCS_ENTRY(16, \
+		   LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
+		   L3_1_UC), \
+	/* Bypass LLC - L3 (Read-Only) (EHL+) */ \
+	MOCS_ENTRY(17, \
+		   LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
+		   L3_3_WB), \
 	/* Self-Snoop - L3 + LLC */ \
 	MOCS_ENTRY(18, \
 		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
diff --git a/drivers/gpu/drm/i915/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 3d99d1271b2b..0913704a1af2 100644
--- a/drivers/gpu/drm/i915/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,7 +49,9 @@
  * context handling keep the MOCS in step.
  */
 
-#include "i915_drv.h"
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
 
 int intel_rcs_context_init_mocs(struct i915_request *rq);
 void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 677d59304e78..4c478b38e420 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -7,9 +7,16 @@
 #include <linux/sched/mm.h>
 #include <linux/stop_machine.h>
 
+#include "display/intel_overlay.h"
+
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
-#include "i915_reset.h"
+#include "i915_irq.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_pm.h"
+#include "intel_reset.h"
 
 #include "intel_guc.h"
 
@@ -43,12 +50,12 @@ static void engine_skip_context(struct i915_request *rq)
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *hung_ctx = rq->gem_context;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (!i915_request_is_active(rq))
 		return;
 
-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
 		if (rq->gem_context == hung_ctx)
 			i915_request_skip(rq, -EIO);
 }
@@ -124,7 +131,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
 		  rq->fence.seqno,
 		  yesno(guilty));
 
-	lockdep_assert_held(&rq->engine->timeline.lock);
+	lockdep_assert_held(&rq->engine->active.lock);
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	if (guilty) {
@@ -641,9 +648,6 @@ int intel_gpu_reset(struct drm_i915_private *i915,
 
 bool intel_has_gpu_reset(struct drm_i915_private *i915)
 {
-	if (USES_GUC(i915))
-		return false;
-
 	if (!i915_modparams.reset)
 		return NULL;
 
@@ -683,6 +687,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
 	 * written to the powercontext is undefined and so we may lose
 	 * GPU state upon resume, i.e. fail to restart after a reset.
 	 */
+	intel_engine_pm_get(engine);
 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
 	engine->reset.prepare(engine);
 }
@@ -691,19 +696,19 @@ static void revoke_mmaps(struct drm_i915_private *i915)
 {
 	int i;
 
-	for (i = 0; i < i915->num_fence_regs; i++) {
+	for (i = 0; i < i915->ggtt.num_fences; i++) {
 		struct drm_vma_offset_node *node;
 		struct i915_vma *vma;
 		u64 vma_offset;
 
-		vma = READ_ONCE(i915->fence_regs[i].vma);
+		vma = READ_ONCE(i915->ggtt.fence_regs[i].vma);
 		if (!vma)
 			continue;
 
 		if (!i915_vma_has_userfault(vma))
 			continue;
 
-		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
+		GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]);
 		node = &vma->obj->base.vma_node;
 		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
 		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
@@ -718,6 +723,7 @@ static void reset_prepare(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	intel_gt_pm_get(i915);
 	for_each_engine(engine, i915, id)
 		reset_prepare_engine(engine);
 
@@ -755,48 +761,10 @@ static int gt_reset(struct drm_i915_private *i915,
 static void reset_finish_engine(struct intel_engine_cs *engine)
 {
 	engine->reset.finish(engine);
+	intel_engine_pm_put(engine);
 	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
 }
 
-struct i915_gpu_restart {
-	struct work_struct work;
-	struct drm_i915_private *i915;
-};
-
-static void restart_work(struct work_struct *work)
-{
-	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
-	struct drm_i915_private *i915 = arg->i915;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	mutex_lock(&i915->drm.struct_mutex);
-	WRITE_ONCE(i915->gpu_error.restart, NULL);
-
-	for_each_engine(engine, i915, id) {
-		struct i915_request *rq;
-
-		/*
-		 * Ostensibily, we always want a context loaded for powersaving,
-		 * so if the engine is idle after the reset, send a request
-		 * to load our scratch kernel_context.
-		 */
-		if (!intel_engine_is_idle(engine))
-			continue;
-
-		rq = i915_request_alloc(engine, i915->kernel_context);
-		if (!IS_ERR(rq))
-			i915_request_add(rq);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-
-	kfree(arg);
-}
-
 static void reset_finish(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -806,29 +774,7 @@ static void reset_finish(struct drm_i915_private *i915)
 		reset_finish_engine(engine);
 		intel_engine_signal_breadcrumbs(engine);
 	}
-}
-
-static void reset_restart(struct drm_i915_private *i915)
-{
-	struct i915_gpu_restart *arg;
-
-	/*
-	 * Following the reset, ensure that we always reload context for
-	 * powersaving, and to correct engine->last_retired_context. Since
-	 * this requires us to submit a request, queue a worker to do that
-	 * task for us to evade any locking here.
-	 */
-	if (READ_ONCE(i915->gpu_error.restart))
-		return;
-
-	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
-	if (arg) {
-		arg->i915 = i915;
-		INIT_WORK(&arg->work, restart_work);
-
-		WRITE_ONCE(i915->gpu_error.restart, arg);
-		queue_work(i915->wq, &arg->work);
-	}
+	intel_gt_pm_put(i915);
 }
 
 static void nop_submit_request(struct i915_request *request)
@@ -840,10 +786,10 @@ static void nop_submit_request(struct i915_request *request)
 		  engine->name, request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 	__i915_request_submit(request);
 	i915_request_mark_complete(request);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 
 	intel_engine_queue_breadcrumbs(engine);
 }
@@ -889,6 +835,7 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 	 * in nop_submit_request.
 	 */
 	synchronize_rcu_expedited();
+	set_bit(I915_WEDGED, &error->flags);
 
 	/* Mark all executing requests as skipped */
 	for_each_engine(engine, i915, id)
@@ -896,9 +843,6 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
 
 	reset_finish(i915);
 
-	smp_mb__before_atomic();
-	set_bit(I915_WEDGED, &error->flags);
-
 	GEM_TRACE("end\n");
 }
 
@@ -908,7 +852,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
 	intel_wakeref_t wakeref;
 
 	mutex_lock(&error->wedge_mutex);
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		__i915_gem_set_wedged(i915);
 	mutex_unlock(&error->wedge_mutex);
 }
@@ -956,7 +900,7 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
 	}
 	mutex_unlock(&i915->gt.timelines.mutex);
 
-	intel_engines_sanitize(i915, false);
+	intel_gt_sanitize(i915, false);
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
@@ -1034,12 +978,12 @@ void i915_reset(struct drm_i915_private *i915,
 	GEM_TRACE("flags=%lx\n", error->flags);
 
 	might_sleep();
-	assert_rpm_wakelock_held(i915);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+	mutex_lock(&error->wedge_mutex);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	if (!__i915_gem_unset_wedged(i915))
-		return;
+		goto unlock;
 
 	if (reason)
 		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
@@ -1087,8 +1031,8 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
 	reset_finish(i915);
-	if (!__i915_wedged(error))
-		reset_restart(i915);
+unlock:
+	mutex_unlock(&error->wedge_mutex);
 	return;
 
 taint:
@@ -1104,7 +1048,7 @@ taint:
 	 * rather than continue on into oblivion. For everyone else,
 	 * the system should still plod along, but they have been warned!
 	 */
-	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+	add_taint_for_CI(TAINT_WARN);
 error:
 	__i915_gem_set_wedged(i915);
 	goto finish;
@@ -1137,6 +1081,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
+	if (!intel_wakeref_active(&engine->wakeref))
+		return 0;
+
 	reset_prepare_engine(engine);
 
 	if (msg)
@@ -1168,7 +1115,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	 * have been reset to their default values. Follow the init_ring
 	 * process to program RING_MODE, HWSP and re-enable submission.
 	 */
-	ret = engine->init_hw(engine);
+	ret = engine->resume(engine);
 	if (ret)
 		goto out;
 
@@ -1201,9 +1148,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
 		/* Flush everyone using a resource about to be clobbered */
 		synchronize_srcu_expedited(&error->reset_backoff_srcu);
 
-		mutex_lock(&error->wedge_mutex);
 		i915_reset(i915, engine_mask, reason);
-		mutex_unlock(&error->wedge_mutex);
 
 		intel_finish_reset(i915);
 	}
@@ -1217,7 +1162,14 @@ static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
 	intel_uncore_rmw(uncore, reg, 0, 0);
 }
 
-void i915_clear_error_registers(struct drm_i915_private *i915)
+static void gen8_clear_engine_error_register(struct intel_engine_cs *engine)
+{
+	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
+	GEN6_RING_FAULT_REG_POSTING_READ(engine);
+}
+
+static void clear_error_registers(struct drm_i915_private *i915,
+				  intel_engine_mask_t engine_mask)
 {
 	struct intel_uncore *uncore = &i915->uncore;
 	u32 eir;
@@ -1250,15 +1202,74 @@ void i915_clear_error_registers(struct drm_i915_private *i915)
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
 
-		for_each_engine(engine, i915, id) {
-			rmw_clear(uncore,
-				  RING_FAULT_REG(engine), RING_FAULT_VALID);
-			intel_uncore_posting_read(uncore,
-						  RING_FAULT_REG(engine));
+		for_each_engine_masked(engine, i915, engine_mask, id)
+			gen8_clear_engine_error_register(engine);
+	}
+}
+
+static void gen6_check_faults(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 fault;
+
+	for_each_engine(engine, dev_priv, id) {
+		fault = GEN6_RING_FAULT_REG_READ(engine);
+		if (fault & RING_FAULT_VALID) {
+			DRM_DEBUG_DRIVER("Unexpected fault\n"
+					 "\tAddr: 0x%08lx\n"
+					 "\tAddress space: %s\n"
+					 "\tSource ID: %d\n"
+					 "\tType: %d\n",
+					 fault & PAGE_MASK,
+					 fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
+					 RING_FAULT_SRCID(fault),
+					 RING_FAULT_FAULT_TYPE(fault));
 		}
 	}
 }
 
+static void gen8_check_faults(struct drm_i915_private *dev_priv)
+{
+	u32 fault = I915_READ(GEN8_RING_FAULT_REG);
+
+	if (fault & RING_FAULT_VALID) {
+		u32 fault_data0, fault_data1;
+		u64 fault_addr;
+
+		fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
+		fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
+		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+			     ((u64)fault_data0 << 12);
+
+		DRM_DEBUG_DRIVER("Unexpected fault\n"
+				 "\tAddr: 0x%08x_%08x\n"
+				 "\tAddress space: %s\n"
+				 "\tEngine ID: %d\n"
+				 "\tSource ID: %d\n"
+				 "\tType: %d\n",
+				 upper_32_bits(fault_addr),
+				 lower_32_bits(fault_addr),
+				 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+				 GEN8_RING_FAULT_ENGINE_ID(fault),
+				 RING_FAULT_SRCID(fault),
+				 RING_FAULT_FAULT_TYPE(fault));
+	}
+}
+
+void i915_check_and_clear_faults(struct drm_i915_private *i915)
+{
+	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
+	if (INTEL_GEN(i915) >= 8)
+		gen8_check_faults(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_check_faults(i915);
+	else
+		return;
+
+	clear_error_registers(i915, ALL_ENGINES);
+}
+
 /**
  * i915_handle_error - handle a gpu error
  * @i915: i915 device private
@@ -1301,13 +1312,13 @@ void i915_handle_error(struct drm_i915_private *i915,
 	 * isn't the case at least when we get here by doing a
 	 * simulated reset via debugfs, so get an RPM reference.
 	 */
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	engine_mask &= INTEL_INFO(i915)->engine_mask;
 
 	if (flags & I915_ERROR_CAPTURE) {
 		i915_capture_error_state(i915, engine_mask, msg);
-		i915_clear_error_registers(i915);
+		clear_error_registers(i915, engine_mask);
 	}
 
 	/*
@@ -1364,7 +1375,7 @@ void i915_handle_error(struct drm_i915_private *i915,
 	wake_up_all(&error->reset_queue);
 
 out:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
 int i915_reset_trylock(struct drm_i915_private *i915)
@@ -1425,25 +1436,6 @@ int i915_terminally_wedged(struct drm_i915_private *i915)
 	return __i915_wedged(error) ? -EIO : 0;
 }
 
-bool i915_reset_flush(struct drm_i915_private *i915)
-{
-	int err;
-
-	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	flush_workqueue(i915->wq);
-	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
-
-	mutex_lock(&i915->drm.struct_mutex);
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_LOCKED |
-				     I915_WAIT_FOR_IDLE_BOOST,
-				     MAX_SCHEDULE_TIMEOUT);
-	mutex_unlock(&i915->drm.struct_mutex);
-
-	return !err;
-}
-
 static void i915_wedge_me(struct work_struct *work)
 {
 	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
@@ -1472,3 +1464,7 @@ void __i915_fini_wedge(struct i915_wedge_me *w)
 	destroy_delayed_work_on_stack(&w->work);
 	w->i915 = NULL;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_reset.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 3c0450289b8f..580ebdb59eca 100644
--- a/drivers/gpu/drm/i915/i915_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 #include <linux/srcu.h>
 
-#include "intel_engine_types.h"
+#include "gt/intel_engine_types.h"
 
 struct drm_i915_private;
 struct i915_request;
@@ -25,7 +25,7 @@ void i915_handle_error(struct drm_i915_private *i915,
 		       const char *fmt, ...);
 #define I915_ERROR_CAPTURE BIT(0)
 
-void i915_clear_error_registers(struct drm_i915_private *i915);
+void i915_check_and_clear_faults(struct drm_i915_private *i915);
 
 void i915_reset(struct drm_i915_private *i915,
 		intel_engine_mask_t stalled_mask,
@@ -34,7 +34,6 @@ int i915_reset_engine(struct intel_engine_cs *engine,
 		      const char *reason);
 
 void i915_reset_request(struct i915_request *rq, bool guilty);
-bool i915_reset_flush(struct drm_i915_private *i915);
 
 int __must_check i915_reset_trylock(struct drm_i915_private *i915);
 void i915_reset_unlock(struct drm_i915_private *i915, int tag);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 029fd8ec1857..c6023bc9452d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -31,11 +31,13 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
-#include "intel_drv.h"
+#include "intel_context.h"
+#include "intel_reset.h"
 #include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
@@ -310,11 +312,6 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
 	*cs++ = rq->fence.seqno;
 
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_STORE_DATA_INDEX;
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | PIPE_CONTROL_GLOBAL_GTT;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
@@ -416,13 +413,6 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = rq->timeline->hwsp_offset;
 	*cs++ = rq->fence.seqno;
 
-	*cs++ = GFX_OP_PIPE_CONTROL(4);
-	*cs++ = (PIPE_CONTROL_QW_WRITE |
-		 PIPE_CONTROL_STORE_DATA_INDEX |
-		 PIPE_CONTROL_GLOBAL_GTT_IVB);
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
@@ -441,12 +431,7 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -466,10 +451,6 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = rq->fence.seqno;
 
-	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	for (i = 0; i < GEN7_XCS_WA; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
 		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
@@ -481,6 +462,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = 0;
 
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -638,12 +620,15 @@ static bool stop_ring(struct intel_engine_cs *engine)
 	return (ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) == 0;
 }
 
-static int init_ring_common(struct intel_engine_cs *engine)
+static int xcs_resume(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_ring *ring = engine->buffer;
 	int ret = 0;
 
+	GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
+		  engine->name, ring->head, ring->tail);
+
 	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
 
 	if (!stop_ring(engine)) {
@@ -745,14 +730,13 @@ static void reset_prepare(struct intel_engine_cs *engine)
 
 static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-	struct i915_timeline *tl = &engine->timeline;
 	struct i915_request *pos, *rq;
 	unsigned long flags;
 	u32 head;
 
 	rq = NULL;
-	spin_lock_irqsave(&tl->lock, flags);
-	list_for_each_entry(pos, &tl->requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(pos, &engine->active.requests, sched.link) {
 		if (!i915_request_completed(pos)) {
 			rq = pos;
 			break;
@@ -806,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 	}
 	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
 
-	spin_unlock_irqrestore(&tl->lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
@@ -828,12 +812,23 @@ static int intel_rcs_ctx_init(struct i915_request *rq)
 	return 0;
 }
 
-static int init_render_ring(struct intel_engine_cs *engine)
+static int rcs_resume(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
-	int ret = init_ring_common(engine);
-	if (ret)
-		return ret;
+
+	/*
+	 * Disable CONSTANT_BUFFER before it is loaded from the context
+	 * image. For as it is loaded, it is executed and the stored
+	 * address may no longer be valid, leading to a GPU hang.
+	 *
+	 * This imposes the requirement that userspace reload their
+	 * CONSTANT_BUFFER on every batch, fortunately a requirement
+	 * they are already accustomed to from before contexts were
+	 * enabled.
+	 */
+	if (IS_GEN(dev_priv, 4))
+		I915_WRITE(ECOSKPD,
+			   _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
 
 	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
 	if (IS_GEN_RANGE(dev_priv, 4, 6))
@@ -873,10 +868,7 @@ static int init_render_ring(struct intel_engine_cs *engine)
 	if (IS_GEN_RANGE(dev_priv, 6, 7))
 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	if (INTEL_GEN(dev_priv) >= 6)
-		ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
-
-	return 0;
+	return xcs_resume(engine);
 }
 
 static void cancel_requests(struct intel_engine_cs *engine)
@@ -884,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
@@ -896,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
 
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void i9xx_submit_request(struct i915_request *request)
@@ -918,11 +910,8 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	*cs++ = I915_GEM_HWS_SEQNO_ADDR;
 	*cs++ = rq->fence.seqno;
 
-	*cs++ = MI_STORE_DWORD_INDEX;
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	*cs++ = MI_USER_INTERRUPT;
+	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -940,10 +929,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 
 	*cs++ = MI_FLUSH;
 
-	*cs++ = MI_STORE_DWORD_INDEX;
-	*cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
-	*cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
-
 	BUILD_BUG_ON(GEN5_WA_STORES < 1);
 	for (i = 0; i < GEN5_WA_STORES; i++) {
 		*cs++ = MI_STORE_DWORD_INDEX;
@@ -952,7 +937,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
 	}
 
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
 
 	rq->tail = intel_ring_offset(rq, cs);
 	assert_ring_tail_valid(rq->ring, rq->tail);
@@ -991,20 +975,20 @@ i9xx_irq_disable(struct intel_engine_cs *engine)
 static void
 i8xx_irq_enable(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
 
-	dev_priv->irq_mask &= ~engine->irq_enable_mask;
-	I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
-	POSTING_READ16(RING_IMR(engine->mmio_base));
+	i915->irq_mask &= ~engine->irq_enable_mask;
+	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
+	ENGINE_POSTING_READ16(engine, RING_IMR);
 }
 
 static void
 i8xx_irq_disable(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
 
-	dev_priv->irq_mask |= engine->irq_enable_mask;
-	I915_WRITE16(GEN2_IMR, dev_priv->irq_mask);
+	i915->irq_mask |= engine->irq_enable_mask;
+	intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask);
 }
 
 static int
@@ -1282,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-	GEM_BUG_ON(timeline == &engine->timeline);
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring)
@@ -1317,10 +1299,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine,
 void intel_ring_free(struct kref *ref)
 {
 	struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-	struct drm_i915_gem_object *obj = ring->vma->obj;
 
 	i915_vma_close(ring->vma);
-	__i915_gem_object_release_unless_active(obj);
+	i915_vma_put(ring->vma);
 
 	i915_timeline_put(ring->timeline);
 	kfree(ring);
@@ -1346,64 +1327,28 @@ static void ring_context_destroy(struct kref *ref)
 
 static int __context_pin_ppgtt(struct i915_gem_context *ctx)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_address_space *vm;
 	int err = 0;
 
-	ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
-	if (ppgtt)
-		err = gen6_ppgtt_pin(ppgtt);
+	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+	if (vm)
+		err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
 
 	return err;
 }
 
 static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_address_space *vm;
 
-	ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt;
-	if (ppgtt)
-		gen6_ppgtt_unpin(ppgtt);
-}
-
-static int __context_pin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-	int err;
-
-	vma = ce->state;
-	if (!vma)
-		return 0;
-
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
-	if (err)
-		return err;
-
-	/*
-	 * And mark is as a globally pinned object to let the shrinker know
-	 * it cannot reclaim the object until we release it.
-	 */
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct intel_context *ce)
-{
-	struct i915_vma *vma;
-
-	vma = ce->state;
-	if (!vma)
-		return;
-
-	vma->obj->pin_global--;
-	i915_vma_unpin(vma);
+	vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+	if (vm)
+		gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
 }
 
 static void ring_context_unpin(struct intel_context *ce)
 {
 	__context_unpin_ppgtt(ce->gem_context);
-	__context_unpin(ce);
 }
 
 static struct i915_vma *
@@ -1414,7 +1359,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct i915_vma *vma;
 	int err;
 
-	obj = i915_gem_object_create(i915, engine->context_size);
+	obj = i915_gem_object_create_shmem(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
@@ -1493,18 +1438,18 @@ static int ring_context_pin(struct intel_context *ce)
 		ce->state = vma;
 	}
 
-	err = __context_pin(ce);
+	err = intel_context_active_acquire(ce, PIN_HIGH);
 	if (err)
 		return err;
 
 	err = __context_pin_ppgtt(ce->gem_context);
 	if (err)
-		goto err_unpin;
+		goto err_active;
 
 	return 0;
 
-err_unpin:
-	__context_unpin(ce);
+err_active:
+	intel_context_active_release(ce);
 	return err;
 }
 
@@ -1517,79 +1462,14 @@ static const struct intel_context_ops ring_context_ops = {
 	.pin = ring_context_pin,
 	.unpin = ring_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.reset = ring_context_reset,
 	.destroy = ring_context_destroy,
 };
 
-static int intel_init_ring_buffer(struct intel_engine_cs *engine)
-{
-	struct i915_timeline *timeline;
-	struct intel_ring *ring;
-	int err;
-
-	err = intel_engine_setup_common(engine);
-	if (err)
-		return err;
-
-	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
-	if (IS_ERR(timeline)) {
-		err = PTR_ERR(timeline);
-		goto err;
-	}
-	GEM_BUG_ON(timeline->has_initial_breadcrumb);
-
-	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
-	i915_timeline_put(timeline);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err;
-	}
-
-	err = intel_ring_pin(ring);
-	if (err)
-		goto err_ring;
-
-	GEM_BUG_ON(engine->buffer);
-	engine->buffer = ring;
-
-	err = intel_engine_init_common(engine);
-	if (err)
-		goto err_unpin;
-
-	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
-
-	return 0;
-
-err_unpin:
-	intel_ring_unpin(ring);
-err_ring:
-	intel_ring_put(ring);
-err:
-	intel_engine_cleanup_common(engine);
-	return err;
-}
-
-void intel_engine_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
-		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
-
-	intel_ring_unpin(engine->buffer);
-	intel_ring_put(engine->buffer);
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	intel_engine_cleanup_common(engine);
-
-	dev_priv->engine[engine->id] = NULL;
-	kfree(engine);
-}
-
-static int load_pd_dir(struct i915_request *rq,
-		       const struct i915_hw_ppgtt *ppgtt)
+static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
 {
 	const struct intel_engine_cs * const engine = rq->engine;
 	u32 *cs;
@@ -1604,7 +1484,7 @@ static int load_pd_dir(struct i915_request *rq,
 
 	*cs++ = MI_LOAD_REGISTER_IMM(1);
 	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
-	*cs++ = ppgtt->pd.base.ggtt_offset << 10;
+	*cs++ = ppgtt->pd->base.ggtt_offset << 10;
 
 	intel_ring_advance(rq, cs);
 
@@ -1646,11 +1526,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 		/* These flags are for resource streamer on HSW+ */
 		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
 	else
+		/* We need to save the extended state for powersaving modes */
 		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
 
 	len = 4;
 	if (IS_GEN(i915, 7))
 		len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
+	else if (IS_GEN(i915, 5))
+		len += 2;
 	if (flags & MI_FORCE_RESTORE) {
 		GEM_BUG_ON(flags & MI_RESTORE_INHIBIT);
 		flags &= ~MI_FORCE_RESTORE;
@@ -1679,6 +1562,14 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 						GEN6_PSMI_SLEEP_MSG_DISABLE);
 			}
 		}
+	} else if (IS_GEN(i915, 5)) {
+		/*
+		 * This w/a is only listed for pre-production ilk a/b steppings,
+		 * but is also mentioned for programming the powerctx. To be
+		 * safe, just apply the workaround; we do not use SyncFlush so
+		 * this should never take effect and so be a no-op!
+		 */
+		*cs++ = MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN;
 	}
 
 	if (force_restore) {
@@ -1732,6 +1623,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
 			*cs++ = MI_NOOP;
 		}
 		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	} else if (IS_GEN(i915, 5)) {
+		*cs++ = MI_SUSPEND_FLUSH;
 	}
 
 	intel_ring_advance(rq, cs);
@@ -1771,15 +1664,16 @@ static int switch_context(struct i915_request *rq)
 {
 	struct intel_engine_cs *engine = rq->engine;
 	struct i915_gem_context *ctx = rq->gem_context;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+	struct i915_address_space *vm =
+		ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm;
 	unsigned int unwind_mm = 0;
 	u32 hw_flags = 0;
 	int ret, i;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
-	if (ppgtt) {
+	if (vm) {
+		struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 		int loops;
 
 		/*
@@ -1826,7 +1720,7 @@ static int switch_context(struct i915_request *rq)
 			goto err_mm;
 	}
 
-	if (ppgtt) {
+	if (vm) {
 		ret = engine->emit_flush(rq, EMIT_INVALIDATE);
 		if (ret)
 			goto err_mm;
@@ -1869,7 +1763,7 @@ static int switch_context(struct i915_request *rq)
 
 err_mm:
 	if (unwind_mm)
-		ppgtt->pd_dirty_engines |= unwind_mm;
+		i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
 err:
 	return ret;
 }
@@ -1888,12 +1782,12 @@ static int ring_request_alloc(struct i915_request *request)
 	 */
 	request->reserved_space += LEGACY_REQUEST_SIZE;
 
-	ret = switch_context(request);
+	/* Unconditionally invalidate GPU caches and TLBs. */
+	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
 	if (ret)
 		return ret;
 
-	/* Unconditionally invalidate GPU caches and TLBs. */
-	ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+	ret = switch_context(request);
 	if (ret)
 		return ret;
 
@@ -1906,8 +1800,6 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 	struct i915_request *target;
 	long timeout;
 
-	lockdep_assert_held(&ring->vma->vm->i915->drm.struct_mutex);
-
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
@@ -1923,7 +1815,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
 		return -ENOSPC;
 
 	timeout = i915_request_wait(target,
-				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    I915_WAIT_INTERRUPTIBLE,
 				    MAX_SCHEDULE_TIMEOUT);
 	if (timeout < 0)
 		return timeout;
@@ -2167,24 +2059,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
 	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
-static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
-				struct intel_engine_cs *engine)
-{
-	if (INTEL_GEN(dev_priv) >= 6) {
-		engine->irq_enable = gen6_irq_enable;
-		engine->irq_disable = gen6_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 5) {
-		engine->irq_enable = gen5_irq_enable;
-		engine->irq_disable = gen5_irq_disable;
-	} else if (INTEL_GEN(dev_priv) >= 3) {
-		engine->irq_enable = i9xx_irq_enable;
-		engine->irq_disable = i9xx_irq_disable;
-	} else {
-		engine->irq_enable = i8xx_irq_enable;
-		engine->irq_disable = i8xx_irq_disable;
-	}
-}
-
 static void i9xx_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = i9xx_submit_request;
@@ -2200,15 +2074,51 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
 	engine->submit_request = gen6_bsd_submit_request;
 }
 
-static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
-				      struct intel_engine_cs *engine)
+static void ring_destroy(struct intel_engine_cs *engine)
 {
+	struct drm_i915_private *dev_priv = engine->i915;
+
+	WARN_ON(INTEL_GEN(dev_priv) > 2 &&
+		(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE) == 0);
+
+	intel_ring_unpin(engine->buffer);
+	intel_ring_put(engine->buffer);
+
+	intel_engine_cleanup_common(engine);
+	kfree(engine);
+}
+
+static void setup_irq(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (INTEL_GEN(i915) >= 6) {
+		engine->irq_enable = gen6_irq_enable;
+		engine->irq_disable = gen6_irq_disable;
+	} else if (INTEL_GEN(i915) >= 5) {
+		engine->irq_enable = gen5_irq_enable;
+		engine->irq_disable = gen5_irq_disable;
+	} else if (INTEL_GEN(i915) >= 3) {
+		engine->irq_enable = i9xx_irq_enable;
+		engine->irq_disable = i9xx_irq_disable;
+	} else {
+		engine->irq_enable = i8xx_irq_enable;
+		engine->irq_disable = i8xx_irq_disable;
+	}
+}
+
+static void setup_common(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
 	/* gen8+ are only supported with execlists */
-	GEM_BUG_ON(INTEL_GEN(dev_priv) >= 8);
+	GEM_BUG_ON(INTEL_GEN(i915) >= 8);
+
+	setup_irq(engine);
 
-	intel_ring_init_irq(dev_priv, engine);
+	engine->destroy = ring_destroy;
 
-	engine->init_hw = init_ring_common;
+	engine->resume = xcs_resume;
 	engine->reset.prepare = reset_prepare;
 	engine->reset.reset = reset_ring;
 	engine->reset.finish = reset_finish;
@@ -2222,117 +2132,96 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
 	 * engine->emit_init_breadcrumb().
 	 */
 	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
-	if (IS_GEN(dev_priv, 5))
+	if (IS_GEN(i915, 5))
 		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
 
 	engine->set_default_submission = i9xx_set_default_submission;
 
-	if (INTEL_GEN(dev_priv) >= 6)
+	if (INTEL_GEN(i915) >= 6)
 		engine->emit_bb_start = gen6_emit_bb_start;
-	else if (INTEL_GEN(dev_priv) >= 4)
+	else if (INTEL_GEN(i915) >= 4)
 		engine->emit_bb_start = i965_emit_bb_start;
-	else if (IS_I830(dev_priv) || IS_I845G(dev_priv))
+	else if (IS_I830(i915) || IS_I845G(i915))
 		engine->emit_bb_start = i830_emit_bb_start;
 	else
 		engine->emit_bb_start = i915_emit_bb_start;
 }
 
-int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
+static void setup_rcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (HAS_L3_DPF(dev_priv))
+	if (HAS_L3_DPF(i915))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
 
-	if (INTEL_GEN(dev_priv) >= 7) {
+	if (INTEL_GEN(i915) >= 7) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 6)) {
+	} else if (IS_GEN(i915, 6)) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen6_render_ring_flush;
 		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
-	} else if (IS_GEN(dev_priv, 5)) {
+	} else if (IS_GEN(i915, 5)) {
 		engine->emit_flush = gen4_render_ring_flush;
 	} else {
-		if (INTEL_GEN(dev_priv) < 4)
+		if (INTEL_GEN(i915) < 4)
 			engine->emit_flush = gen2_render_ring_flush;
 		else
 			engine->emit_flush = gen4_render_ring_flush;
 		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 
-	if (IS_HASWELL(dev_priv))
+	if (IS_HASWELL(i915))
 		engine->emit_bb_start = hsw_emit_bb_start;
 
-	engine->init_hw = init_render_ring;
-
-	ret = intel_init_ring_buffer(engine);
-	if (ret)
-		return ret;
-
-	return 0;
+	engine->resume = rcs_resume;
 }
 
-int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (INTEL_GEN(dev_priv) >= 6) {
+	if (INTEL_GEN(i915) >= 6) {
 		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->set_default_submission = gen6_bsd_set_default_submission;
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
-		if (IS_GEN(dev_priv, 6))
+		if (IS_GEN(i915, 6))
 			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 		else
 			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 	} else {
 		engine->emit_flush = bsd_ring_flush;
-		if (IS_GEN(dev_priv, 5))
+		if (IS_GEN(i915, 5))
 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
 		else
 			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
 	}
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
+static void setup_bcs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 6);
-
-	intel_ring_default_vfuncs(dev_priv, engine);
+	struct drm_i915_private *i915 = engine->i915;
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
 
-	if (IS_GEN(dev_priv, 6))
+	if (IS_GEN(i915, 6))
 		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
 	else
 		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
-
-	return intel_init_ring_buffer(engine);
 }
 
-int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
+static void setup_vecs(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(INTEL_GEN(dev_priv) < 7);
+	struct drm_i915_private *i915 = engine->i915;
 
-	intel_ring_default_vfuncs(dev_priv, engine);
+	GEM_BUG_ON(INTEL_GEN(i915) < 7);
 
 	engine->emit_flush = gen6_ring_flush;
 	engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
@@ -2340,6 +2229,73 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
 	engine->irq_disable = hsw_vebox_irq_disable;
 
 	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
+}
+
+int intel_ring_submission_setup(struct intel_engine_cs *engine)
+{
+	setup_common(engine);
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		setup_rcs(engine);
+		break;
+	case VIDEO_DECODE_CLASS:
+		setup_vcs(engine);
+		break;
+	case COPY_ENGINE_CLASS:
+		setup_bcs(engine);
+		break;
+	case VIDEO_ENHANCEMENT_CLASS:
+		setup_vecs(engine);
+		break;
+	default:
+		MISSING_CASE(engine->class);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int intel_ring_submission_init(struct intel_engine_cs *engine)
+{
+	struct i915_timeline *timeline;
+	struct intel_ring *ring;
+	int err;
+
+	timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
+	if (IS_ERR(timeline)) {
+		err = PTR_ERR(timeline);
+		goto err;
+	}
+	GEM_BUG_ON(timeline->has_initial_breadcrumb);
+
+	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
+	i915_timeline_put(timeline);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto err;
+	}
+
+	err = intel_ring_pin(ring);
+	if (err)
+		goto err_ring;
+
+	GEM_BUG_ON(engine->buffer);
+	engine->buffer = ring;
 
-	return intel_init_ring_buffer(engine);
+	err = intel_engine_init_common(engine);
+	if (err)
+		goto err_unpin;
+
+	GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+
+	return 0;
+
+err_unpin:
+	intel_ring_unpin(ring);
+err_ring:
+	intel_ring_put(ring);
+err:
+	intel_engine_cleanup_common(engine);
+	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
new file mode 100644
index 000000000000..a0756f006f5f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -0,0 +1,159 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_lrc_reg.h"
+#include "intel_sseu.h"
+
+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+	unsigned int i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+{
+	return hweight8(sseu->subslice_mask[slice]);
+}
+
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+			 const struct intel_sseu *req_sseu)
+{
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+	bool subslice_pg = sseu->has_subslice_pg;
+	struct intel_sseu ctx_sseu;
+	u8 slices, subslices;
+	u32 rpcs = 0;
+
+	/*
+	 * No explicit RPCS request is needed to ensure full
+	 * slice/subslice/EU enablement prior to Gen9.
+	 */
+	if (INTEL_GEN(i915) < 9)
+		return 0;
+
+	/*
+	 * If i915/perf is active, we want a stable powergating configuration
+	 * on the system.
+	 *
+	 * We could choose full enablement, but on ICL we know there are use
+	 * cases which disable slices for functional, apart for performance
+	 * reasons. So in this case we select a known stable subset.
+	 */
+	if (!i915->perf.oa.exclusive_stream) {
+		ctx_sseu = *req_sseu;
+	} else {
+		ctx_sseu = intel_sseu_from_device_info(sseu);
+
+		if (IS_GEN(i915, 11)) {
+			/*
+			 * We only need subslice count so it doesn't matter
+			 * which ones we select - just turn off low bits in the
+			 * amount of half of all available subslices per slice.
+			 */
+			ctx_sseu.subslice_mask =
+				~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+			ctx_sseu.slice_mask = 0x1;
+		}
+	}
+
+	slices = hweight8(ctx_sseu.slice_mask);
+	subslices = hweight8(ctx_sseu.subslice_mask);
+
+	/*
+	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
+	 * wide and Icelake has up to eight subslices, specfial programming is
+	 * needed in order to correctly enable all subslices.
+	 *
+	 * According to documentation software must consider the configuration
+	 * as 2x4x8 and hardware will translate this to 1x8x8.
+	 *
+	 * Furthemore, even though SScount is three bits, maximum documented
+	 * value for it is four. From this some rules/restrictions follow:
+	 *
+	 * 1.
+	 * If enabled subslice count is greater than four, two whole slices must
+	 * be enabled instead.
+	 *
+	 * 2.
+	 * When more than one slice is enabled, hardware ignores the subslice
+	 * count altogether.
+	 *
+	 * From these restrictions it follows that it is not possible to enable
+	 * a count of subslices between the SScount maximum of four restriction,
+	 * and the maximum available number on a particular SKU. Either all
+	 * subslices are enabled, or a count between one and four on the first
+	 * slice.
+	 */
+	if (IS_GEN(i915, 11) &&
+	    slices == 1 &&
+	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+		GEM_BUG_ON(subslices & 1);
+
+		subslice_pg = false;
+		slices *= 2;
+	}
+
+	/*
+	 * Starting in Gen9, render power gating can leave
+	 * slice/subslice/EU in a partially enabled state. We
+	 * must make an explicit request through RPCS for full
+	 * enablement.
+	 */
+	if (sseu->has_slice_pg) {
+		u32 mask, val = slices;
+
+		if (INTEL_GEN(i915) >= 11) {
+			mask = GEN11_RPCS_S_CNT_MASK;
+			val <<= GEN11_RPCS_S_CNT_SHIFT;
+		} else {
+			mask = GEN8_RPCS_S_CNT_MASK;
+			val <<= GEN8_RPCS_S_CNT_SHIFT;
+		}
+
+		GEM_BUG_ON(val & ~mask);
+		val &= mask;
+
+		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
+	}
+
+	if (subslice_pg) {
+		u32 val = subslices;
+
+		val <<= GEN8_RPCS_SS_CNT_SHIFT;
+
+		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
+		val &= GEN8_RPCS_SS_CNT_MASK;
+
+		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
+	}
+
+	if (sseu->has_eu_pg) {
+		u32 val;
+
+		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
+		val &= GEN8_RPCS_EU_MIN_MASK;
+
+		rpcs |= val;
+
+		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
+		val &= GEN8_RPCS_EU_MAX_MASK;
+
+		rpcs |= val;
+
+		rpcs |= GEN8_RPCS_ENABLE;
+	}
+
+	return rpcs;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
new file mode 100644
index 000000000000..b50d0401a4e2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -0,0 +1,75 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_SSEU_H__
+#define __INTEL_SSEU_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+struct drm_i915_private;
+
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
+#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+
+struct sseu_dev_info {
+	u8 slice_mask;
+	u8 subslice_mask[GEN_MAX_SLICES];
+	u16 eu_total;
+	u8 eu_per_subslice;
+	u8 min_eu_in_pool;
+	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
+	u8 subslice_7eu[3];
+	u8 has_slice_pg:1;
+	u8 has_subslice_pg:1;
+	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+};
+
+/*
+ * Powergating configuration for a particular (context,engine).
+ */
+struct intel_sseu {
+	u8 slice_mask;
+	u8 subslice_mask;
+	u8 min_eus_per_subslice;
+	u8 max_eus_per_subslice;
+};
+
+static inline struct intel_sseu
+intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
+{
+	struct intel_sseu value = {
+		.slice_mask = sseu->slice_mask,
+		.subslice_mask = sseu->subslice_mask[0],
+		.min_eus_per_subslice = sseu->max_eus_per_subslice,
+		.max_eus_per_subslice = sseu->max_eus_per_subslice,
+	};
+
+	return value;
+}
+
+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
+
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+
+u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
+			 const struct intel_sseu *req_sseu);
+
+#endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 841b8e515f4d..15e90fd2cfdc 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_drv.h"
+#include "intel_context.h"
 #include "intel_workarounds.h"
 
 /**
@@ -122,6 +123,7 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
 			wal->wa_count++;
 			wa_->val |= wa->val;
 			wa_->mask |= wa->mask;
+			wa_->read |= wa->read;
 			return;
 		}
 	}
@@ -146,9 +148,10 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
 		   u32 val)
 {
 	struct i915_wa wa = {
-		.reg = reg,
+		.reg  = reg,
 		.mask = mask,
-		.val = val
+		.val  = val,
+		.read = mask,
 	};
 
 	_wa_add(wal, &wa);
@@ -172,6 +175,19 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 	wa_write_masked_or(wal, reg, val, val);
 }
 
+static void
+ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
+{
+	struct i915_wa wa = {
+		.reg  = reg,
+		.mask = mask,
+		.val  = val,
+		/* Bonkers HW, skip verifying */
+	};
+
+	_wa_add(wal, &wa);
+}
+
 #define WA_SET_BIT_MASKED(addr, mask) \
 	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
 
@@ -181,10 +197,9 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 #define WA_SET_FIELD_MASKED(addr, mask, value) \
 	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
 
-static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
+				      struct i915_wa_list *wal)
 {
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
-
 	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 
 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
@@ -230,12 +245,12 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
 			    GEN6_WIZ_HASHING_16x4);
 }
 
-static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 
-	gen8_ctx_workarounds_init(engine);
+	gen8_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
@@ -258,11 +273,10 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
 			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 }
 
-static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
-
-	gen8_ctx_workarounds_init(engine);
+	gen8_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:chv */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
@@ -271,10 +285,10 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
 	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 }
 
-static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
+				      struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 
 	if (HAS_LLC(i915)) {
 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
@@ -369,10 +383,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
 		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
 }
 
-static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
+static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
+				struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 	u8 vals[3] = { 0, 0, 0 };
 	unsigned int i;
 
@@ -409,17 +423,17 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
 			    GEN9_IZ_HASHING(0, vals[0]));
 }
 
-static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
-	gen9_ctx_workarounds_init(engine);
-	skl_tune_iz_hashing(engine);
+	gen9_ctx_workarounds_init(engine, wal);
+	skl_tune_iz_hashing(engine, wal);
 }
 
-static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
-
-	gen9_ctx_workarounds_init(engine);
+	gen9_ctx_workarounds_init(engine, wal);
 
 	/* WaDisableThreadStallDopClockGating:bxt */
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -430,12 +444,12 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 }
 
-static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 
-	gen9_ctx_workarounds_init(engine);
+	gen9_ctx_workarounds_init(engine, wal);
 
 	/* WaToEnableHwFixForPushConstHWBug:kbl */
 	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
@@ -447,22 +461,20 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
-static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
-
-	gen9_ctx_workarounds_init(engine);
+	gen9_ctx_workarounds_init(engine, wal);
 
 	/* WaToEnableHwFixForPushConstHWBug:glk */
 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 }
 
-static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
-
-	gen9_ctx_workarounds_init(engine);
+	gen9_ctx_workarounds_init(engine, wal);
 
 	/* WaToEnableHwFixForPushConstHWBug:cfl */
 	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
@@ -473,10 +485,10 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
 			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 }
 
-static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 
 	/* WaForceContextSaveRestoreNonCoherent:cnl */
 	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
@@ -513,10 +525,16 @@ static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
 	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
 }
 
-static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
+static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
+				     struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
+
+	/* WaDisableBankHangMode:icl */
+	wa_write(wal,
+		 GEN8_L3CNTLREG,
+		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
+		 GEN8_ERRDETBCTRL);
 
 	/* WaDisableBankHangMode:icl */
 	wa_write(wal,
@@ -562,33 +580,42 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
 	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
 			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
 			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
+
+	/* allow headerless messages for preemptible GPGPU context */
+	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
+			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
 }
 
-void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
+static void
+__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
+			   struct i915_wa_list *wal,
+			   const char *name)
 {
 	struct drm_i915_private *i915 = engine->i915;
-	struct i915_wa_list *wal = &engine->ctx_wa_list;
 
-	wa_init_start(wal, "context");
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	wa_init_start(wal, name);
 
 	if (IS_GEN(i915, 11))
-		icl_ctx_workarounds_init(engine);
+		icl_ctx_workarounds_init(engine, wal);
 	else if (IS_CANNONLAKE(i915))
-		cnl_ctx_workarounds_init(engine);
+		cnl_ctx_workarounds_init(engine, wal);
 	else if (IS_COFFEELAKE(i915))
-		cfl_ctx_workarounds_init(engine);
+		cfl_ctx_workarounds_init(engine, wal);
 	else if (IS_GEMINILAKE(i915))
-		glk_ctx_workarounds_init(engine);
+		glk_ctx_workarounds_init(engine, wal);
 	else if (IS_KABYLAKE(i915))
-		kbl_ctx_workarounds_init(engine);
+		kbl_ctx_workarounds_init(engine, wal);
 	else if (IS_BROXTON(i915))
-		bxt_ctx_workarounds_init(engine);
+		bxt_ctx_workarounds_init(engine, wal);
 	else if (IS_SKYLAKE(i915))
-		skl_ctx_workarounds_init(engine);
+		skl_ctx_workarounds_init(engine, wal);
 	else if (IS_CHERRYVIEW(i915))
-		chv_ctx_workarounds_init(engine);
+		chv_ctx_workarounds_init(engine, wal);
 	else if (IS_BROADWELL(i915))
-		bdw_ctx_workarounds_init(engine);
+		bdw_ctx_workarounds_init(engine, wal);
 	else if (INTEL_GEN(i915) < 8)
 		return;
 	else
@@ -597,6 +624,11 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
 	wa_init_finish(wal);
 }
 
+void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
+{
+	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
+}
+
 int intel_engine_emit_ctx_wa(struct i915_request *rq)
 {
 	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
@@ -915,6 +947,21 @@ wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 	return fw;
 }
 
+static bool
+wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
+{
+	if ((cur ^ wa->val) & wa->read) {
+		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
+			  name, from, i915_mmio_reg_offset(wa->reg),
+			  cur, cur & wa->read,
+			  wa->val, wa->mask);
+
+		return false;
+	}
+
+	return true;
+}
+
 static void
 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 {
@@ -933,6 +980,10 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
 		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
+		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+			wa_verify(wa,
+				  intel_uncore_read_fw(uncore, wa->reg),
+				  wal->name, "application");
 	}
 
 	intel_uncore_forcewake_put__locked(uncore, fw);
@@ -944,20 +995,6 @@ void intel_gt_apply_workarounds(struct drm_i915_private *i915)
 	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
 }
 
-static bool
-wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
-{
-	if ((cur ^ wa->val) & wa->mask) {
-		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
-			  name, from, i915_mmio_reg_offset(wa->reg), cur,
-			  cur & wa->mask, wa->val, wa->mask);
-
-		return false;
-	}
-
-	return true;
-}
-
 static bool wa_list_verify(struct intel_uncore *uncore,
 			   const struct i915_wa_list *wal,
 			   const char *from)
@@ -981,7 +1018,7 @@ bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
 }
 
 static void
-whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
 {
 	struct i915_wa wa = {
 		.reg = reg
@@ -990,9 +1027,16 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
 		return;
 
+	wa.reg.reg |= flags;
 	_wa_add(wal, &wa);
 }
 
+static void
+whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+{
+	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
+}
+
 static void gen9_whitelist_build(struct i915_wa_list *w)
 {
 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1005,56 +1049,103 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
 }
 
-static void skl_whitelist_build(struct i915_wa_list *w)
+static void skl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);
 
 	/* WaDisableLSQCROPERFforOCL:skl */
 	whitelist_reg(w, GEN8_L3SQCREG4);
 }
 
-static void bxt_whitelist_build(struct i915_wa_list *w)
+static void bxt_whitelist_build(struct intel_engine_cs *engine)
 {
-	gen9_whitelist_build(w);
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	gen9_whitelist_build(&engine->whitelist);
 }
 
-static void kbl_whitelist_build(struct i915_wa_list *w)
+static void kbl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);
 
 	/* WaDisableLSQCROPERFforOCL:kbl */
 	whitelist_reg(w, GEN8_L3SQCREG4);
 }
 
-static void glk_whitelist_build(struct i915_wa_list *w)
+static void glk_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);
 
 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
 }
 
-static void cfl_whitelist_build(struct i915_wa_list *w)
+static void cfl_whitelist_build(struct intel_engine_cs *engine)
 {
-	gen9_whitelist_build(w);
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	gen9_whitelist_build(&engine->whitelist);
 }
 
-static void cnl_whitelist_build(struct i915_wa_list *w)
+static void cnl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
 	whitelist_reg(w, GEN8_CS_CHICKEN1);
 }
 
-static void icl_whitelist_build(struct i915_wa_list *w)
+static void icl_whitelist_build(struct intel_engine_cs *engine)
 {
-	/* WaAllowUMDToModifyHalfSliceChicken7:icl */
-	whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
-
-	/* WaAllowUMDToModifySamplerMode:icl */
-	whitelist_reg(w, GEN10_SAMPLER_MODE);
+	struct i915_wa_list *w = &engine->whitelist;
 
-	/* WaEnableStateCacheRedirectToCS:icl */
-	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+	switch (engine->class) {
+	case RENDER_CLASS:
+		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
+		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+
+		/* WaAllowUMDToModifySamplerMode:icl */
+		whitelist_reg(w, GEN10_SAMPLER_MODE);
+
+		/* WaEnableStateCacheRedirectToCS:icl */
+		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+		break;
+
+	case VIDEO_DECODE_CLASS:
+		/* hucStatusRegOffset */
+		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		/* hucUKernelHdrInfoRegOffset */
+		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		/* hucStatus2RegOffset */
+		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		break;
+
+	default:
+		break;
+	}
 }
 
 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -1062,24 +1153,22 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_wa_list *w = &engine->whitelist;
 
-	GEM_BUG_ON(engine->id != RCS0);
-
 	wa_init_start(w, "whitelist");
 
 	if (IS_GEN(i915, 11))
-		icl_whitelist_build(w);
+		icl_whitelist_build(engine);
 	else if (IS_CANNONLAKE(i915))
-		cnl_whitelist_build(w);
+		cnl_whitelist_build(engine);
 	else if (IS_COFFEELAKE(i915))
-		cfl_whitelist_build(w);
+		cfl_whitelist_build(engine);
 	else if (IS_GEMINILAKE(i915))
-		glk_whitelist_build(w);
+		glk_whitelist_build(engine);
 	else if (IS_KABYLAKE(i915))
-		kbl_whitelist_build(w);
+		kbl_whitelist_build(engine);
 	else if (IS_BROXTON(i915))
-		bxt_whitelist_build(w);
+		bxt_whitelist_build(engine);
 	else if (IS_SKYLAKE(i915))
-		skl_whitelist_build(w);
+		skl_whitelist_build(engine);
 	else if (INTEL_GEN(i915) <= 8)
 		return;
 	else
@@ -1123,9 +1212,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
 
 		/* WaPipelineFlushCoherentLines:icl */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN8_LQSC_FLUSH_COHERENT_LINES);
+		ignore_wa_write_or(wal,
+				   GEN8_L3SQCREG4,
+				   GEN8_LQSC_FLUSH_COHERENT_LINES,
+				   GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 		/*
 		 * Wa_1405543622:icl
@@ -1152,9 +1242,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * Wa_1405733216:icl
 		 * Formerly known as WaDisableCleanEvicts
 		 */
-		wa_write_or(wal,
-			    GEN8_L3SQCREG4,
-			    GEN11_LQSC_CLEAN_EVICT_DISABLE);
+		ignore_wa_write_or(wal,
+				   GEN8_L3SQCREG4,
+				   GEN11_LQSC_CLEAN_EVICT_DISABLE,
+				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
 
 		/* WaForwardProgressSoftReset:icl */
 		wa_write_or(wal,
@@ -1260,6 +1351,128 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
 	wa_list_apply(engine->uncore, &engine->wa_list);
 }
 
+static struct i915_vma *
+create_scratch(struct i915_address_space *vm, int count)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	unsigned int size;
+	int err;
+
+	size = round_up(count * sizeof(u32), PAGE_SIZE);
+	obj = i915_gem_object_create_internal(vm->i915, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+	vma = i915_vma_instance(obj, vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0,
+			   i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
+	if (err)
+		goto err_obj;
+
+	return vma;
+
+err_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
+}
+
+static int
+wa_list_srm(struct i915_request *rq,
+	    const struct i915_wa_list *wal,
+	    struct i915_vma *vma)
+{
+	const struct i915_wa *wa;
+	unsigned int i;
+	u32 srm, *cs;
+
+	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	if (INTEL_GEN(rq->i915) >= 8)
+		srm++;
+
+	cs = intel_ring_begin(rq, 4 * wal->count);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+		*cs++ = srm;
+		*cs++ = i915_mmio_reg_offset(wa->reg);
+		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
+		*cs++ = 0;
+	}
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int engine_wa_list_verify(struct intel_context *ce,
+				 const struct i915_wa_list * const wal,
+				 const char *from)
+{
+	const struct i915_wa *wa;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	unsigned int i;
+	u32 *results;
+	int err;
+
+	if (!wal->count)
+		return 0;
+
+	vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_vma;
+	}
+
+	err = wa_list_srm(rq, wal, vma);
+	if (err)
+		goto err_vma;
+
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_vma;
+	}
+
+	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(results)) {
+		err = PTR_ERR(results);
+		goto err_vma;
+	}
+
+	err = 0;
+	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+		if (!wa_verify(wa, results[i], wal->name, from))
+			err = -ENXIO;
+
+	i915_gem_object_unpin_map(vma->obj);
+
+err_vma:
+	i915_vma_unpin(vma);
+	i915_vma_put(vma);
+	return err;
+}
+
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+				    const char *from)
+{
+	return engine_wa_list_verify(engine->kernel_context,
+				     &engine->wa_list,
+				     from);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_workarounds.c"
+#include "selftest_workarounds.c"
 #endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
index 34eee5ec511e..3761a6ee58bb 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
@@ -4,13 +4,17 @@
  * Copyright © 2014-2018 Intel Corporation
  */
 
-#ifndef _I915_WORKAROUNDS_H_
-#define _I915_WORKAROUNDS_H_
+#ifndef _INTEL_WORKAROUNDS_H_
+#define _INTEL_WORKAROUNDS_H_
 
 #include <linux/slab.h>
 
 #include "intel_workarounds_types.h"
 
+struct drm_i915_private;
+struct i915_request;
+struct intel_engine_cs;
+
 static inline void intel_wa_list_free(struct i915_wa_list *wal)
 {
 	kfree(wal->list);
@@ -30,5 +34,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine);
 
 void intel_engine_init_workarounds(struct intel_engine_cs *engine);
 void intel_engine_apply_workarounds(struct intel_engine_cs *engine);
+int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
+				    const char *from);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index 30918da180ff..42ac1fb99572 100644
--- a/drivers/gpu/drm/i915/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -12,9 +12,10 @@
 #include "i915_reg.h"
 
 struct i915_wa {
-	i915_reg_t	  reg;
-	u32		  mask;
-	u32		  val;
+	i915_reg_t	reg;
+	u32		mask;
+	u32		val;
+	u32		read;
 };
 
 struct i915_wa_list {
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 61a8206ed677..086801b51441 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -22,8 +22,14 @@
  *
  */
 
+#include "gem/i915_gem_context.h"
+
+#include "i915_drv.h"
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+
 #include "mock_engine.h"
-#include "mock_request.h"
+#include "selftests/mock_request.h"
 
 struct mock_ring {
 	struct intel_ring base;
@@ -140,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)
 
 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}
 
+	ret = intel_context_active_acquire(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -154,6 +166,9 @@ static const struct intel_context_ops mock_context_ops = {
 	.pin = mock_context_pin,
 	.unpin = mock_context_unpin,
 
+	.enter = intel_context_enter_engine,
+	.exit = intel_context_exit_engine,
+
 	.destroy = mock_context_destroy,
 };
 
@@ -214,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);
 
 		i915_request_mark_complete(request);
 	}
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -257,29 +272,39 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.reset.finish = mock_reset_finish;
 	engine->base.cancel_requests = mock_cancel_requests;
 
-	if (i915_timeline_init(i915, &engine->base.timeline, NULL))
-		goto err_free;
-	i915_timeline_set_subclass(&engine->base.timeline, TIMELINE_ENGINE);
-
-	intel_engine_init_breadcrumbs(&engine->base);
-
 	/* fake hw queue */
 	spin_lock_init(&engine->hw_lock);
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
-	if (pin_context(i915->kernel_context, &engine->base,
-			&engine->base.kernel_context))
+	return &engine->base;
+}
+
+int mock_engine_init(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	int err;
+
+	intel_engine_init_active(engine, ENGINE_MOCK);
+	intel_engine_init_breadcrumbs(engine);
+	intel_engine_init_execlists(engine);
+	intel_engine_init__pm(engine);
+
+	engine->kernel_context =
+		i915_gem_context_get_engine(i915->kernel_context, engine->id);
+	if (IS_ERR(engine->kernel_context))
 		goto err_breadcrumbs;
 
-	return &engine->base;
+	err = intel_context_pin(engine->kernel_context);
+	intel_context_put(engine->kernel_context);
+	if (err)
+		goto err_breadcrumbs;
+
+	return 0;
 
 err_breadcrumbs:
-	intel_engine_fini_breadcrumbs(&engine->base);
-	i915_timeline_fini(&engine->base.timeline);
-err_free:
-	kfree(engine);
-	return NULL;
+	intel_engine_fini_breadcrumbs(engine);
+	return -ENOMEM;
 }
 
 void mock_engine_flush(struct intel_engine_cs *engine)
@@ -304,18 +329,12 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;
 
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);
 
 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);
 
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/gt/mock_engine.h
index b9cc3a245f16..3f9b698c49d2 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.h
+++ b/drivers/gpu/drm/i915/gt/mock_engine.h
@@ -29,7 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/timer.h>
 
-#include "../intel_ringbuffer.h"
+#include "gt/intel_engine.h"
 
 struct mock_engine {
 	struct intel_engine_cs base;
@@ -42,6 +42,8 @@ struct mock_engine {
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 				    const char *name,
 				    int id);
+int mock_engine_init(struct intel_engine_cs *engine);
+
 void mock_engine_flush(struct intel_engine_cs *engine);
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index cfaa6b296835..cfaa6b296835 100644
--- a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 050bd1e19e02..1ee4c923044f 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -24,14 +24,20 @@
 
 #include <linux/kthread.h>
 
-#include "../i915_selftest.h"
-#include "i915_random.h"
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_wedge_me.h"
+#include "gem/i915_gem_context.h"
+#include "intel_engine_pm.h"
 
-#include "mock_context.h"
-#include "mock_drm.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_wedge_me.h"
+#include "selftests/igt_atomic.h"
+
+#include "selftests/mock_drm.h"
+
+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/igt_gem_utils.h"
 
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
 
@@ -111,24 +117,18 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
-	if (err)
-		return err;
+	i915_vma_unlock(vma);
 
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
-
-	return 0;
+	return err;
 }
 
 static struct i915_request *
 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = h->i915;
-	struct i915_address_space *vm =
-		h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
 	unsigned int flags;
@@ -173,7 +173,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, h->ctx);
+	rq = igt_request_alloc(h->ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
@@ -339,8 +339,7 @@ static int igt_hang_sanitycheck(void *arg)
 
 		timeout = 0;
 		igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
-			timeout = i915_request_wait(rq,
-						    I915_WAIT_LOCKED,
+			timeout = i915_request_wait(rq, 0,
 						    MAX_SCHEDULE_TIMEOUT);
 		if (i915_reset_failed(i915))
 			timeout = -EIO;
@@ -362,54 +361,6 @@ unlock:
 	return err;
 }
 
-static int igt_global_reset(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	unsigned int reset_count;
-	int err = 0;
-
-	/* Check that we can issue a global GPU reset */
-
-	igt_global_reset_lock(i915);
-
-	reset_count = i915_reset_count(&i915->gpu_error);
-
-	i915_reset(i915, ALL_ENGINES, NULL);
-
-	if (i915_reset_count(&i915->gpu_error) == reset_count) {
-		pr_err("No GPU reset recorded!\n");
-		err = -EINVAL;
-	}
-
-	igt_global_reset_unlock(i915);
-
-	if (i915_reset_failed(i915))
-		err = -EIO;
-
-	return err;
-}
-
-static int igt_wedged_reset(void *arg)
-{
-	struct drm_i915_private *i915 = arg;
-	intel_wakeref_t wakeref;
-
-	/* Check that we can recover a wedged device with a GPU reset */
-
-	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
-
-	i915_gem_set_wedged(i915);
-
-	GEM_BUG_ON(!i915_reset_failed(i915));
-	i915_reset(i915, ALL_ENGINES, NULL);
-
-	intel_runtime_pm_put(i915, wakeref);
-	igt_global_reset_unlock(i915);
-
-	return i915_reset_failed(i915) ? -EIO : 0;
-}
-
 static bool wait_for_idle(struct intel_engine_cs *engine)
 {
 	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
@@ -442,7 +393,7 @@ static int igt_reset_nop(void *arg)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	reset_count = i915_reset_count(&i915->gpu_error);
 	count = 0;
 	do {
@@ -453,7 +404,7 @@ static int igt_reset_nop(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -479,19 +430,6 @@ static int igt_reset_nop(void *arg)
 			break;
 		}
 
-		if (!i915_reset_flush(i915)) {
-			struct drm_printer p =
-				drm_info_printer(i915->drm.dev);
-
-			pr_err("%s failed to idle after reset\n",
-			       engine->name);
-			intel_engine_dump(engine, &p,
-					  "%s\n", engine->name);
-
-			err = -EIO;
-			break;
-		}
-
 		err = igt_flush_test(i915, 0);
 		if (err)
 			break;
@@ -502,7 +440,7 @@ static int igt_reset_nop(void *arg)
 	err = igt_flush_test(i915, I915_WAIT_LOCKED);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 out:
 	mock_file_free(i915, file);
@@ -539,7 +477,7 @@ static int igt_reset_nop_engine(void *arg)
 	}
 
 	i915_gem_context_clear_bannable(ctx);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for_each_engine(engine, i915, id) {
 		unsigned int reset_count, reset_engine_count;
 		unsigned int count;
@@ -565,7 +503,7 @@ static int igt_reset_nop_engine(void *arg)
 			for (i = 0; i < 16; i++) {
 				struct i915_request *rq;
 
-				rq = i915_request_alloc(engine, ctx);
+				rq = igt_request_alloc(ctx, engine);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
 					break;
@@ -594,19 +532,6 @@ static int igt_reset_nop_engine(void *arg)
 				err = -EINVAL;
 				break;
 			}
-
-			if (!i915_reset_flush(i915)) {
-				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
-
-				pr_err("%s failed to idle after reset\n",
-				       engine->name);
-				intel_engine_dump(engine, &p,
-						  "%s\n", engine->name);
-
-				err = -EIO;
-				break;
-			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
@@ -623,7 +548,7 @@ static int igt_reset_nop_engine(void *arg)
 	err = igt_flush_test(i915, I915_WAIT_LOCKED);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 out:
 	mock_file_free(i915, file);
 	if (i915_reset_failed(i915))
@@ -669,6 +594,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
 							     engine);
 
+		intel_engine_pm_get(engine);
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
 			if (active) {
@@ -721,21 +647,9 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 				err = -EINVAL;
 				break;
 			}
-
-			if (!i915_reset_flush(i915)) {
-				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
-
-				pr_err("%s failed to idle after reset\n",
-				       engine->name);
-				intel_engine_dump(engine, &p,
-						  "%s\n", engine->name);
-
-				err = -EIO;
-				break;
-			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		intel_engine_pm_put(engine);
 
 		if (err)
 			break;
@@ -835,7 +749,7 @@ static int active_engine(void *data)
 		struct i915_request *new;
 
 		mutex_lock(&engine->i915->drm.struct_mutex);
-		new = i915_request_alloc(engine, ctx[idx]);
+		new = igt_request_alloc(ctx[idx], engine);
 		if (IS_ERR(new)) {
 			mutex_unlock(&engine->i915->drm.struct_mutex);
 			err = PTR_ERR(new);
@@ -942,6 +856,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 			get_task_struct(tsk);
 		}
 
+		intel_engine_pm_get(engine);
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
 			struct i915_request *rq = NULL;
@@ -1018,6 +933,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
 			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		intel_engine_pm_put(engine);
 		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
 			engine->name, test_name, count);
 
@@ -1069,7 +985,9 @@ unwind:
 		if (err)
 			break;
 
-		err = igt_flush_test(i915, 0);
+		mutex_lock(&i915->drm.struct_mutex);
+		err = igt_flush_test(i915, I915_WAIT_LOCKED);
+		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			break;
 	}
@@ -1179,7 +1097,7 @@ static int igt_reset_wait(void *arg)
 
 	reset_count = fake_hangcheck(i915, ALL_ENGINES);
 
-	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
+	timeout = i915_request_wait(rq, 0, 10);
 	if (timeout < 0) {
 		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
 		       timeout);
@@ -1327,7 +1245,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 		}
 	}
 
+	i915_vma_lock(arg.vma);
 	err = i915_vma_move_to_active(arg.vma, rq, flags);
+	i915_vma_unlock(arg.vma);
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_vma_unpin_fence(arg.vma);
@@ -1432,8 +1352,8 @@ static int igt_reset_evict_ppgtt(void *arg)
 	}
 
 	err = 0;
-	if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm,
+	if (ctx->vm) /* aliasing == global gtt locking, covered above */
+		err = __igt_reset_evict_vma(i915, ctx->vm,
 					    evict_vma, EXEC_OBJECT_WRITE);
 
 out:
@@ -1681,44 +1601,8 @@ err_unlock:
 	return err;
 }
 
-static void __preempt_begin(void)
-{
-	preempt_disable();
-}
-
-static void __preempt_end(void)
-{
-	preempt_enable();
-}
-
-static void __softirq_begin(void)
-{
-	local_bh_disable();
-}
-
-static void __softirq_end(void)
-{
-	local_bh_enable();
-}
-
-static void __hardirq_begin(void)
-{
-	local_irq_disable();
-}
-
-static void __hardirq_end(void)
-{
-	local_irq_enable();
-}
-
-struct atomic_section {
-	const char *name;
-	void (*critical_section_begin)(void);
-	void (*critical_section_end)(void);
-};
-
 static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
-				     const struct atomic_section *p,
+				     const struct igt_atomic_section *p,
 				     const char *mode)
 {
 	struct tasklet_struct * const t = &engine->execlists.tasklet;
@@ -1743,7 +1627,7 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
 }
 
 static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
-				   const struct atomic_section *p)
+				   const struct igt_atomic_section *p)
 {
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_request *rq;
@@ -1781,9 +1665,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
 		struct igt_wedge_me w;
 
 		igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
-			i915_request_wait(rq,
-					  I915_WAIT_LOCKED,
-					  MAX_SCHEDULE_TIMEOUT);
+			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 		if (i915_reset_failed(i915))
 			err = -EIO;
 	}
@@ -1794,79 +1676,43 @@ out:
 	return err;
 }
 
-static void force_reset(struct drm_i915_private *i915)
+static int igt_reset_engines_atomic(void *arg)
 {
-	i915_gem_set_wedged(i915);
-	i915_reset(i915, 0, NULL);
-}
-
-static int igt_atomic_reset(void *arg)
-{
-	static const struct atomic_section phases[] = {
-		{ "preempt", __preempt_begin, __preempt_end },
-		{ "softirq", __softirq_begin, __softirq_end },
-		{ "hardirq", __hardirq_begin, __hardirq_end },
-		{ }
-	};
 	struct drm_i915_private *i915 = arg;
-	intel_wakeref_t wakeref;
+	const typeof(*igt_atomic_phases) *p;
 	int err = 0;
 
-	/* Check that the resets are usable from atomic context */
+	/* Check that the engines resets are usable from atomic context */
+
+	if (!intel_has_reset_engine(i915))
+		return 0;
 
 	if (USES_GUC_SUBMISSION(i915))
-		return 0; /* guc is dead; long live the guc */
+		return 0;
 
 	igt_global_reset_lock(i915);
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
 
 	/* Flush any requests before we get started and check basics */
-	force_reset(i915);
-	if (i915_reset_failed(i915))
+	if (!igt_force_reset(i915))
 		goto unlock;
 
-	if (intel_has_gpu_reset(i915)) {
-		const typeof(*phases) *p;
-
-		for (p = phases; p->name; p++) {
-			GEM_TRACE("intel_gpu_reset under %s\n", p->name);
-
-			p->critical_section_begin();
-			err = intel_gpu_reset(i915, ALL_ENGINES);
-			p->critical_section_end();
-
-			if (err) {
-				pr_err("intel_gpu_reset failed under %s\n",
-				       p->name);
-				goto out;
-			}
-		}
-
-		force_reset(i915);
-	}
-
-	if (intel_has_reset_engine(i915)) {
+	for (p = igt_atomic_phases; p->name; p++) {
 		struct intel_engine_cs *engine;
 		enum intel_engine_id id;
 
 		for_each_engine(engine, i915, id) {
-			const typeof(*phases) *p;
-
-			for (p = phases; p->name; p++) {
-				err = igt_atomic_reset_engine(engine, p);
-				if (err)
-					goto out;
-			}
+			err = igt_atomic_reset_engine(engine, p);
+			if (err)
+				goto out;
 		}
 	}
 
 out:
 	/* As we poke around the guts, do a full reset before continuing. */
-	force_reset(i915);
+	igt_force_reset(i915);
 
 unlock:
-	intel_runtime_pm_put(i915, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	igt_global_reset_unlock(i915);
 
@@ -1876,21 +1722,19 @@ unlock:
 int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
-		SUBTEST(igt_wedged_reset),
 		SUBTEST(igt_hang_sanitycheck),
 		SUBTEST(igt_reset_nop),
 		SUBTEST(igt_reset_nop_engine),
 		SUBTEST(igt_reset_idle_engine),
 		SUBTEST(igt_reset_active_engine),
 		SUBTEST(igt_reset_engines),
+		SUBTEST(igt_reset_engines_atomic),
 		SUBTEST(igt_reset_queue),
 		SUBTEST(igt_reset_wait),
 		SUBTEST(igt_reset_evict_ggtt),
 		SUBTEST(igt_reset_evict_ppgtt),
 		SUBTEST(igt_reset_evict_fence),
 		SUBTEST(igt_handle_error),
-		SUBTEST(igt_atomic_reset),
 	};
 	intel_wakeref_t wakeref;
 	bool saved_hangcheck;
@@ -1902,7 +1746,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return -EIO; /* we're long past hope of a successful reset */
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
 	drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
 
@@ -1913,7 +1757,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index e8b0b5dbcb2c..401e8b539297 100644
--- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -6,15 +6,18 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_reset.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_reset.h"
 
-#include "../i915_selftest.h"
-#include "igt_flush_test.h"
-#include "igt_live_test.h"
-#include "igt_spinner.h"
-#include "i915_random.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/lib_sw_fence.h"
 
-#include "mock_context.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
 
 static int live_sanitycheck(void *arg)
 {
@@ -30,7 +33,7 @@ static int live_sanitycheck(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (igt_spinner_init(&spin, i915))
 		goto err_unlock;
@@ -71,7 +74,7 @@ err_spin:
 	igt_spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -94,7 +97,7 @@ static int live_busywait_preempt(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
@@ -152,7 +155,7 @@ static int live_busywait_preempt(void *arg)
 		 * fails, we hang instead.
 		 */
 
-		lo = i915_request_alloc(engine, ctx_lo);
+		lo = igt_request_alloc(ctx_lo, engine);
 		if (IS_ERR(lo)) {
 			err = PTR_ERR(lo);
 			goto err_vma;
@@ -189,14 +192,14 @@ static int live_busywait_preempt(void *arg)
 		}
 
 		/* Low priority request should be busywaiting now */
-		if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
+		if (i915_request_wait(lo, 0, 1) != -ETIME) {
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
 			goto err_vma;
 		}
 
-		hi = i915_request_alloc(engine, ctx_hi);
+		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
 			goto err_vma;
@@ -217,7 +220,7 @@ static int live_busywait_preempt(void *arg)
 		intel_ring_advance(hi, cs);
 		i915_request_add(hi);
 
-		if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
 			struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 			pr_err("%s: Failed to preempt semaphore busywait!\n",
@@ -252,7 +255,7 @@ err_ctx_hi:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -274,7 +277,7 @@ static int live_preempt(void *arg)
 		pr_err("Logical preemption supported, but not exposed\n");
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -359,7 +362,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -379,7 +382,7 @@ static int live_late_preempt(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -463,7 +466,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -529,7 +532,7 @@ static int live_suppress_self_preempt(void *arg)
 		return 0; /* presume black blox */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (preempt_client_init(i915, &a))
 		goto err_unlock;
@@ -603,7 +606,7 @@ err_client_a:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -641,14 +644,19 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	GEM_BUG_ON(i915_request_completed(rq));
 
 	i915_sw_fence_init(&rq->submit, dummy_notify);
-	i915_sw_fence_commit(&rq->submit);
+	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
 	return rq;
 }
 
 static void dummy_request_free(struct i915_request *dummy)
 {
+	/* We have to fake the CS interrupt to kick the next request */
+	i915_sw_fence_commit(&dummy->submit);
+
 	i915_request_mark_complete(dummy);
+	dma_fence_signal(&dummy->fence);
+
 	i915_sched_node_fini(&dummy->sched);
 	i915_sw_fence_fini(&dummy->submit);
 
@@ -675,7 +683,7 @@ static int live_suppress_wait_preempt(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
 		goto err_unlock;
@@ -731,7 +739,6 @@ static int live_suppress_wait_preempt(void *arg)
 			GEM_BUG_ON(!i915_request_started(rq[0]));
 
 			if (i915_request_wait(rq[depth],
-					      I915_WAIT_LOCKED |
 					      I915_WAIT_PRIORITY,
 					      1) != -ETIME) {
 				pr_err("%s: Waiter depth:%d completed!\n",
@@ -768,7 +775,7 @@ err_client_0:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -799,7 +806,7 @@ static int live_chain_preempt(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (preempt_client_init(i915, &hi))
 		goto err_unlock;
@@ -833,7 +840,7 @@ static int live_chain_preempt(void *arg)
 			 __func__, engine->name, ring_size);
 
 		igt_spinner_end(&lo.spin);
-		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
 			goto err_wedged;
 		}
@@ -861,20 +868,20 @@ static int live_chain_preempt(void *arg)
 			i915_request_add(rq);
 
 			for (i = 0; i < count; i++) {
-				rq = i915_request_alloc(engine, lo.ctx);
+				rq = igt_request_alloc(lo.ctx, engine);
 				if (IS_ERR(rq))
 					goto err_wedged;
 				i915_request_add(rq);
 			}
 
-			rq = i915_request_alloc(engine, hi.ctx);
+			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
 			engine->schedule(rq, &attr);
 
 			igt_spinner_end(&hi.spin);
-			if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
 
@@ -886,11 +893,11 @@ static int live_chain_preempt(void *arg)
 			}
 			igt_spinner_end(&lo.spin);
 
-			rq = i915_request_alloc(engine, lo.ctx);
+			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
-			if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
 
@@ -916,7 +923,7 @@ err_client_hi:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
@@ -945,7 +952,7 @@ static int live_preempt_hang(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -1042,7 +1049,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -1082,7 +1089,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;
 
 	if (batch) {
-		vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL);
+		vma = i915_vma_instance(batch, ctx->vm, NULL);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
@@ -1093,18 +1100,20 @@ static int smoke_submit(struct preempt_smoke *smoke,
 
 	ctx->sched.priority = prio;
 
-	rq = i915_request_alloc(smoke->engine, ctx);
+	rq = igt_request_alloc(ctx, smoke->engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin;
 	}
 
 	if (vma) {
+		i915_vma_lock(vma);
 		err = rq->engine->emit_bb_start(rq,
 						vma->node.start,
 						PAGE_SIZE, 0);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
 	}
 
 	i915_request_add(rq);
@@ -1246,7 +1255,7 @@ static int live_preempt_smoke(void *arg)
 		return -ENOMEM;
 
 	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(smoke.i915);
+	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
 
 	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
@@ -1299,13 +1308,506 @@ err_ctx:
 err_batch:
 	i915_gem_object_put(smoke.batch);
 err_unlock:
-	intel_runtime_pm_put(smoke.i915, wakeref);
+	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
 	mutex_unlock(&smoke.i915->drm.struct_mutex);
 	kfree(smoke.contexts);
 
 	return err;
 }
 
+static int nop_virtual_engine(struct drm_i915_private *i915,
+			      struct intel_engine_cs **siblings,
+			      unsigned int nsibling,
+			      unsigned int nctx,
+			      unsigned int flags)
+#define CHAIN BIT(0)
+{
+	IGT_TIMEOUT(end_time);
+	struct i915_request *request[16];
+	struct i915_gem_context *ctx[16];
+	struct intel_context *ve[16];
+	unsigned long n, prime, nc;
+	struct igt_live_test t;
+	ktime_t times[2] = {};
+	int err;
+
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+
+	for (n = 0; n < nctx; n++) {
+		ctx[n] = kernel_context(i915);
+		if (!ctx[n]) {
+			err = -ENOMEM;
+			nctx = n;
+			goto out;
+		}
+
+		ve[n] = intel_execlists_create_virtual(ctx[n],
+						       siblings, nsibling);
+		if (IS_ERR(ve[n])) {
+			kernel_context_close(ctx[n]);
+			err = PTR_ERR(ve[n]);
+			nctx = n;
+			goto out;
+		}
+
+		err = intel_context_pin(ve[n]);
+		if (err) {
+			intel_context_put(ve[n]);
+			kernel_context_close(ctx[n]);
+			nctx = n;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+	if (err)
+		goto out;
+
+	for_each_prime_number_from(prime, 1, 8192) {
+		times[1] = ktime_get_raw();
+
+		if (flags & CHAIN) {
+			for (nc = 0; nc < nctx; nc++) {
+				for (n = 0; n < prime; n++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		} else {
+			for (n = 0; n < prime; n++) {
+				for (nc = 0; nc < nctx; nc++) {
+					request[nc] =
+						i915_request_create(ve[nc]);
+					if (IS_ERR(request[nc])) {
+						err = PTR_ERR(request[nc]);
+						goto out;
+					}
+
+					i915_request_add(request[nc]);
+				}
+			}
+		}
+
+		for (nc = 0; nc < nctx; nc++) {
+			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
+				pr_err("%s(%s): wait for %llx:%lld timed out\n",
+				       __func__, ve[0]->engine->name,
+				       request[nc]->fence.context,
+				       request[nc]->fence.seqno);
+
+				GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+					  __func__, ve[0]->engine->name,
+					  request[nc]->fence.context,
+					  request[nc]->fence.seqno);
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				break;
+			}
+		}
+
+		times[1] = ktime_sub(ktime_get_raw(), times[1]);
+		if (prime == 1)
+			times[0] = times[1];
+
+		if (__igt_timeout(end_time, NULL))
+			break;
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+	pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n",
+		nctx, ve[0]->engine->name, ktime_to_ns(times[0]),
+		prime, div64_u64(ktime_to_ns(times[1]), prime));
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (nc = 0; nc < nctx; nc++) {
+		intel_context_unpin(ve[nc]);
+		intel_context_put(ve[nc]);
+		kernel_context_close(ctx[nc]);
+	}
+	return err;
+}
+
+static int live_virtual_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int class, inst;
+	int err = -ENODEV;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for_each_engine(engine, i915, id) {
+		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+		if (err) {
+			pr_err("Failed to wrap engine %s: err=%d\n",
+			       engine->name, err);
+			goto out_unlock;
+		}
+	}
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, n;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				continue;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (n = 1; n <= nsibling + 1; n++) {
+			err = nop_virtual_engine(i915, siblings, nsibling,
+						 n, 0);
+			if (err)
+				goto out_unlock;
+		}
+
+		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int mask_virtual_engine(struct drm_i915_private *i915,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling)
+{
+	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+	struct i915_gem_context *ctx;
+	struct intel_context *ve;
+	struct igt_live_test t;
+	unsigned int n;
+	int err;
+
+	/*
+	 * Check that by setting the execution mask on a request, we can
+	 * restrict it to our desired engine within the virtual engine.
+	 */
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_close;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < nsibling; n++) {
+		request[n] = i915_request_create(ve);
+		if (IS_ERR(request[n])) {
+			err = PTR_ERR(request[n]);
+			nsibling = n;
+			goto out;
+		}
+
+		/* Reverse order as it's more likely to be unnatural */
+		request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+		i915_request_get(request[n]);
+		i915_request_add(request[n]);
+	}
+
+	for (n = 0; n < nsibling; n++) {
+		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
+			pr_err("%s(%s): wait for %llx:%lld timed out\n",
+			       __func__, ve->engine->name,
+			       request[n]->fence.context,
+			       request[n]->fence.seqno);
+
+			GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+				  __func__, ve->engine->name,
+				  request[n]->fence.context,
+				  request[n]->fence.seqno);
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto out;
+		}
+
+		if (request[n]->engine != siblings[nsibling - n - 1]) {
+			pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+			       request[n]->engine->name,
+			       siblings[nsibling - n - 1]->name);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	err = igt_live_test_end(&t);
+	if (err)
+		goto out;
+
+out:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	for (n = 0; n < nsibling; n++)
+		i915_request_put(request[n]);
+
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_close:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		unsigned int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = mask_virtual_engine(i915, siblings, nsibling);
+		if (err)
+			goto out_unlock;
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int bond_virtual_engine(struct drm_i915_private *i915,
+			       unsigned int class,
+			       struct intel_engine_cs **siblings,
+			       unsigned int nsibling,
+			       unsigned int flags)
+#define BOND_SCHEDULE BIT(0)
+{
+	struct intel_engine_cs *master;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq[16];
+	enum intel_engine_id id;
+	unsigned long n;
+	int err;
+
+	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		return -ENOMEM;
+
+	err = 0;
+	rq[0] = ERR_PTR(-ENOMEM);
+	for_each_engine(master, i915, id) {
+		struct i915_sw_fence fence = {};
+
+		if (master->class == class)
+			continue;
+
+		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
+
+		rq[0] = igt_request_alloc(ctx, master);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto out;
+		}
+		i915_request_get(rq[0]);
+
+		if (flags & BOND_SCHEDULE) {
+			onstack_fence_init(&fence);
+			err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit,
+							       &fence,
+							       GFP_KERNEL);
+		}
+		i915_request_add(rq[0]);
+		if (err < 0)
+			goto out;
+
+		for (n = 0; n < nsibling; n++) {
+			struct intel_context *ve;
+
+			ve = intel_execlists_create_virtual(ctx,
+							    siblings,
+							    nsibling);
+			if (IS_ERR(ve)) {
+				err = PTR_ERR(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_virtual_engine_attach_bond(ve->engine,
+							       master,
+							       siblings[n]);
+			if (err) {
+				intel_context_put(ve);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			err = intel_context_pin(ve);
+			intel_context_put(ve);
+			if (err) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+
+			rq[n + 1] = i915_request_create(ve);
+			intel_context_unpin(ve);
+			if (IS_ERR(rq[n + 1])) {
+				err = PTR_ERR(rq[n + 1]);
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+			i915_request_get(rq[n + 1]);
+
+			err = i915_request_await_execution(rq[n + 1],
+							   &rq[0]->fence,
+							   ve->engine->bond_execute);
+			i915_request_add(rq[n + 1]);
+			if (err < 0) {
+				onstack_fence_fini(&fence);
+				goto out;
+			}
+		}
+		onstack_fence_fini(&fence);
+
+		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
+			pr_err("Master request did not execute (on %s)!\n",
+			       rq[0]->engine->name);
+			err = -EIO;
+			goto out;
+		}
+
+		for (n = 0; n < nsibling; n++) {
+			if (i915_request_wait(rq[n + 1], 0,
+					      MAX_SCHEDULE_TIMEOUT) < 0) {
+				err = -EIO;
+				goto out;
+			}
+
+			if (rq[n + 1]->engine != siblings[n]) {
+				pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n",
+				       siblings[n]->name,
+				       rq[n + 1]->engine->name,
+				       rq[0]->engine->name);
+				err = -EINVAL;
+				goto out;
+			}
+		}
+
+		for (n = 0; !IS_ERR(rq[n]); n++)
+			i915_request_put(rq[n]);
+		rq[0] = ERR_PTR(-ENOMEM);
+	}
+
+out:
+	for (n = 0; !IS_ERR(rq[n]); n++)
+		i915_request_put(rq[n]);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_virtual_bond(void *arg)
+{
+	static const struct phase {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "", 0 },
+		{ "schedule", BOND_SCHEDULE },
+		{ },
+	};
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+	int err = 0;
+
+	if (USES_GUC_SUBMISSION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		const struct phase *p;
+		int nsibling;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!i915->engine_class[class][inst])
+				break;
+
+			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
+			siblings[nsibling++] = i915->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		for (p = phases; p->name; p++) {
+			err = bond_virtual_engine(i915,
+						  class, siblings, nsibling,
+						  p->flags);
+			if (err) {
+				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
+				       __func__, p->name, class, nsibling, err);
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
@@ -1318,6 +1820,9 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(live_chain_preempt),
 		SUBTEST(live_preempt_hang),
 		SUBTEST(live_preempt_smoke),
+		SUBTEST(live_virtual_engine),
+		SUBTEST(live_virtual_mask),
+		SUBTEST(live_virtual_bond),
 	};
 
 	if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
new file mode 100644
index 000000000000..89da9e7cc1ba
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_atomic.h"
+
+static int igt_global_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	unsigned int reset_count;
+	int err = 0;
+
+	/* Check that we can issue a global GPU reset */
+
+	igt_global_reset_lock(i915);
+
+	reset_count = i915_reset_count(&i915->gpu_error);
+
+	i915_reset(i915, ALL_ENGINES, NULL);
+
+	if (i915_reset_count(&i915->gpu_error) == reset_count) {
+		pr_err("No GPU reset recorded!\n");
+		err = -EINVAL;
+	}
+
+	igt_global_reset_unlock(i915);
+
+	if (i915_reset_failed(i915))
+		err = -EIO;
+
+	return err;
+}
+
+static int igt_wedged_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	intel_wakeref_t wakeref;
+
+	/* Check that we can recover a wedged device with a GPU reset */
+
+	igt_global_reset_lock(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	i915_gem_set_wedged(i915);
+
+	GEM_BUG_ON(!i915_reset_failed(i915));
+	i915_reset(i915, ALL_ENGINES, NULL);
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	igt_global_reset_unlock(i915);
+
+	return i915_reset_failed(i915) ? -EIO : 0;
+}
+
+static int igt_atomic_reset(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	const typeof(*igt_atomic_phases) *p;
+	int err = 0;
+
+	/* Check that the resets are usable from atomic context */
+
+	igt_global_reset_lock(i915);
+	mutex_lock(&i915->drm.struct_mutex);
+
+	/* Flush any requests before we get started and check basics */
+	if (!igt_force_reset(i915))
+		goto unlock;
+
+	for (p = igt_atomic_phases; p->name; p++) {
+		GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+		p->critical_section_begin();
+		reset_prepare(i915);
+		err = intel_gpu_reset(i915, ALL_ENGINES);
+		reset_finish(i915);
+		p->critical_section_end();
+
+		if (err) {
+			pr_err("intel_gpu_reset failed under %s\n", p->name);
+			break;
+		}
+	}
+
+	/* As we poke around the guts, do a full reset before continuing. */
+	igt_force_reset(i915);
+
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	igt_global_reset_unlock(i915);
+
+	return err;
+}
+
+int intel_reset_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
+		SUBTEST(igt_wedged_reset),
+		SUBTEST(igt_atomic_reset),
+	};
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	if (!intel_has_gpu_reset(i915))
+		return 0;
+
+	if (i915_terminally_wedged(i915))
+		return -EIO; /* we're long past hope of a successful reset */
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		err = i915_subtests(tests, i915);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 567b6f8dae86..9eaf030affd0 100644
--- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -4,15 +4,18 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_selftest.h"
-#include "../i915_reset.h"
+#include "gem/i915_gem_pm.h"
+#include "i915_selftest.h"
+#include "intel_reset.h"
 
-#include "igt_flush_test.h"
-#include "igt_reset.h"
-#include "igt_spinner.h"
-#include "igt_wedge_me.h"
-#include "mock_context.h"
-#include "mock_drm.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_wedge_me.h"
+#include "selftests/mock_drm.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
 
 static const struct wo_register {
 	enum intel_platform platform;
@@ -21,12 +24,13 @@ static const struct wo_register {
 	{ INTEL_GEMINILAKE, 0x731c }
 };
 
-#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 4)
+#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 8)
 struct wa_lists {
 	struct i915_wa_list gt_wa_list;
 	struct {
 		char name[REF_NAME_MAX];
 		struct i915_wa_list wa_list;
+		struct i915_wa_list ctx_wa_list;
 	} engine[I915_NUM_ENGINES];
 };
 
@@ -51,6 +55,12 @@ reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
 		wa_init_start(wal, name);
 		engine_init_workarounds(engine, wal);
 		wa_init_finish(wal);
+
+		snprintf(name, REF_NAME_MAX, "%s_CTX_REF", engine->name);
+
+		__intel_engine_init_ctx_wa(engine,
+					   &lists->engine[id].ctx_wa_list,
+					   name);
 	}
 }
 
@@ -71,7 +81,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 {
 	const u32 base = engine->mmio_base;
 	struct drm_i915_gem_object *result;
-	intel_wakeref_t wakeref;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	u32 srm, *cs;
@@ -103,15 +112,15 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (err)
 		goto err_obj;
 
-	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
-		rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto err_pin;
 	}
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_req;
 
@@ -133,9 +142,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);
 
-	i915_gem_object_get(result);
-	i915_gem_object_set_active_reference(result);
-
 	i915_request_add(rq);
 	i915_vma_unpin(vma);
 
@@ -188,8 +194,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 		return PTR_ERR(results);
 
 	err = 0;
+	i915_gem_object_lock(results);
 	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
+	i915_gem_object_unlock(results);
 	if (i915_terminally_wedged(ctx->i915))
 		err = -EIO;
 	if (err)
@@ -248,7 +256,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 	GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
 
 	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
+	with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
 		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
 
 	kernel_context_close(ctx);
@@ -304,7 +312,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	if (err)
 		goto out;
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = reset(engine);
 
 	igt_spinner_end(&spin);
@@ -340,49 +348,6 @@ out:
 	return err;
 }
 
-static struct i915_vma *create_scratch(struct i915_gem_context *ctx)
-{
-	struct drm_i915_gem_object *obj;
-	struct i915_vma *vma;
-	void *ptr;
-	int err;
-
-	obj = i915_gem_object_create_internal(ctx->i915, PAGE_SIZE);
-	if (IS_ERR(obj))
-		return ERR_CAST(obj);
-
-	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
-
-	ptr = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(ptr)) {
-		err = PTR_ERR(ptr);
-		goto err_obj;
-	}
-	memset(ptr, 0xc5, PAGE_SIZE);
-	i915_gem_object_flush_map(obj);
-	i915_gem_object_unpin_map(obj);
-
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
-		goto err_obj;
-	}
-
-	err = i915_vma_pin(vma, 0, 0, PIN_USER);
-	if (err)
-		goto err_obj;
-
-	err = i915_gem_object_set_to_cpu_domain(obj, false);
-	if (err)
-		goto err_obj;
-
-	return vma;
-
-err_obj:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
 static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 {
 	struct drm_i915_gem_object *obj;
@@ -393,7 +358,7 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
@@ -403,10 +368,6 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (err)
 		goto err_obj;
 
-	err = i915_gem_object_set_to_wc_domain(obj, true);
-	if (err)
-		goto err_obj;
-
 	return vma;
 
 err_obj:
@@ -441,6 +402,29 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg)
 	return false;
 }
 
+static bool ro_register(u32 reg)
+{
+	if (reg & RING_FORCE_TO_NONPRIV_RD)
+		return true;
+
+	return false;
+}
+
+static int whitelist_writable_count(struct intel_engine_cs *engine)
+{
+	int count = engine->whitelist.count;
+	int i;
+
+	for (i = 0; i < engine->whitelist.count; i++) {
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		if (ro_register(reg))
+			count--;
+	}
+
+	return count;
+}
+
 static int check_dirty_whitelist(struct i915_gem_context *ctx,
 				 struct intel_engine_cs *engine)
 {
@@ -475,7 +459,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 	int err = 0, i, v;
 	u32 *cs, *results;
 
-	scratch = create_scratch(ctx);
+	scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
 
@@ -496,6 +480,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		if (wo_register(engine, reg))
 			continue;
 
+		if (ro_register(reg))
+			continue;
+
 		srm = MI_STORE_REGISTER_MEM;
 		lrm = MI_LOAD_REGISTER_MEM;
 		if (INTEL_GEN(ctx->i915) >= 8)
@@ -557,7 +544,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		i915_gem_object_unpin_map(batch->obj);
 		i915_gem_chipset_flush(ctx->i915);
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = igt_request_alloc(ctx, engine);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out_batch;
@@ -580,7 +567,7 @@ err_request:
 		if (err)
 			goto out_batch;
 
-		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("%s: Futzing %x timedout; cancelling test\n",
 			       engine->name, reg);
 			i915_gem_set_wedged(ctx->i915);
@@ -675,7 +662,7 @@ static int live_dirty_whitelist(void *arg)
 	if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
 		return 0;
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	mutex_unlock(&i915->drm.struct_mutex);
 	file = mock_file(i915);
@@ -705,7 +692,7 @@ out_file:
 	mock_file_free(i915, file);
 	mutex_lock(&i915->drm.struct_mutex);
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	return err;
 }
 
@@ -743,26 +730,352 @@ out:
 	return err;
 }
 
-static bool verify_gt_engine_wa(struct drm_i915_private *i915,
-				struct wa_lists *lists, const char *str)
+static int read_whitelisted_registers(struct i915_gem_context *ctx,
+				      struct intel_engine_cs *engine,
+				      struct i915_vma *results)
+{
+	struct i915_request *rq;
+	int i, err = 0;
+	u32 srm, *cs;
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	srm = MI_STORE_REGISTER_MEM;
+	if (INTEL_GEN(ctx->i915) >= 8)
+		srm++;
+
+	cs = intel_ring_begin(rq, 4 * engine->whitelist.count);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_req;
+	}
+
+	for (i = 0; i < engine->whitelist.count; i++) {
+		u64 offset = results->node.start + sizeof(u32) * i;
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		/* Clear RD only and WR only flags */
+		reg &= ~(RING_FORCE_TO_NONPRIV_RD | RING_FORCE_TO_NONPRIV_WR);
+
+		*cs++ = srm;
+		*cs++ = reg;
+		*cs++ = lower_32_bits(offset);
+		*cs++ = upper_32_bits(offset);
+	}
+	intel_ring_advance(rq, cs);
+
+err_req:
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -EIO;
+
+	return err;
+}
+
+static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
+				       struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	struct i915_vma *batch;
+	int i, err = 0;
+	u32 *cs;
+
+	batch = create_batch(ctx);
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
+
+	cs = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_batch;
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(whitelist_writable_count(engine));
+	for (i = 0; i < engine->whitelist.count; i++) {
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		if (ro_register(reg))
+			continue;
+
+		*cs++ = reg;
+		*cs++ = 0xffffffff;
+	}
+	*cs++ = MI_BATCH_BUFFER_END;
+
+	i915_gem_object_flush_map(batch->obj);
+	i915_gem_chipset_flush(ctx->i915);
+
+	rq = igt_request_alloc(ctx, engine);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	if (engine->emit_init_breadcrumb) { /* Be nice if we hang */
+		err = engine->emit_init_breadcrumb(rq);
+		if (err)
+			goto err_request;
+	}
+
+	/* Perform the writes from an unprivileged "user" batch */
+	err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
+
+err_request:
+	i915_request_add(rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
+		err = -EIO;
+
+err_unpin:
+	i915_gem_object_unpin_map(batch->obj);
+err_batch:
+	i915_vma_unpin_and_release(&batch, 0);
+	return err;
+}
+
+struct regmask {
+	i915_reg_t reg;
+	unsigned long gen_mask;
+};
+
+static bool find_reg(struct drm_i915_private *i915,
+		     i915_reg_t reg,
+		     const struct regmask *tbl,
+		     unsigned long count)
+{
+	u32 offset = i915_mmio_reg_offset(reg);
+
+	while (count--) {
+		if (INTEL_INFO(i915)->gen_mask & tbl->gen_mask &&
+		    i915_mmio_reg_offset(tbl->reg) == offset)
+			return true;
+		tbl++;
+	}
+
+	return false;
+}
+
+static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	/* Alas, we must pardon some whitelists. Mistakes already made */
+	static const struct regmask pardon[] = {
+		{ GEN9_CTX_PREEMPT_REG, INTEL_GEN_MASK(9, 9) },
+		{ GEN8_L3SQCREG4, INTEL_GEN_MASK(9, 9) },
+	};
+
+	return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
+}
+
+static bool result_eq(struct intel_engine_cs *engine,
+		      u32 a, u32 b, i915_reg_t reg)
+{
+	if (a != b && !pardon_reg(engine->i915, reg)) {
+		pr_err("Whitelisted register 0x%4x not context saved: A=%08x, B=%08x\n",
+		       i915_mmio_reg_offset(reg), a, b);
+		return false;
+	}
+
+	return true;
+}
+
+static bool writeonly_reg(struct drm_i915_private *i915, i915_reg_t reg)
+{
+	/* Some registers do not seem to behave and our writes unreadable */
+	static const struct regmask wo[] = {
+		{ GEN9_SLICE_COMMON_ECO_CHICKEN1, INTEL_GEN_MASK(9, 9) },
+	};
+
+	return find_reg(i915, reg, wo, ARRAY_SIZE(wo));
+}
+
+static bool result_neq(struct intel_engine_cs *engine,
+		       u32 a, u32 b, i915_reg_t reg)
+{
+	if (a == b && !writeonly_reg(engine->i915, reg)) {
+		pr_err("Whitelist register 0x%4x:%08x was unwritable\n",
+		       i915_mmio_reg_offset(reg), a);
+		return false;
+	}
+
+	return true;
+}
+
+static int
+check_whitelisted_registers(struct intel_engine_cs *engine,
+			    struct i915_vma *A,
+			    struct i915_vma *B,
+			    bool (*fn)(struct intel_engine_cs *engine,
+				       u32 a, u32 b,
+				       i915_reg_t reg))
+{
+	u32 *a, *b;
+	int i, err;
+
+	a = i915_gem_object_pin_map(A->obj, I915_MAP_WB);
+	if (IS_ERR(a))
+		return PTR_ERR(a);
+
+	b = i915_gem_object_pin_map(B->obj, I915_MAP_WB);
+	if (IS_ERR(b)) {
+		err = PTR_ERR(b);
+		goto err_a;
+	}
+
+	err = 0;
+	for (i = 0; i < engine->whitelist.count; i++) {
+		if (!fn(engine, a[i], b[i], engine->whitelist.list[i].reg))
+			err = -EINVAL;
+	}
+
+	i915_gem_object_unpin_map(B->obj);
+err_a:
+	i915_gem_object_unpin_map(A->obj);
+	return err;
+}
+
+static int live_isolated_whitelist(void *arg)
 {
+	struct drm_i915_private *i915 = arg;
+	struct {
+		struct i915_gem_context *ctx;
+		struct i915_vma *scratch[2];
+	} client[2] = {};
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int i, err = 0;
+
+	/*
+	 * Check that a write into a whitelist register works, but
+	 * invisible to a second context.
+	 */
+
+	if (!intel_engines_has_context_isolation(i915))
+		return 0;
+
+	if (!i915->kernel_context->vm)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
+		struct i915_gem_context *c;
+
+		c = kernel_context(i915);
+		if (IS_ERR(c)) {
+			err = PTR_ERR(c);
+			goto err;
+		}
+
+		client[i].scratch[0] = create_scratch(c->vm, 1024);
+		if (IS_ERR(client[i].scratch[0])) {
+			err = PTR_ERR(client[i].scratch[0]);
+			kernel_context_close(c);
+			goto err;
+		}
+
+		client[i].scratch[1] = create_scratch(c->vm, 1024);
+		if (IS_ERR(client[i].scratch[1])) {
+			err = PTR_ERR(client[i].scratch[1]);
+			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+			kernel_context_close(c);
+			goto err;
+		}
+
+		client[i].ctx = c;
+	}
+
+	for_each_engine(engine, i915, id) {
+		if (!whitelist_writable_count(engine))
+			continue;
+
+		/* Read default values */
+		err = read_whitelisted_registers(client[0].ctx, engine,
+						 client[0].scratch[0]);
+		if (err)
+			goto err;
+
+		/* Try to overwrite registers (should only affect ctx0) */
+		err = scrub_whitelisted_registers(client[0].ctx, engine);
+		if (err)
+			goto err;
+
+		/* Read values from ctx1, we expect these to be defaults */
+		err = read_whitelisted_registers(client[1].ctx, engine,
+						 client[1].scratch[0]);
+		if (err)
+			goto err;
+
+		/* Verify that both reads return the same default values */
+		err = check_whitelisted_registers(engine,
+						  client[0].scratch[0],
+						  client[1].scratch[0],
+						  result_eq);
+		if (err)
+			goto err;
+
+		/* Read back the updated values in ctx0 */
+		err = read_whitelisted_registers(client[0].ctx, engine,
+						 client[0].scratch[1]);
+		if (err)
+			goto err;
+
+		/* User should be granted privilege to overwhite regs */
+		err = check_whitelisted_registers(engine,
+						  client[0].scratch[0],
+						  client[0].scratch[1],
+						  result_neq);
+		if (err)
+			goto err;
+	}
+
+err:
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
+		if (!client[i].ctx)
+			break;
+
+		i915_vma_unpin_and_release(&client[i].scratch[1], 0);
+		i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+		kernel_context_close(client[i].ctx);
+	}
+
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+
+	return err;
+}
+
+static bool
+verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
+		const char *str)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
 	bool ok = true;
 
 	ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str);
 
-	for_each_engine(engine, i915, id)
-		ok &= wa_list_verify(engine->uncore,
-				     &lists->engine[id].wa_list, str);
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		enum intel_engine_id id = ce->engine->id;
+
+		ok &= engine_wa_list_verify(ce,
+					    &lists->engine[id].wa_list,
+					    str) == 0;
+
+		ok &= engine_wa_list_verify(ce,
+					    &lists->engine[id].ctx_wa_list,
+					    str) == 0;
+	}
+	i915_gem_context_unlock_engines(ctx);
 
 	return ok;
 }
 
 static int
-live_gpu_reset_gt_engine_workarounds(void *arg)
+live_gpu_reset_workarounds(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
 	intel_wakeref_t wakeref;
 	struct wa_lists lists;
 	bool ok;
@@ -770,31 +1083,36 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
 	if (!intel_has_gpu_reset(i915))
 		return 0;
 
+	ctx = kernel_context(i915);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
 	pr_info("Verifying after GPU reset...\n");
 
 	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	reference_lists_init(i915, &lists);
 
-	ok = verify_gt_engine_wa(i915, &lists, "before reset");
+	ok = verify_wa_lists(ctx, &lists, "before reset");
 	if (!ok)
 		goto out;
 
 	i915_reset(i915, ALL_ENGINES, "live_workarounds");
 
-	ok = verify_gt_engine_wa(i915, &lists, "after reset");
+	ok = verify_wa_lists(ctx, &lists, "after reset");
 
 out:
+	kernel_context_close(ctx);
 	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	igt_global_reset_unlock(i915);
 
 	return ok ? 0 : -ESRCH;
 }
 
 static int
-live_engine_reset_gt_engine_workarounds(void *arg)
+live_engine_reset_workarounds(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
 	struct intel_engine_cs *engine;
@@ -814,7 +1132,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 		return PTR_ERR(ctx);
 
 	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	reference_lists_init(i915, &lists);
 
@@ -823,7 +1141,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 
 		pr_info("Verifying after %s reset...\n", engine->name);
 
-		ok = verify_gt_engine_wa(i915, &lists, "before reset");
+		ok = verify_wa_lists(ctx, &lists, "before reset");
 		if (!ok) {
 			ret = -ESRCH;
 			goto err;
@@ -831,7 +1149,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 
 		i915_reset_engine(engine, "live_workarounds");
 
-		ok = verify_gt_engine_wa(i915, &lists, "after idle reset");
+		ok = verify_wa_lists(ctx, &lists, "after idle reset");
 		if (!ok) {
 			ret = -ESRCH;
 			goto err;
@@ -862,7 +1180,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 		igt_spinner_end(&spin);
 		igt_spinner_fini(&spin);
 
-		ok = verify_gt_engine_wa(i915, &lists, "after busy reset");
+		ok = verify_wa_lists(ctx, &lists, "after busy reset");
 		if (!ok) {
 			ret = -ESRCH;
 			goto err;
@@ -871,7 +1189,7 @@ live_engine_reset_gt_engine_workarounds(void *arg)
 
 err:
 	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	igt_global_reset_unlock(i915);
 	kernel_context_close(ctx);
 
@@ -885,8 +1203,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_dirty_whitelist),
 		SUBTEST(live_reset_whitelist),
-		SUBTEST(live_gpu_reset_gt_engine_workarounds),
-		SUBTEST(live_engine_reset_gt_engine_workarounds),
+		SUBTEST(live_isolated_whitelist),
+		SUBTEST(live_gpu_reset_workarounds),
+		SUBTEST(live_engine_reset_workarounds),
 	};
 	int err;
 
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 1fa2f65c3cd1..c3d19d88da40 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -35,6 +35,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_gem_fence_reg.h"
 #include "gvt.h"
 
 static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
@@ -128,10 +129,10 @@ void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	struct drm_i915_fence_reg *reg;
+	struct i915_fence_reg *reg;
 	i915_reg_t fence_reg_lo, fence_reg_hi;
 
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
 
 	if (WARN_ON(fence >= vgpu_fence_sz(vgpu)))
 		return;
@@ -163,13 +164,13 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	struct drm_i915_fence_reg *reg;
+	struct i915_fence_reg *reg;
 	u32 i;
 
 	if (WARN_ON(!vgpu_fence_sz(vgpu)))
 		return;
 
-	intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	_clear_vgpu_fence(vgpu);
@@ -180,17 +181,18 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu)
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 }
 
 static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	struct drm_i915_fence_reg *reg;
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+	struct i915_fence_reg *reg;
 	int i;
 
-	intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(rpm);
 
 	/* Request fences from host */
 	mutex_lock(&dev_priv->drm.struct_mutex);
@@ -206,7 +208,7 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
 	_clear_vgpu_fence(vgpu);
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(rpm);
 	return 0;
 out_free_fence:
 	gvt_vgpu_err("Failed to alloc fences\n");
@@ -219,7 +221,7 @@ out_free_fence:
 		vgpu->fence.regs[i] = NULL;
 	}
 	mutex_unlock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(rpm);
 	return -ENOSPC;
 }
 
@@ -315,9 +317,9 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(&dev_priv->runtime_pm);
 	_clear_vgpu_fence(vgpu);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index de5347725564..6ea88270c818 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1725,7 +1725,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	int ret = 0;
 	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
 		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
-	unsigned long gma_start_offset = 0;
+	unsigned long start_offset = 0;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
@@ -1742,7 +1742,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
 
-	/* the gma_start_offset stores the batch buffer's start gma's
+	/* the start_offset stores the batch buffer's start gma's
 	 * offset relative to page boundary. so for non-privileged batch
 	 * buffer, the shadowed gem object holds exactly the same page
 	 * layout as original gem object. This is for the convience of
@@ -1754,16 +1754,17 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	 * that of shadowed page.
 	 */
 	if (bb->ppgtt)
-		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
+		start_offset = gma & ~I915_GTT_PAGE_MASK;
 
-	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
-			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
+	bb->obj = i915_gem_object_create_shmem(s->vgpu->gvt->dev_priv,
+					       round_up(bb_size + start_offset,
+							PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
 		goto err_free_bb;
 	}
 
-	ret = i915_gem_obj_prepare_shmem_write(bb->obj, &bb->clflush);
+	ret = i915_gem_object_prepare_write(bb->obj, &bb->clflush);
 	if (ret)
 		goto err_free_obj;
 
@@ -1780,7 +1781,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 
 	ret = copy_gma_to_hva(s->vgpu, mm,
 			      gma, gma + bb_size,
-			      bb->va + gma_start_offset);
+			      bb->va + start_offset);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		ret = -EFAULT;
@@ -1806,13 +1807,13 @@ static int perform_bb_shadow(struct parser_exec_state *s)
 	 * buffer's gma in pair. After all, we don't want to pin the shadow
 	 * buffer here (too early).
 	 */
-	s->ip_va = bb->va + gma_start_offset;
+	s->ip_va = bb->va + start_offset;
 	s->ip_gma = gma;
 	return 0;
 err_unmap:
 	i915_gem_object_unpin_map(bb->obj);
 err_finish_shmem_access:
-	i915_gem_obj_finish_shmem_access(bb->obj);
+	i915_gem_object_finish_access(bb->obj);
 err_free_obj:
 	i915_gem_object_put(bb->obj);
 err_free_bb:
@@ -2829,9 +2830,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
-				     roundup(ctx_size + CACHELINE_BYTES,
-					     PAGE_SIZE));
+	obj = i915_gem_object_create_shmem(workload->vgpu->gvt->dev_priv,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -2843,7 +2844,9 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto put_obj;
 	}
 
+	i915_gem_object_lock(obj);
 	ret = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
 	if (ret) {
 		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
 		goto unmap_src;
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 8a9606f91e68..2fb7b73b260d 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -58,12 +58,12 @@ static int mmio_offset_compare(void *priv,
 static inline int mmio_diff_handler(struct intel_gvt *gvt,
 				    u32 offset, void *data)
 {
-	struct drm_i915_private *dev_priv = gvt->dev_priv;
+	struct drm_i915_private *i915 = gvt->dev_priv;
 	struct mmio_diff_param *param = data;
 	struct diff_mmio *node;
 	u32 preg, vreg;
 
-	preg = I915_READ_NOTRACE(_MMIO(offset));
+	preg = intel_uncore_read_notrace(&i915->uncore, _MMIO(offset));
 	vreg = vgpu_vreg(param->vgpu, offset);
 
 	if (preg != vreg) {
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 4ac18b447247..049775e8e350 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -68,9 +68,10 @@ static struct bin_attribute firmware_attr = {
 
 static int mmio_snapshot_handler(struct intel_gvt *gvt, u32 offset, void *data)
 {
-	struct drm_i915_private *dev_priv = gvt->dev_priv;
+	struct drm_i915_private *i915 = gvt->dev_priv;
 
-	*(u32 *)(data + offset) = I915_READ_NOTRACE(_MMIO(offset));
+	*(u32 *)(data + offset) = intel_uncore_read_notrace(&i915->uncore,
+							    _MMIO(offset));
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index f5a328b5290a..7a1fe44d45af 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -87,7 +87,7 @@ struct intel_vgpu_gm {
 
 /* Fences owned by a vGPU */
 struct intel_vgpu_fence {
-	struct drm_i915_fence_reg *regs[INTEL_GVT_MAX_NUM_FENCES];
+	struct i915_fence_reg *regs[INTEL_GVT_MAX_NUM_FENCES];
 	u32 base;
 	u32 size;
 };
@@ -149,9 +149,9 @@ struct intel_vgpu_submission_ops {
 struct intel_vgpu_submission {
 	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
 	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct intel_context *shadow[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
-	struct i915_gem_context *shadow_ctx;
 	union {
 		u64 i915_context_pml4;
 		u64 i915_context_pdps[GEN8_3LVL_PDPES];
@@ -390,7 +390,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
 #define gvt_hidden_gmadr_end(gvt) (gvt_hidden_gmadr_base(gvt) \
 				   + gvt_hidden_sz(gvt) - 1)
 
-#define gvt_fence_sz(gvt) (gvt->dev_priv->num_fence_regs)
+#define gvt_fence_sz(gvt) ((gvt)->dev_priv->ggtt.num_fences)
 
 /* Aperture/GM space definitions for vGPU */
 #define vgpu_aperture_offset(vgpu)	((vgpu)->gm.low_gm_node.start)
@@ -584,12 +584,12 @@ enum {
 
 static inline void mmio_hw_access_pre(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(&dev_priv->runtime_pm);
 }
 
 static inline void mmio_hw_access_post(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index a68addf95c23..144301b778df 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1576,7 +1576,7 @@ hw_id_show(struct device *dev, struct device_attribute *attr,
 		struct intel_vgpu *vgpu = (struct intel_vgpu *)
 			mdev_get_drvdata(mdev);
 		return sprintf(buf, "%u\n",
-			       vgpu->submission.shadow_ctx->hw_id);
+			       vgpu->submission.shadow[0]->gem_context->hw_id);
 	}
 	return sprintf(buf, "\n");
 }
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 90bb3df0db50..2998999e8568 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -34,6 +34,7 @@
  */
 
 #include "i915_drv.h"
+#include "gt/intel_context.h"
 #include "gvt.h"
 #include "trace.h"
 
@@ -493,8 +494,7 @@ static void switch_mmio(struct intel_vgpu *pre,
 			 * itself.
 			 */
 			if (mmio->in_context &&
-			    !is_inhibit_context(intel_context_lookup(s->shadow_ctx,
-								     dev_priv->engine[ring_id])))
+			    !is_inhibit_context(s->shadow[ring_id]))
 				continue;
 
 			if (mmio->mask)
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
index 276db53f1bf1..867e7629025b 100644
--- a/drivers/gpu/drm/i915/gvt/opregion.c
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -30,7 +30,7 @@
  * not do like this.
  */
 #define _INTEL_BIOS_PRIVATE
-#include "intel_vbt_defs.h"
+#include "display/intel_vbt_defs.h"
 
 #define OPREGION_SIGNATURE "IntelGraphicsMem"
 #define MBOX_VBT      (1<<3)
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 1c763a27a412..2369d4a9af94 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -465,7 +465,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		scheduler->current_vgpu = NULL;
 	}
 
-	intel_runtime_pm_get(dev_priv);
+	intel_runtime_pm_get(&dev_priv->runtime_pm);
 	spin_lock_bh(&scheduler->mmio_context_lock);
 	for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
 		if (scheduler->engine_owner[ring_id] == vgpu) {
@@ -474,6 +474,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		}
 	}
 	spin_unlock_bh(&scheduler->mmio_context_lock);
-	intel_runtime_pm_put_unchecked(dev_priv);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 	mutex_unlock(&vgpu->gvt->sched_lock);
 }
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 0f919f0a43d4..2144fb46d0e1 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,6 +35,10 @@
 
 #include <linux/kthread.h>
 
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
 #include "i915_drv.h"
 #include "gvt.h"
 
@@ -277,18 +281,23 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void shadow_context_descriptor_update(struct intel_context *ce)
+static void
+shadow_context_descriptor_update(struct intel_context *ce,
+				 struct intel_vgpu_workload *workload)
 {
-	u64 desc = 0;
+	u64 desc = ce->lrc_desc;
 
-	desc = ce->lrc_desc;
-
-	/* Update bits 0-11 of the context descriptor which includes flags
+	/*
+	 * Update bits 0-11 of the context descriptor which includes flags
 	 * like GEN8_CTX_* cached in desc_template
 	 */
 	desc &= U64_MAX << 12;
 	desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
 
+	desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+	desc |= workload->ctx_desc.addressing_mode <<
+		GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
 	ce->lrc_desc = desc;
 }
 
@@ -359,18 +368,20 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 					 struct i915_gem_context *ctx)
 {
 	struct intel_vgpu_mm *mm = workload->shadow_mm;
-	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ctx->vm);
 	int i = 0;
 
 	if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed)
 		return -EINVAL;
 
 	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
-		px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0];
+		px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
 	} else {
 		for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
-			px_dma(ppgtt->pdp.page_directory[i]) =
-				mm->ppgtt_mm.shadow_pdps[i];
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+
+			px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
 		}
 	}
 
@@ -382,26 +393,22 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 	struct i915_request *rq;
-	int ret = 0;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	if (workload->req)
-		goto out;
+		return 0;
 
-	rq = i915_request_alloc(engine, shadow_ctx);
+	rq = i915_request_create(s->shadow[workload->ring_id]);
 	if (IS_ERR(rq)) {
 		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto out;
+		return PTR_ERR(rq);
 	}
+
 	workload->req = i915_request_get(rq);
-out:
-	return ret;
+	return 0;
 }
 
 /**
@@ -416,10 +423,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
-	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
-	struct intel_context *ce;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -427,29 +431,13 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	if (workload->shadow)
 		return 0;
 
-	/* pin shadow context by gvt even the shadow context will be pinned
-	 * when i915 alloc request. That is because gvt will update the guest
-	 * context from shadow context when workload is completed, and at that
-	 * moment, i915 may already unpined the shadow context to make the
-	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
-	 * the guest context, gvt can unpin the shadow_ctx safely.
-	 */
-	ce = intel_context_pin(shadow_ctx, engine);
-	if (IS_ERR(ce)) {
-		gvt_vgpu_err("fail to pin shadow context\n");
-		return PTR_ERR(ce);
-	}
-
-	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
-	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
-				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
 	if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
-		shadow_context_descriptor_update(ce);
+		shadow_context_descriptor_update(s->shadow[workload->ring_id],
+						 workload);
 
 	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
 	if (ret)
-		goto err_unpin;
+		return ret;
 
 	if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
 		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
@@ -461,8 +449,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	return 0;
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
-err_unpin:
-	intel_context_unpin(ce);
 	return ret;
 }
 
@@ -501,7 +487,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 						bb->obj->base.size);
 				bb->clflush &= ~CLFLUSH_AFTER;
 			}
-			i915_gem_obj_finish_shmem_access(bb->obj);
+			i915_gem_object_finish_access(bb->obj);
 			bb->accessing = false;
 
 		} else {
@@ -525,18 +511,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 			}
 
 			ret = i915_gem_object_set_to_gtt_domain(bb->obj,
-					false);
+								false);
 			if (ret)
 				goto err;
 
-			i915_gem_obj_finish_shmem_access(bb->obj);
-			bb->accessing = false;
-
 			ret = i915_vma_move_to_active(bb->vma,
 						      workload->req,
 						      0);
 			if (ret)
 				goto err;
+
+			i915_gem_object_finish_access(bb->obj);
+			bb->accessing = false;
 		}
 	}
 	return 0;
@@ -607,7 +593,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
 		if (bb->obj) {
 			if (bb->accessing)
-				i915_gem_obj_finish_shmem_access(bb->obj);
+				i915_gem_object_finish_access(bb->obj);
 
 			if (bb->va && !IS_ERR(bb->va))
 				i915_gem_object_unpin_map(bb->obj);
@@ -616,7 +602,7 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 				i915_vma_unpin(bb->vma);
 				i915_vma_close(bb->vma);
 			}
-			__i915_gem_object_release_unless_active(bb->obj);
+			i915_gem_object_put(bb->obj);
 		}
 		list_del(&bb->list);
 		kfree(bb);
@@ -689,7 +675,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct i915_request *rq;
 	int ring_id = workload->ring_id;
 	int ret;
@@ -700,7 +685,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	mutex_lock(&vgpu->vgpu_lock);
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
+	ret = set_context_ppgtt_from_shadow(workload,
+					    s->shadow[ring_id]->gem_context);
 	if (ret < 0) {
 		gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
 		goto err_req;
@@ -949,11 +935,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 				intel_vgpu_trigger_virtual_event(vgpu, event);
 		}
 
-		/* unpin shadow ctx as the shadow_ctx update is done */
-		mutex_lock(&rq->i915->drm.struct_mutex);
-		intel_context_unpin(rq->hw_context);
-		mutex_unlock(&rq->i915->drm.struct_mutex);
-
 		i915_request_put(fetch_and_zero(&workload->req));
 	}
 
@@ -1032,8 +1013,6 @@ static int workload_thread(void *priv)
 				workload->ring_id, workload,
 				workload->vgpu->id);
 
-		intel_runtime_pm_get(gvt->dev_priv);
-
 		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
 				workload->ring_id, workload);
 
@@ -1063,7 +1042,6 @@ complete:
 			intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
 					FORCEWAKE_ALL);
 
-		intel_runtime_pm_put_unchecked(gvt->dev_priv);
 		if (ret && (vgpu_is_vm_unhealthy(ret)))
 			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
 	}
@@ -1146,17 +1124,20 @@ err:
 }
 
 static void
-i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
+				struct i915_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm)) {
-		px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		px_dma(ppgtt->pd) = s->i915_context_pml4;
 	} else {
-		for (i = 0; i < GEN8_3LVL_PDPES; i++)
-			px_dma(i915_ppgtt->pdp.page_directory[i]) =
-						s->i915_context_pdps[i];
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+
+			px_dma(pd) = s->i915_context_pdps[i];
+		}
 	}
 }
 
@@ -1170,10 +1151,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
 void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
 	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
-	i915_context_ppgtt_root_restore(s);
-	i915_gem_context_put(s->shadow_ctx);
+
+	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->gem_context->vm));
+	for_each_engine(engine, vgpu->gvt->dev_priv, id)
+		intel_context_unpin(s->shadow[id]);
+
 	kmem_cache_destroy(s->workloads);
 }
 
@@ -1199,17 +1185,20 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
 }
 
 static void
-i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
+			     struct i915_ppgtt *ppgtt)
 {
-	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
 	int i;
 
-	if (i915_vm_is_4lvl(&i915_ppgtt->vm))
-		s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
-	else {
-		for (i = 0; i < GEN8_3LVL_PDPES; i++)
-			s->i915_context_pdps[i] =
-				px_dma(i915_ppgtt->pdp.page_directory[i]);
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		s->i915_context_pml4 = px_dma(ppgtt->pd);
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+
+			s->i915_context_pdps[i] = px_dma(pd);
+		}
 	}
 }
 
@@ -1226,16 +1215,36 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
 int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 {
 	struct intel_vgpu_submission *s = &vgpu->submission;
-	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id i;
 	int ret;
 
-	s->shadow_ctx = i915_gem_context_create_gvt(
-			&vgpu->gvt->dev_priv->drm);
-	if (IS_ERR(s->shadow_ctx))
-		return PTR_ERR(s->shadow_ctx);
+	ctx = i915_gem_context_create_gvt(&vgpu->gvt->dev_priv->drm);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	i915_context_ppgtt_root_save(s, i915_vm_to_ppgtt(ctx->vm));
+
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		struct intel_context *ce;
 
-	i915_context_ppgtt_root_save(s);
+		INIT_LIST_HEAD(&s->workload_q_head[i]);
+		s->shadow[i] = ERR_PTR(-EINVAL);
+
+		ce = i915_gem_context_get_engine(ctx, i);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
+			goto out_shadow_ctx;
+		}
+
+		ret = intel_context_pin(ce);
+		intel_context_put(ce);
+		if (ret)
+			goto out_shadow_ctx;
+
+		s->shadow[i] = ce;
+	}
 
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
@@ -1251,16 +1260,21 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 		goto out_shadow_ctx;
 	}
 
-	for_each_engine(engine, vgpu->gvt->dev_priv, i)
-		INIT_LIST_HEAD(&s->workload_q_head[i]);
-
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
+	i915_gem_context_put(ctx);
 	return 0;
 
 out_shadow_ctx:
-	i915_gem_context_put(s->shadow_ctx);
+	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(ctx->vm));
+	for_each_engine(engine, vgpu->gvt->dev_priv, i) {
+		if (IS_ERR(s->shadow[i]))
+			break;
+
+		intel_context_unpin(s->shadow[i]);
+	}
+	i915_gem_context_put(ctx);
 	return ret;
 }
 
@@ -1520,11 +1534,11 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 	 * as there is only one pre-allocated buf-obj for shadow.
 	 */
 	if (list_empty(workload_q_head(vgpu, ring_id))) {
-		intel_runtime_pm_get(dev_priv);
+		intel_runtime_pm_get(&dev_priv->runtime_pm);
 		mutex_lock(&dev_priv->drm.struct_mutex);
 		ret = intel_gvt_scan_and_shadow_workload(workload);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_put_unchecked(dev_priv);
+		intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
 	}
 
 	if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 863ae12707ba..293e5bcc4b6c 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -4,6 +4,8 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gt/intel_engine_pm.h"
+
 #include "i915_drv.h"
 #include "i915_active.h"
 #include "i915_globals.h"
@@ -157,6 +159,7 @@ void i915_active_init(struct drm_i915_private *i915,
 	ref->retire = retire;
 	ref->tree = RB_ROOT;
 	i915_active_request_init(&ref->last, NULL, last_retire);
+	init_llist_head(&ref->barriers);
 	ref->count = 0;
 }
 
@@ -263,6 +266,99 @@ void i915_active_fini(struct i915_active *ref)
 }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct llist_node *pos, *next;
+	unsigned long tmp;
+	int err;
+
+	GEM_BUG_ON(!engine->mask);
+	for_each_engine_masked(engine, i915, engine->mask, tmp) {
+		struct intel_context *kctx = engine->kernel_context;
+		struct active_node *node;
+
+		node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
+		if (unlikely(!node)) {
+			err = -ENOMEM;
+			goto unwind;
+		}
+
+		i915_active_request_init(&node->base,
+					 (void *)engine, node_retire);
+		node->timeline = kctx->ring->timeline->fence_context;
+		node->ref = ref;
+		ref->count++;
+
+		intel_engine_pm_get(engine);
+		llist_add((struct llist_node *)&node->base.link,
+			  &ref->barriers);
+	}
+
+	return 0;
+
+unwind:
+	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
+		struct active_node *node;
+
+		node = container_of((struct list_head *)pos,
+				    typeof(*node), base.link);
+		engine = (void *)rcu_access_pointer(node->base.request);
+
+		intel_engine_pm_put(engine);
+		kmem_cache_free(global.slab_cache, node);
+	}
+	return err;
+}
+
+void i915_active_acquire_barrier(struct i915_active *ref)
+{
+	struct llist_node *pos, *next;
+
+	i915_active_acquire(ref);
+
+	llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) {
+		struct intel_engine_cs *engine;
+		struct active_node *node;
+		struct rb_node **p, *parent;
+
+		node = container_of((struct list_head *)pos,
+				    typeof(*node), base.link);
+
+		engine = (void *)rcu_access_pointer(node->base.request);
+		RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN));
+
+		parent = NULL;
+		p = &ref->tree.rb_node;
+		while (*p) {
+			parent = *p;
+			if (rb_entry(parent,
+				     struct active_node,
+				     node)->timeline < node->timeline)
+				p = &parent->rb_right;
+			else
+				p = &parent->rb_left;
+		}
+		rb_link_node(&node->node, parent, p);
+		rb_insert_color(&node->node, &ref->tree);
+
+		llist_add((struct llist_node *)&node->base.link,
+			  &engine->barrier_tasks);
+		intel_engine_pm_put(engine);
+	}
+	i915_active_release(ref);
+}
+
+void i915_request_add_barriers(struct i915_request *rq)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	struct llist_node *node, *next;
+
+	llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks))
+		list_add_tail((struct list_head *)node, &rq->active_list);
+}
+
 int i915_active_request_set(struct i915_active_request *active,
 			    struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h
index 7d758719ce39..c14eebf6d074 100644
--- a/drivers/gpu/drm/i915/i915_active.h
+++ b/drivers/gpu/drm/i915/i915_active.h
@@ -330,7 +330,7 @@ i915_active_request_retire(struct i915_active_request *active,
 		return 0;
 
 	ret = i915_request_wait(request,
-				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				I915_WAIT_INTERRUPTIBLE,
 				MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
@@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref);
 static inline void i915_active_fini(struct i915_active *ref) { }
 #endif
 
+int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
+					    struct intel_engine_cs *engine);
+void i915_active_acquire_barrier(struct i915_active *ref);
+void i915_request_add_barriers(struct i915_request *rq);
+
 #endif /* _I915_ACTIVE_H_ */
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index b679253b53a5..c025991b9233 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -7,6 +7,7 @@
 #ifndef _I915_ACTIVE_TYPES_H_
 #define _I915_ACTIVE_TYPES_H_
 
+#include <linux/llist.h>
 #include <linux/rbtree.h>
 #include <linux/rcupdate.h>
 
@@ -31,6 +32,8 @@ struct i915_active {
 	unsigned int count;
 
 	void (*retire)(struct i915_active *ref);
+
+	struct llist_head barriers;
 };
 
 #endif /* _I915_ACTIVE_TYPES_H_ */
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 503d548a55f7..a28bcd2d7c09 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -25,8 +25,9 @@
  *
  */
 
+#include "gt/intel_engine.h"
+
 #include "i915_drv.h"
-#include "intel_ringbuffer.h"
 
 /**
  * DOC: batch buffer command parser
@@ -1057,19 +1058,20 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	void *dst, *src;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
+	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush);
-	if (ret) {
-		dst = ERR_PTR(ret);
-		goto unpin_src;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
+	i915_gem_object_finish_access(dst_obj);
 	if (IS_ERR(dst))
-		goto unpin_dst;
+		return dst;
+
+	ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+	if (ret) {
+		i915_gem_object_unpin_map(dst_obj);
+		return ERR_PTR(ret);
+	}
 
 	src = ERR_PTR(-ENODEV);
 	if (src_needs_clflush &&
@@ -1115,13 +1117,11 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		}
 	}
 
+	i915_gem_object_finish_access(src_obj);
+
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unpin_dst:
-	i915_gem_obj_finish_shmem_access(dst_obj);
-unpin_src:
-	i915_gem_obj_finish_shmem_access(src_obj);
 	return dst;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5823ffb17821..62cf34db9280 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -32,15 +32,22 @@
 #include <drm/drm_debugfs.h>
 #include <drm/drm_fourcc.h>
 
-#include "i915_reset.h"
-#include "intel_dp.h"
+#include "display/intel_dp.h"
+#include "display/intel_fbc.h"
+#include "display/intel_hdcp.h"
+#include "display/intel_hdmi.h"
+#include "display/intel_psr.h"
+
+#include "gem/i915_gem_context.h"
+#include "gt/intel_reset.h"
+
+#include "i915_debugfs.h"
+#include "i915_irq.h"
+#include "intel_csr.h"
 #include "intel_drv.h"
-#include "intel_fbc.h"
 #include "intel_guc_submission.h"
-#include "intel_hdcp.h"
-#include "intel_hdmi.h"
 #include "intel_pm.h"
-#include "intel_psr.h"
+#include "intel_sideband.h"
 
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
 {
@@ -98,19 +105,6 @@ static char get_pin_mapped_flag(struct drm_i915_gem_object *obj)
 	return obj->mm.mapping ? 'M' : ' ';
 }
 
-static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj)
-{
-	u64 size = 0;
-	struct i915_vma *vma;
-
-	for_each_ggtt_vma(vma, obj) {
-		if (drm_mm_node_allocated(&vma->node))
-			size += vma->node.size;
-	}
-
-	return size;
-}
-
 static const char *
 stringify_page_sizes(unsigned int page_sizes, char *buf, size_t len)
 {
@@ -150,8 +144,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	unsigned int frontbuffer_bits;
 	int pin_count = 0;
 
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
 	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
 		   get_active_flag(obj),
@@ -167,17 +159,17 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (i915_vma_is_pinned(vma))
-			pin_count++;
-	}
-	seq_printf(m, " (pinned x %d)", pin_count);
-	if (obj->pin_global)
-		seq_printf(m, " (global)");
+
+	spin_lock(&obj->vma.lock);
 	list_for_each_entry(vma, &obj->vma.list, obj_link) {
 		if (!drm_mm_node_allocated(&vma->node))
 			continue;
 
+		spin_unlock(&obj->vma.lock);
+
+		if (i915_vma_is_pinned(vma))
+			pin_count++;
+
 		seq_printf(m, " (%sgtt offset: %08llx, size: %08llx, pages: %s",
 			   i915_vma_is_ggtt(vma) ? "g" : "pp",
 			   vma->node.start, vma->node.size,
@@ -206,6 +198,18 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 					   vma->ggtt_view.rotated.plane[1].offset);
 				break;
 
+			case I915_GGTT_VIEW_REMAPPED:
+				seq_printf(m, ", remapped [(%ux%u, stride=%u, offset=%u), (%ux%u, stride=%u, offset=%u)]",
+					   vma->ggtt_view.remapped.plane[0].width,
+					   vma->ggtt_view.remapped.plane[0].height,
+					   vma->ggtt_view.remapped.plane[0].stride,
+					   vma->ggtt_view.remapped.plane[0].offset,
+					   vma->ggtt_view.remapped.plane[1].width,
+					   vma->ggtt_view.remapped.plane[1].height,
+					   vma->ggtt_view.remapped.plane[1].stride,
+					   vma->ggtt_view.remapped.plane[1].offset);
+				break;
+
 			default:
 				MISSING_CASE(vma->ggtt_view.type);
 				break;
@@ -216,9 +220,16 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 				   vma->fence->id,
 				   i915_active_request_isset(&vma->last_fence) ? "*" : "");
 		seq_puts(m, ")");
+
+		spin_lock(&obj->vma.lock);
 	}
+	spin_unlock(&obj->vma.lock);
+
+	seq_printf(m, " (pinned x %d)", pin_count);
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
+	if (obj->pin_global)
+		seq_printf(m, " (global)");
 
 	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
@@ -229,83 +240,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits);
 }
 
-static int obj_rank_by_stolen(const void *A, const void *B)
-{
-	const struct drm_i915_gem_object *a =
-		*(const struct drm_i915_gem_object **)A;
-	const struct drm_i915_gem_object *b =
-		*(const struct drm_i915_gem_object **)B;
-
-	if (a->stolen->start < b->stolen->start)
-		return -1;
-	if (a->stolen->start > b->stolen->start)
-		return 1;
-	return 0;
-}
-
-static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
-{
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_i915_gem_object **objects;
-	struct drm_i915_gem_object *obj;
-	u64 total_obj_size, total_gtt_size;
-	unsigned long total, count, n;
-	int ret;
-
-	total = READ_ONCE(dev_priv->mm.object_count);
-	objects = kvmalloc_array(total, sizeof(*objects), GFP_KERNEL);
-	if (!objects)
-		return -ENOMEM;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		goto out;
-
-	total_obj_size = total_gtt_size = count = 0;
-
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
-		if (count == total)
-			break;
-
-		if (obj->stolen == NULL)
-			continue;
-
-		objects[count++] = obj;
-		total_obj_size += obj->base.size;
-		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
-
-	}
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
-		if (count == total)
-			break;
-
-		if (obj->stolen == NULL)
-			continue;
-
-		objects[count++] = obj;
-		total_obj_size += obj->base.size;
-	}
-	spin_unlock(&dev_priv->mm.obj_lock);
-
-	sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL);
-
-	seq_puts(m, "Stolen:\n");
-	for (n = 0; n < count; n++) {
-		seq_puts(m, "   ");
-		describe_obj(m, objects[n]);
-		seq_putc(m, '\n');
-	}
-	seq_printf(m, "Total %lu objects, %llu bytes, %llu GTT size\n",
-		   count, total_obj_size, total_gtt_size);
-
-	mutex_unlock(&dev->struct_mutex);
-out:
-	kvfree(objects);
-	return ret;
-}
-
 struct file_stats {
 	struct i915_address_space *vm;
 	unsigned long count;
@@ -325,7 +259,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 
 	stats->count++;
 	stats->total += obj->base.size;
-	if (!obj->bind_count)
+	if (!atomic_read(&obj->bind_count))
 		stats->unbound += obj->base.size;
 	if (obj->base.name || obj->base.dma_buf)
 		stats->shared += obj->base.size;
@@ -395,17 +329,20 @@ static void print_context_stats(struct seq_file *m,
 	struct i915_gem_context *ctx;
 
 	list_for_each_entry(ctx, &i915->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			if (ce->state)
 				per_file_stats(0, ce->state->obj, &kstats);
 			if (ce->ring)
 				per_file_stats(0, ce->ring->vma->obj, &kstats);
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-			struct file_stats stats = { .vm = &ctx->ppgtt->vm, };
+			struct file_stats stats = { .vm = ctx->vm, };
 			struct drm_file *file = ctx->file_priv->file;
 			struct task_struct *task;
 			char name[80];
@@ -429,153 +366,22 @@ static void print_context_stats(struct seq_file *m,
 
 static int i915_gem_object_info(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	u32 count, mapped_count, purgeable_count, dpy_count, huge_count;
-	u64 size, mapped_size, purgeable_size, dpy_size, huge_size;
-	struct drm_i915_gem_object *obj;
-	unsigned int page_sizes = 0;
-	char buf[80];
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	int ret;
 
-	seq_printf(m, "%u objects, %llu bytes\n",
-		   dev_priv->mm.object_count,
-		   dev_priv->mm.object_memory);
-
-	size = count = 0;
-	mapped_size = mapped_count = 0;
-	purgeable_size = purgeable_count = 0;
-	huge_size = huge_count = 0;
-
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) {
-		size += obj->base.size;
-		++count;
-
-		if (obj->mm.madv == I915_MADV_DONTNEED) {
-			purgeable_size += obj->base.size;
-			++purgeable_count;
-		}
-
-		if (obj->mm.mapping) {
-			mapped_count++;
-			mapped_size += obj->base.size;
-		}
-
-		if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
-			huge_count++;
-			huge_size += obj->base.size;
-			page_sizes |= obj->mm.page_sizes.sg;
-		}
-	}
-	seq_printf(m, "%u unbound objects, %llu bytes\n", count, size);
-
-	size = count = dpy_size = dpy_count = 0;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
-		size += obj->base.size;
-		++count;
-
-		if (obj->pin_global) {
-			dpy_size += obj->base.size;
-			++dpy_count;
-		}
-
-		if (obj->mm.madv == I915_MADV_DONTNEED) {
-			purgeable_size += obj->base.size;
-			++purgeable_count;
-		}
-
-		if (obj->mm.mapping) {
-			mapped_count++;
-			mapped_size += obj->base.size;
-		}
-
-		if (obj->mm.page_sizes.sg > I915_GTT_PAGE_SIZE) {
-			huge_count++;
-			huge_size += obj->base.size;
-			page_sizes |= obj->mm.page_sizes.sg;
-		}
-	}
-	spin_unlock(&dev_priv->mm.obj_lock);
-
-	seq_printf(m, "%u bound objects, %llu bytes\n",
-		   count, size);
-	seq_printf(m, "%u purgeable objects, %llu bytes\n",
-		   purgeable_count, purgeable_size);
-	seq_printf(m, "%u mapped objects, %llu bytes\n",
-		   mapped_count, mapped_size);
-	seq_printf(m, "%u huge-paged objects (%s) %llu bytes\n",
-		   huge_count,
-		   stringify_page_sizes(page_sizes, buf, sizeof(buf)),
-		   huge_size);
-	seq_printf(m, "%u display objects (globally pinned), %llu bytes\n",
-		   dpy_count, dpy_size);
-
-	seq_printf(m, "%llu [%pa] gtt total\n",
-		   ggtt->vm.total, &ggtt->mappable_end);
-	seq_printf(m, "Supported page sizes: %s\n",
-		   stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes,
-					buf, sizeof(buf)));
+	seq_printf(m, "%u shrinkable objects, %llu bytes\n",
+		   i915->mm.shrink_count,
+		   i915->mm.shrink_memory);
 
 	seq_putc(m, '\n');
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
-
-	print_batch_pool_stats(m, dev_priv);
-	print_context_stats(m, dev_priv);
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
-static int i915_gem_gtt_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = m->private;
-	struct drm_i915_private *dev_priv = node_to_i915(node);
-	struct drm_device *dev = &dev_priv->drm;
-	struct drm_i915_gem_object **objects;
-	struct drm_i915_gem_object *obj;
-	u64 total_obj_size, total_gtt_size;
-	unsigned long nobject, n;
-	int count, ret;
-
-	nobject = READ_ONCE(dev_priv->mm.object_count);
-	objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL);
-	if (!objects)
-		return -ENOMEM;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 	if (ret)
 		return ret;
 
-	count = 0;
-	spin_lock(&dev_priv->mm.obj_lock);
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) {
-		objects[count++] = obj;
-		if (count == nobject)
-			break;
-	}
-	spin_unlock(&dev_priv->mm.obj_lock);
-
-	total_obj_size = total_gtt_size = 0;
-	for (n = 0;  n < count; n++) {
-		obj = objects[n];
-
-		seq_puts(m, "   ");
-		describe_obj(m, obj);
-		seq_putc(m, '\n');
-		total_obj_size += obj->base.size;
-		total_gtt_size += i915_gem_obj_total_ggtt_size(obj);
-	}
-
-	mutex_unlock(&dev->struct_mutex);
-
-	seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n",
-		   count, total_obj_size, total_gtt_size);
-	kvfree(objects);
+	print_batch_pool_stats(m, i915);
+	print_context_stats(m, i915);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 	return 0;
 }
@@ -685,7 +491,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 	intel_wakeref_t wakeref;
 	int i, pipe;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	if (IS_CHERRYVIEW(dev_priv)) {
 		intel_wakeref_t pref;
@@ -891,35 +697,32 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		}
 	}
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
 
 static int i915_gem_fence_regs_info(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
-	int i, ret;
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	unsigned int i;
 
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	seq_printf(m, "Total fences = %d\n", i915->ggtt.num_fences);
 
-	seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs);
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct i915_vma *vma = dev_priv->fence_regs[i].vma;
+	rcu_read_lock();
+	for (i = 0; i < i915->ggtt.num_fences; i++) {
+		struct i915_vma *vma = i915->ggtt.fence_regs[i].vma;
 
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
-			   i, dev_priv->fence_regs[i].pin_count);
+			   i, i915->ggtt.fence_regs[i].pin_count);
 		if (!vma)
 			seq_puts(m, "unused");
 		else
 			describe_obj(m, vma->obj);
 		seq_putc(m, '\n');
 	}
+	rcu_read_unlock();
 
-	mutex_unlock(&dev->struct_mutex);
 	return 0;
 }
 
@@ -967,7 +770,7 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 	intel_wakeref_t wakeref;
 
 	gpu = NULL;
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		gpu = i915_capture_gpu_state(i915);
 	if (IS_ERR(gpu))
 		return PTR_ERR(gpu);
@@ -1026,15 +829,16 @@ static const struct file_operations i915_error_state_fops = {
 static int i915_frequency_info(struct seq_file *m, void *unused)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	intel_wakeref_t wakeref;
 	int ret = 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	if (IS_GEN(dev_priv, 5)) {
-		u16 rgvswctl = I915_READ16(MEMSWCTL);
-		u16 rgvstat = I915_READ16(MEMSTAT_ILK);
+		u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+		u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
 
 		seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
 		seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
@@ -1045,8 +849,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		u32 rpmodectl, freq_sts;
 
-		mutex_lock(&dev_priv->pcu_lock);
-
 		rpmodectl = I915_READ(GEN6_RP_CONTROL);
 		seq_printf(m, "Video Turbo Mode: %s\n",
 			   yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
@@ -1056,7 +858,10 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 			   yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
 				  GEN6_RP_MEDIA_SW_MODE));
 
+		vlv_punit_get(dev_priv);
 		freq_sts = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
+		vlv_punit_put(dev_priv);
+
 		seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
 		seq_printf(m, "DDR freq: %d MHz\n", dev_priv->mem_freq);
 
@@ -1078,7 +883,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 		seq_printf(m,
 			   "efficient (RPe) frequency: %d MHz\n",
 			   intel_gpu_freq(dev_priv, rps->efficient_freq));
-		mutex_unlock(&dev_priv->pcu_lock);
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		u32 rp_state_limits;
 		u32 gt_perf_status;
@@ -1242,7 +1046,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
 	seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq);
 	seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	return ret;
 }
 
@@ -1279,7 +1083,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_engine_cs *engine;
 	u64 acthd[I915_NUM_ENGINES];
-	u32 seqno[I915_NUM_ENGINES];
 	struct intel_instdone instdone;
 	intel_wakeref_t wakeref;
 	enum intel_engine_id id;
@@ -1295,11 +1098,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		return 0;
 	}
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
-		for_each_engine(engine, dev_priv, id) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
+		for_each_engine(engine, dev_priv, id)
 			acthd[id] = intel_engine_get_active_head(engine);
-			seqno[id] = intel_engine_get_hangcheck_seqno(engine);
-		}
 
 		intel_engine_get_instdone(dev_priv->engine[RCS0], &instdone);
 	}
@@ -1316,11 +1117,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 	seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
 
 	for_each_engine(engine, dev_priv, id) {
-		seq_printf(m, "%s:\n", engine->name);
-		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
-			   engine->hangcheck.last_seqno,
-			   seqno[id],
-			   engine->hangcheck.next_seqno,
+		seq_printf(m, "%s: %d ms ago\n",
+			   engine->name,
 			   jiffies_to_msecs(jiffies -
 					    engine->hangcheck.action_timestamp));
 
@@ -1362,13 +1160,14 @@ static int i915_reset_info(struct seq_file *m, void *unused)
 
 static int ironlake_drpc_info(struct seq_file *m)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
+	struct intel_uncore *uncore = &i915->uncore;
 	u32 rgvmodectl, rstdbyctl;
 	u16 crstandvid;
 
-	rgvmodectl = I915_READ(MEMMODECTL);
-	rstdbyctl = I915_READ(RSTDBYCTL);
-	crstandvid = I915_READ16(CRSTANDVID);
+	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+	rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL);
+	crstandvid = intel_uncore_read16(uncore, CRSTANDVID);
 
 	seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
 	seq_printf(m, "Boost freq: %d\n",
@@ -1483,12 +1282,9 @@ static int gen6_drpc_info(struct seq_file *m)
 		gen9_powergate_status = I915_READ(GEN9_PWRGT_DOMAIN_STATUS);
 	}
 
-	if (INTEL_GEN(dev_priv) <= 7) {
-		mutex_lock(&dev_priv->pcu_lock);
+	if (INTEL_GEN(dev_priv) <= 7)
 		sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
-				       &rc6vids);
-		mutex_unlock(&dev_priv->pcu_lock);
-	}
+				       &rc6vids, NULL);
 
 	seq_printf(m, "RC1e Enabled: %s\n",
 		   yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
@@ -1562,7 +1358,7 @@ static int i915_drpc_info(struct seq_file *m, void *unused)
 	intel_wakeref_t wakeref;
 	int err = -ENODEV;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 			err = vlv_drpc_info(m);
 		else if (INTEL_GEN(dev_priv) >= 6)
@@ -1596,7 +1392,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	if (!HAS_FBC(dev_priv))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	mutex_lock(&fbc->lock);
 
 	if (intel_fbc_is_active(dev_priv))
@@ -1623,7 +1419,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	}
 
 	mutex_unlock(&fbc->lock);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -1673,7 +1469,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 	if (!HAS_IPS(dev_priv))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	seq_printf(m, "Enabled by kernel parameter: %s\n",
 		   yesno(i915_modparams.enable_ips));
@@ -1687,7 +1483,7 @@ static int i915_ips_status(struct seq_file *m, void *unused)
 			seq_puts(m, "Currently: disabled\n");
 	}
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -1729,7 +1525,7 @@ static int i915_emon_status(struct seq_file *m, void *unused)
 	if (!IS_GEN(i915, 5))
 		return -ENODEV;
 
-	with_intel_runtime_pm(i915, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		unsigned long temp, chipset, gfx;
 
 		temp = i915_mch_val(i915);
@@ -1752,17 +1548,10 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 	unsigned int max_gpu_freq, min_gpu_freq;
 	intel_wakeref_t wakeref;
 	int gpu_freq, ia_freq;
-	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
-	if (ret)
-		goto out;
-
 	min_gpu_freq = rps->min_freq;
 	max_gpu_freq = rps->max_freq;
 	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
@@ -1773,11 +1562,12 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 
 	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
 
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
 		ia_freq = gpu_freq;
 		sandybridge_pcode_read(dev_priv,
 				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
-				       &ia_freq);
+				       &ia_freq, NULL);
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
 			   intel_gpu_freq(dev_priv, (gpu_freq *
 						     (IS_GEN9_BC(dev_priv) ||
@@ -1786,12 +1576,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused)
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
 	}
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
-out:
-	intel_runtime_pm_put(dev_priv, wakeref);
-	return ret;
+	return 0;
 }
 
 static int i915_opregion(struct seq_file *m, void *unused)
@@ -1892,6 +1679,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		return ret;
 
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct i915_gem_engines_iter it;
 		struct intel_context *ce;
 
 		seq_puts(m, "HW context ");
@@ -1916,7 +1704,8 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, ctx->remap_slice ? 'R' : 'r');
 		seq_putc(m, '\n');
 
-		list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx), it) {
 			seq_printf(m, "%s: ", ce->engine->name);
 			if (ce->state)
 				describe_obj(m, ce->state->obj);
@@ -1924,6 +1713,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 				describe_ctx_ring(m, ce->ring);
 			seq_putc(m, '\n');
 		}
+		i915_gem_context_unlock_engines(ctx);
 
 		seq_putc(m, '\n');
 	}
@@ -1960,9 +1750,10 @@ static const char *swizzle_string(unsigned swizzle)
 static int i915_swizzle_info(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
 		   swizzle_string(dev_priv->mm.bit_6_swizzle_x));
@@ -1971,36 +1762,36 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 
 	if (IS_GEN_RANGE(dev_priv, 3, 4)) {
 		seq_printf(m, "DDC = 0x%08x\n",
-			   I915_READ(DCC));
+			   intel_uncore_read(uncore, DCC));
 		seq_printf(m, "DDC2 = 0x%08x\n",
-			   I915_READ(DCC2));
+			   intel_uncore_read(uncore, DCC2));
 		seq_printf(m, "C0DRB3 = 0x%04x\n",
-			   I915_READ16(C0DRB3));
+			   intel_uncore_read16(uncore, C0DRB3));
 		seq_printf(m, "C1DRB3 = 0x%04x\n",
-			   I915_READ16(C1DRB3));
+			   intel_uncore_read16(uncore, C1DRB3));
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
-			   I915_READ(MAD_DIMM_C0));
+			   intel_uncore_read(uncore, MAD_DIMM_C0));
 		seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
-			   I915_READ(MAD_DIMM_C1));
+			   intel_uncore_read(uncore, MAD_DIMM_C1));
 		seq_printf(m, "MAD_DIMM_C2 = 0x%08x\n",
-			   I915_READ(MAD_DIMM_C2));
+			   intel_uncore_read(uncore, MAD_DIMM_C2));
 		seq_printf(m, "TILECTL = 0x%08x\n",
-			   I915_READ(TILECTL));
+			   intel_uncore_read(uncore, TILECTL));
 		if (INTEL_GEN(dev_priv) >= 8)
 			seq_printf(m, "GAMTARBMODE = 0x%08x\n",
-				   I915_READ(GAMTARBMODE));
+				   intel_uncore_read(uncore, GAMTARBMODE));
 		else
 			seq_printf(m, "ARB_MODE = 0x%08x\n",
-				   I915_READ(ARB_MODE));
+				   intel_uncore_read(uncore, ARB_MODE));
 		seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
-			   I915_READ(DISP_ARB_CTL));
+			   intel_uncore_read(uncore, DISP_ARB_CTL));
 	}
 
 	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		seq_puts(m, "L-shaped memory detected\n");
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -2026,13 +1817,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	u32 act_freq = rps->cur_freq;
 	intel_wakeref_t wakeref;
 
-	with_intel_runtime_pm_if_in_use(dev_priv, wakeref) {
+	with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm, wakeref) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-			mutex_lock(&dev_priv->pcu_lock);
+			vlv_punit_get(dev_priv);
 			act_freq = vlv_punit_read(dev_priv,
 						  PUNIT_REG_GPU_FREQ_STS);
+			vlv_punit_put(dev_priv);
 			act_freq = (act_freq >> 8) & 0xff;
-			mutex_unlock(&dev_priv->pcu_lock);
 		} else {
 			act_freq = intel_get_cagf(dev_priv,
 						  I915_READ(GEN6_RPSTAT1));
@@ -2040,8 +1831,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	}
 
 	seq_printf(m, "RPS enabled? %d\n", rps->enabled);
-	seq_printf(m, "GPU busy? %s [%d requests]\n",
-		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
+	seq_printf(m, "GPU busy? %s\n", yesno(dev_priv->gt.awake));
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@@ -2060,9 +1850,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 
 	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
 
-	if (INTEL_GEN(dev_priv) >= 6 &&
-	    rps->enabled &&
-	    dev_priv->gt.active_requests) {
+	if (INTEL_GEN(dev_priv) >= 6 && rps->enabled && dev_priv->gt.awake) {
 		u32 rpup, rpupei;
 		u32 rpdown, rpdownei;
 
@@ -2112,7 +1900,7 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->huc.fw, &p);
 
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2));
 
 	return 0;
@@ -2130,7 +1918,7 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	p = drm_seq_file_printer(m);
 	intel_uc_fw_dump(&dev_priv->guc.fw, &p);
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		u32 tmp = I915_READ(GUC_STATUS);
 		u32 i;
 
@@ -2516,7 +2304,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 	if (!psr->sink_support)
 		return 0;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	mutex_lock(&psr->lock);
 
 	if (psr->enabled)
@@ -2580,7 +2368,7 @@ static int i915_edp_psr_status(struct seq_file *m, void *data)
 
 unlock:
 	mutex_unlock(&psr->lock);
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -2597,11 +2385,11 @@ i915_edp_psr_debug_set(void *data, u64 val)
 
 	DRM_DEBUG_KMS("Setting PSR debug to %llx\n", val);
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	ret = intel_psr_debug_set(dev_priv, val);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return ret;
 }
@@ -2636,7 +2424,7 @@ static int i915_energy_uJ(struct seq_file *m, void *data)
 		return -ENODEV;
 
 	units = (power & 0x1f00) >> 8;
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		power = I915_READ(MCH_SECP_NRG_STTS);
 
 	power = (1000000 * power) >> units; /* convert to uJ */
@@ -2672,7 +2460,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
 		struct drm_printer p = drm_seq_file_printer(m);
 
-		print_intel_runtime_pm_wakeref(dev_priv, &p);
+		print_intel_runtime_pm_wakeref(&dev_priv->runtime_pm, &p);
 	}
 
 	return 0;
@@ -2717,7 +2505,7 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 
 	csr = &dev_priv->csr;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	seq_printf(m, "fw loaded: %s\n", yesno(csr->dmc_payload != NULL));
 	seq_printf(m, "path: %s\n", csr->fw_path);
@@ -2743,7 +2531,7 @@ out:
 	seq_printf(m, "ssp base: 0x%08x\n", I915_READ(CSR_SSP_BASE));
 	seq_printf(m, "htp: 0x%08x\n", I915_READ(CSR_HTP_SKL));
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -3027,7 +2815,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	struct drm_connector_list_iter conn_iter;
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
@@ -3076,7 +2864,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	drm_connector_list_iter_end(&conn_iter);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -3089,11 +2877,11 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	enum intel_engine_id id;
 	struct drm_printer p;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
-	seq_printf(m, "GT awake? %s\n", yesno(dev_priv->gt.awake));
-	seq_printf(m, "Global active requests: %d\n",
-		   dev_priv->gt.active_requests);
+	seq_printf(m, "GT awake? %s [%d]\n",
+		   yesno(dev_priv->gt.awake),
+		   atomic_read(&dev_priv->gt.wakeref.count));
 	seq_printf(m, "CS timestamp frequency: %u kHz\n",
 		   RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz);
 
@@ -3101,7 +2889,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 	for_each_engine(engine, dev_priv, id)
 		intel_engine_dump(engine, &p, "%s\n", engine->name);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -3222,7 +3010,7 @@ static ssize_t i915_ipc_status_write(struct file *file, const char __user *ubuf,
 	if (ret < 0)
 		return ret;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		if (!dev_priv->ipc_enabled && enable)
 			DRM_INFO("Enabling IPC: WM will be proper only after next commit\n");
 		dev_priv->wm.distrust_bios_wm = true;
@@ -3904,14 +3692,26 @@ i915_drop_caches_set(void *data, u64 val)
 
 	/* No need to check and wait for gpu resets, only libdrm auto-restarts
 	 * on ioctls on -EAGAIN. */
-	if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
+	if (val & (DROP_ACTIVE | DROP_IDLE | DROP_RETIRE | DROP_RESET_SEQNO)) {
 		int ret;
 
 		ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
 		if (ret)
 			return ret;
 
-		if (val & DROP_ACTIVE)
+		/*
+		 * To finish the flush of the idle_worker, we must complete
+		 * the switch-to-kernel-context, which requires a double
+		 * pass through wait_for_idle: first queues the switch,
+		 * second waits for the switch.
+		 */
+		if (ret == 0 && val & (DROP_IDLE | DROP_ACTIVE))
+			ret = i915_gem_wait_for_idle(i915,
+						     I915_WAIT_INTERRUPTIBLE |
+						     I915_WAIT_LOCKED,
+						     MAX_SCHEDULE_TIMEOUT);
+
+		if (ret == 0 && val & DROP_IDLE)
 			ret = i915_gem_wait_for_idle(i915,
 						     I915_WAIT_INTERRUPTIBLE |
 						     I915_WAIT_LOCKED,
@@ -3938,11 +3738,8 @@ i915_drop_caches_set(void *data, u64 val)
 	fs_reclaim_release(GFP_KERNEL);
 
 	if (val & DROP_IDLE) {
-		do {
-			if (READ_ONCE(i915->gt.active_requests))
-				flush_delayed_work(&i915->gt.retire_work);
-			drain_delayed_work(&i915->gt.idle_work);
-		} while (READ_ONCE(i915->gt.awake));
+		flush_delayed_work(&i915->gem.retire_work);
+		flush_work(&i915->gem.idle_work);
 	}
 
 	if (val & DROP_FREED)
@@ -3965,7 +3762,7 @@ i915_cache_sharing_get(void *data, u64 *val)
 	if (!(IS_GEN_RANGE(dev_priv, 6, 7)))
 		return -ENODEV;
 
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 
 	*val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT;
@@ -3986,7 +3783,7 @@ i915_cache_sharing_set(void *data, u64 val)
 		return -EINVAL;
 
 	DRM_DEBUG_DRIVER("Manually setting uncore sharing to %llu\n", val);
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		u32 snpcr;
 
 		/* Update the cache sharing policy here as well */
@@ -4164,7 +3961,7 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 				RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
 		}
 		sseu->eu_total = sseu->eu_per_subslice *
-				 sseu_subslice_total(sseu);
+				 intel_sseu_subslice_total(sseu);
 
 		/* subtract fused off EU(s) from enabled slice(s) */
 		for (s = 0; s < fls(sseu->slice_mask); s++) {
@@ -4188,10 +3985,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 	seq_printf(m, "  %s Slice Total: %u\n", type,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
-		   sseu_subslice_total(sseu));
+		   intel_sseu_subslice_total(sseu));
 	for (s = 0; s < fls(sseu->slice_mask); s++) {
 		seq_printf(m, "  %s Slice%i subslices: %u\n", type,
-			   s, hweight8(sseu->subslice_mask[s]));
+			   s, intel_sseu_subslices_per_slice(sseu, s));
 	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
@@ -4232,7 +4029,7 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 	sseu.max_eus_per_subslice =
 		RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		if (IS_CHERRYVIEW(dev_priv))
 			cherryview_sseu_device_status(dev_priv, &sseu);
 		else if (IS_BROADWELL(dev_priv))
@@ -4255,7 +4052,8 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
 	if (INTEL_GEN(i915) < 6)
 		return 0;
 
-	file->private_data = (void *)(uintptr_t)intel_runtime_pm_get(i915);
+	file->private_data =
+		(void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
 	intel_uncore_forcewake_user_get(&i915->uncore);
 
 	return 0;
@@ -4269,7 +4067,7 @@ static int i915_forcewake_release(struct inode *inode, struct file *file)
 		return 0;
 
 	intel_uncore_forcewake_user_put(&i915->uncore);
-	intel_runtime_pm_put(i915,
+	intel_runtime_pm_put(&i915->runtime_pm,
 			     (intel_wakeref_t)(uintptr_t)file->private_data);
 
 	return 0;
@@ -4570,8 +4368,6 @@ static const struct file_operations i915_fifo_underrun_reset_ops = {
 static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
-	{"i915_gem_gtt", i915_gem_gtt_info, 0},
-	{"i915_gem_stolen", i915_gem_stolen_list_info },
 	{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
 	{"i915_gem_interrupt", i915_interrupt_info, 0},
 	{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
@@ -4648,23 +4444,17 @@ static const struct i915_debugfs_files {
 int i915_debugfs_register(struct drm_i915_private *dev_priv)
 {
 	struct drm_minor *minor = dev_priv->drm.primary;
-	struct dentry *ent;
 	int i;
 
-	ent = debugfs_create_file("i915_forcewake_user", S_IRUSR,
-				  minor->debugfs_root, to_i915(minor->dev),
-				  &i915_forcewake_fops);
-	if (!ent)
-		return -ENOMEM;
+	debugfs_create_file("i915_forcewake_user", S_IRUSR, minor->debugfs_root,
+			    to_i915(minor->dev), &i915_forcewake_fops);
 
 	for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
-		ent = debugfs_create_file(i915_debugfs_files[i].name,
-					  S_IRUGO | S_IWUSR,
-					  minor->debugfs_root,
-					  to_i915(minor->dev),
-					  i915_debugfs_files[i].fops);
-		if (!ent)
-			return -ENOMEM;
+		debugfs_create_file(i915_debugfs_files[i].name,
+				    S_IRUGO | S_IWUSR,
+				    minor->debugfs_root,
+				    to_i915(minor->dev),
+				    i915_debugfs_files[i].fops);
 	}
 
 	return drm_debugfs_create_files(i915_debugfs_list,
@@ -4757,6 +4547,7 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
 {
 	struct drm_connector *connector = m->private;
 	struct intel_connector *intel_connector = to_intel_connector(connector);
+	bool hdcp_cap, hdcp2_cap;
 
 	if (connector->status != connector_status_connected)
 		return -ENODEV;
@@ -4767,8 +4558,16 @@ static int i915_hdcp_sink_capability_show(struct seq_file *m, void *data)
 
 	seq_printf(m, "%s:%d HDCP version: ", connector->name,
 		   connector->base.id);
-	seq_printf(m, "%s ", !intel_hdcp_capable(intel_connector) ?
-		   "None" : "HDCP1.4");
+	hdcp_cap = intel_hdcp_capable(intel_connector);
+	hdcp2_cap = intel_hdcp2_capable(intel_connector);
+
+	if (hdcp_cap)
+		seq_puts(m, "HDCP1.4 ");
+	if (hdcp2_cap)
+		seq_puts(m, "HDCP2.2 ");
+
+	if (!hdcp_cap && !hdcp2_cap)
+		seq_puts(m, "None");
 	seq_puts(m, "\n");
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.h b/drivers/gpu/drm/i915/i915_debugfs.h
new file mode 100644
index 000000000000..c0cd22eb916d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_debugfs.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_DEBUGFS_H__
+#define __I915_DEBUGFS_H__
+
+struct drm_i915_private;
+struct drm_connector;
+
+#ifdef CONFIG_DEBUG_FS
+int i915_debugfs_register(struct drm_i915_private *dev_priv);
+int i915_debugfs_connector_add(struct drm_connector *connector);
+#else
+static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) { return 0; }
+static inline int i915_debugfs_connector_add(struct drm_connector *connector) { return 0; }
+#endif
+
+#endif /* __I915_DEBUGFS_H__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1ad88e6d7c04..f62e3397d936 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -47,22 +47,35 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/i915_drm.h>
 
+#include "display/intel_acpi.h"
+#include "display/intel_audio.h"
+#include "display/intel_bw.h"
+#include "display/intel_cdclk.h"
+#include "display/intel_dp.h"
+#include "display/intel_fbdev.h"
+#include "display/intel_gmbus.h"
+#include "display/intel_hotplug.h"
+#include "display/intel_overlay.h"
+#include "display/intel_pipe_crc.h"
+#include "display/intel_sprite.h"
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_ioctls.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_reset.h"
+#include "gt/intel_workarounds.h"
+
+#include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "i915_pmu.h"
 #include "i915_query.h"
-#include "i915_reset.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
-#include "intel_audio.h"
-#include "intel_cdclk.h"
 #include "intel_csr.h"
-#include "intel_dp.h"
 #include "intel_drv.h"
-#include "intel_fbdev.h"
 #include "intel_pm.h"
-#include "intel_sprite.h"
 #include "intel_uc.h"
-#include "intel_workarounds.h"
 
 static struct drm_driver driver;
 
@@ -186,7 +199,8 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
 		DRM_DEBUG_KMS("Found Kaby Lake PCH (KBP)\n");
 		WARN_ON(!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv) &&
 			!IS_COFFEELAKE(dev_priv));
-		return PCH_KBP;
+		/* KBP is SPT compatible */
+		return PCH_SPT;
 	case INTEL_PCH_CNP_DEVICE_ID_TYPE:
 		DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n");
 		WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
@@ -204,6 +218,10 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
 		DRM_DEBUG_KMS("Found Ice Lake PCH\n");
 		WARN_ON(!IS_ICELAKE(dev_priv));
 		return PCH_ICP;
+	case INTEL_PCH_MCC_DEVICE_ID_TYPE:
+		DRM_DEBUG_KMS("Found Mule Creek Canyon PCH\n");
+		WARN_ON(!IS_ELKHARTLAKE(dev_priv));
+		return PCH_MCC;
 	default:
 		return PCH_NONE;
 	}
@@ -231,7 +249,9 @@ intel_virt_detect_pch(const struct drm_i915_private *dev_priv)
 	 * make an educated guess as to which PCH is really there.
 	 */
 
-	if (IS_ICELAKE(dev_priv))
+	if (IS_ELKHARTLAKE(dev_priv))
+		id = INTEL_PCH_MCC_DEVICE_ID_TYPE;
+	else if (IS_ICELAKE(dev_priv))
 		id = INTEL_PCH_ICP_DEVICE_ID_TYPE;
 	else if (IS_CANNONLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
 		id = INTEL_PCH_CNP_DEVICE_ID_TYPE;
@@ -319,6 +339,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
+	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	drm_i915_getparam_t *param = data;
 	int value;
 
@@ -336,7 +357,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = pdev->revision;
 		break;
 	case I915_PARAM_NUM_FENCES_AVAIL:
-		value = dev_priv->num_fence_regs;
+		value = dev_priv->ggtt.num_fences;
 		break;
 	case I915_PARAM_HAS_OVERLAY:
 		value = dev_priv->overlay ? 1 : 0;
@@ -372,12 +393,12 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = i915_cmd_parser_get_version(dev_priv);
 		break;
 	case I915_PARAM_SUBSLICE_TOTAL:
-		value = sseu_subslice_total(&RUNTIME_INFO(dev_priv)->sseu);
+		value = intel_sseu_subslice_total(sseu);
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_EU_TOTAL:
-		value = RUNTIME_INFO(dev_priv)->sseu.eu_total;
+		value = sseu->eu_total;
 		if (!value)
 			return -ENODEV;
 		break;
@@ -394,7 +415,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = HAS_POOLED_EU(dev_priv);
 		break;
 	case I915_PARAM_MIN_EU_IN_POOL:
-		value = RUNTIME_INFO(dev_priv)->sseu.min_eu_in_pool;
+		value = sseu->min_eu_in_pool;
 		break;
 	case I915_PARAM_HUC_STATUS:
 		value = intel_huc_check_status(&dev_priv->huc);
@@ -433,6 +454,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_CAPTURE:
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -444,12 +466,12 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = intel_engines_has_context_isolation(dev_priv);
 		break;
 	case I915_PARAM_SLICE_MASK:
-		value = RUNTIME_INFO(dev_priv)->sseu.slice_mask;
+		value = sseu->slice_mask;
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		value = RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0];
+		value = sseu->subslice_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
@@ -697,7 +719,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	if (ret)
 		goto cleanup_csr;
 
-	intel_setup_gmbus(dev_priv);
+	intel_gmbus_setup(dev_priv);
 
 	/* Important: The output setup functions called by modeset_init need
 	 * working irqs for e.g. gmbus and dp aux transfers. */
@@ -727,12 +749,13 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
 	i915_gem_suspend(dev_priv);
+	i915_gem_fini_hw(dev_priv);
 	i915_gem_fini(dev_priv);
 cleanup_modeset:
 	intel_modeset_cleanup(dev);
 cleanup_irq:
 	drm_irq_uninstall(dev);
-	intel_teardown_gmbus(dev_priv);
+	intel_gmbus_teardown(dev_priv);
 cleanup_csr:
 	intel_csr_ucode_fini(dev_priv);
 	intel_power_domains_fini_hw(dev_priv);
@@ -884,13 +907,16 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->backlight_lock);
 
 	mutex_init(&dev_priv->sb_lock);
+	pm_qos_add_request(&dev_priv->sb_qos,
+			   PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
+
 	mutex_init(&dev_priv->av_mutex);
 	mutex_init(&dev_priv->wm.wm_mutex);
 	mutex_init(&dev_priv->pps_mutex);
 	mutex_init(&dev_priv->hdcp_comp_mutex);
 
 	i915_memcpy_init_early(dev_priv);
-	intel_runtime_pm_init_early(dev_priv);
+	intel_runtime_pm_init_early(&dev_priv->runtime_pm);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
@@ -943,6 +969,9 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	i915_gem_cleanup_early(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
+
+	pm_qos_remove_request(&dev_priv->sb_qos);
+	mutex_destroy(&dev_priv->sb_lock);
 }
 
 /**
@@ -1603,7 +1632,6 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	intel_uncore_sanitize(dev_priv);
 
 	intel_gt_init_workarounds(dev_priv);
-	i915_gem_load_init_fences(dev_priv);
 
 	/* On the 945G/GM, the chipset reports the MSI capability on the
 	 * integrated graphics even though the support isn't actually there
@@ -1640,6 +1668,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 	 */
 	intel_get_dram_info(dev_priv);
 
+	intel_bw_init_hw(dev_priv);
 
 	return 0;
 
@@ -1668,7 +1697,6 @@ static void i915_driver_cleanup_hw(struct drm_i915_private *dev_priv)
 		pci_disable_msi(pdev);
 
 	pm_qos_remove_request(&dev_priv->pm_qos);
-	i915_ggtt_cleanup_hw(dev_priv);
 }
 
 /**
@@ -1730,7 +1758,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 		drm_kms_helper_poll_init(dev);
 
 	intel_power_domains_enable(dev_priv);
-	intel_runtime_pm_enable(dev_priv);
+	intel_runtime_pm_enable(&dev_priv->runtime_pm);
 }
 
 /**
@@ -1739,7 +1767,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 {
-	intel_runtime_pm_disable(dev_priv);
+	intel_runtime_pm_disable(&dev_priv->runtime_pm);
 	intel_power_domains_disable(dev_priv);
 
 	intel_fbdev_unregister(dev_priv);
@@ -1760,7 +1788,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 	i915_pmu_unregister(dev_priv);
 
 	i915_teardown_sysfs(dev_priv);
-	drm_dev_unregister(&dev_priv->drm);
+	drm_dev_unplug(&dev_priv->drm);
 
 	i915_gem_shrinker_unregister(dev_priv);
 }
@@ -1868,7 +1896,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret < 0)
 		goto out_pci_disable;
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	ret = i915_driver_init_mmio(dev_priv);
 	if (ret < 0)
@@ -1884,7 +1912,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	i915_driver_register(dev_priv);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	i915_welcome_messages(dev_priv);
 
@@ -1892,10 +1920,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 out_cleanup_hw:
 	i915_driver_cleanup_hw(dev_priv);
+	i915_ggtt_cleanup_hw(dev_priv);
 out_cleanup_mmio:
 	i915_driver_cleanup_mmio(dev_priv);
 out_runtime_pm_put:
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 	i915_driver_cleanup_early(dev_priv);
 out_pci_disable:
 	pci_disable_device(pdev);
@@ -1910,7 +1939,7 @@ void i915_driver_unload(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	i915_driver_unregister(dev_priv);
 
@@ -1943,20 +1972,29 @@ void i915_driver_unload(struct drm_device *dev)
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	i915_reset_error_state(dev_priv);
 
-	i915_gem_fini(dev_priv);
+	i915_gem_fini_hw(dev_priv);
 
 	intel_power_domains_fini_hw(dev_priv);
 
 	i915_driver_cleanup_hw(dev_priv);
-	i915_driver_cleanup_mmio(dev_priv);
 
-	enable_rpm_wakeref_asserts(dev_priv);
-	intel_runtime_pm_cleanup(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 }
 
 static void i915_driver_release(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+
+	disable_rpm_wakeref_asserts(rpm);
+
+	i915_gem_fini(dev_priv);
+
+	i915_ggtt_cleanup_hw(dev_priv);
+	i915_driver_cleanup_mmio(dev_priv);
+
+	enable_rpm_wakeref_asserts(rpm);
+	intel_runtime_pm_cleanup(rpm);
 
 	i915_driver_cleanup_early(dev_priv);
 	i915_driver_destroy(dev_priv);
@@ -2050,7 +2088,7 @@ static int i915_drm_suspend(struct drm_device *dev)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	pci_power_t opregion_target_state;
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	/* We do a lot of poking in a lot of registers, make sure they work
 	 * properly. */
@@ -2084,7 +2122,7 @@ static int i915_drm_suspend(struct drm_device *dev)
 
 	intel_csr_ucode_suspend(dev_priv);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return 0;
 }
@@ -2105,9 +2143,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
 	int ret;
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(rpm);
 
 	i915_gem_suspend_late(dev_priv);
 
@@ -2148,9 +2187,9 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 		pci_set_power_state(pdev, PCI_D3hot);
 
 out:
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(rpm);
 	if (!dev_priv->uncore.user_forcewake.count)
-		intel_runtime_pm_cleanup(dev_priv);
+		intel_runtime_pm_cleanup(rpm);
 
 	return ret;
 }
@@ -2184,7 +2223,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 	intel_sanitize_gt_powersave(dev_priv);
 
 	i915_gem_sanitize(dev_priv);
@@ -2244,7 +2283,7 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	intel_power_domains_enable(dev_priv);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return 0;
 }
@@ -2299,7 +2338,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	pci_set_master(pdev);
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		ret = vlv_resume_prepare(dev_priv, false);
@@ -2322,9 +2361,9 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	intel_power_domains_resume(dev_priv);
 
-	intel_engines_sanitize(dev_priv, true);
+	intel_gt_sanitize(dev_priv, true);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -2677,7 +2716,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
 	I915_WRITE(VLV_GUNIT_CLOCK_GATE2,	s->clock_gate_dis2);
 }
 
-static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv,
+static int vlv_wait_for_pw_status(struct drm_i915_private *i915,
 				  u32 mask, u32 val)
 {
 	i915_reg_t reg = VLV_GTLC_PW_STATUS;
@@ -2691,7 +2730,9 @@ static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv,
 	 * Transitioning between RC6 states should be at most 2ms (see
 	 * valleyview_enable_rps) so use a 3ms timeout.
 	 */
-	ret = wait_for(((reg_value = I915_READ_NOTRACE(reg)) & mask) == val, 3);
+	ret = wait_for(((reg_value =
+			 intel_uncore_read_notrace(&i915->uncore, reg)) & mask)
+		       == val, 3);
 
 	/* just trace the final value */
 	trace_i915_reg_rw(false, reg, reg_value, sizeof(reg_value), true);
@@ -2857,6 +2898,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	struct pci_dev *pdev = to_pci_dev(kdev);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
 	int ret;
 
 	if (WARN_ON_ONCE(!(dev_priv->gt_pm.rc6.enabled && HAS_RC6(dev_priv))))
@@ -2867,7 +2909,7 @@ static int intel_runtime_suspend(struct device *kdev)
 
 	DRM_DEBUG_KMS("Suspending device\n");
 
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(rpm);
 
 	/*
 	 * We are safe here against re-faults, since the fault handler takes
@@ -2875,7 +2917,7 @@ static int intel_runtime_suspend(struct device *kdev)
 	 */
 	i915_gem_runtime_suspend(dev_priv);
 
-	intel_uc_suspend(dev_priv);
+	intel_uc_runtime_suspend(dev_priv);
 
 	intel_runtime_pm_disable_interrupts(dev_priv);
 
@@ -2905,18 +2947,18 @@ static int intel_runtime_suspend(struct device *kdev)
 		i915_gem_init_swizzling(dev_priv);
 		i915_gem_restore_fences(dev_priv);
 
-		enable_rpm_wakeref_asserts(dev_priv);
+		enable_rpm_wakeref_asserts(rpm);
 
 		return ret;
 	}
 
-	enable_rpm_wakeref_asserts(dev_priv);
-	intel_runtime_pm_cleanup(dev_priv);
+	enable_rpm_wakeref_asserts(rpm);
+	intel_runtime_pm_cleanup(rpm);
 
 	if (intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore))
 		DRM_ERROR("Unclaimed access detected prior to suspending\n");
 
-	dev_priv->runtime_pm.suspended = true;
+	rpm->suspended = true;
 
 	/*
 	 * FIXME: We really should find a document that references the arguments
@@ -2955,6 +2997,7 @@ static int intel_runtime_resume(struct device *kdev)
 	struct pci_dev *pdev = to_pci_dev(kdev);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
 	int ret = 0;
 
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
@@ -2962,11 +3005,11 @@ static int intel_runtime_resume(struct device *kdev)
 
 	DRM_DEBUG_KMS("Resuming device\n");
 
-	WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
-	disable_rpm_wakeref_asserts(dev_priv);
+	WARN_ON_ONCE(atomic_read(&rpm->wakeref_count));
+	disable_rpm_wakeref_asserts(rpm);
 
 	intel_opregion_notify_adapter(dev_priv, PCI_D0);
-	dev_priv->runtime_pm.suspended = false;
+	rpm->suspended = false;
 	if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
 		DRM_DEBUG_DRIVER("Unclaimed access during suspend, bios?\n");
 
@@ -3016,7 +3059,7 @@ static int intel_runtime_resume(struct device *kdev)
 
 	intel_enable_ipc(dev_priv);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(rpm);
 
 	if (ret)
 		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
@@ -3098,7 +3141,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, drm_noop, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, drm_noop, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, drm_noop, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_SETPARAM, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH),
@@ -3111,13 +3154,13 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_RENDER_ALLOW),
@@ -3136,7 +3179,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
-	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
@@ -3148,6 +3191,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 066fd2a12851..bc909ec5d9c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -54,6 +54,7 @@
 #include <drm/drm_cache.h>
 #include <drm/drm_util.h>
 #include <drm/drm_dsc.h>
+#include <drm/drm_atomic.h>
 #include <drm/drm_connector.h>
 #include <drm/i915_mei_hdcp_interface.h>
 
@@ -62,23 +63,27 @@
 #include "i915_reg.h"
 #include "i915_utils.h"
 
-#include "intel_bios.h"
+#include "display/intel_bios.h"
+#include "display/intel_display.h"
+#include "display/intel_display_power.h"
+#include "display/intel_dpll_mgr.h"
+#include "display/intel_frontbuffer.h"
+#include "display/intel_opregion.h"
+
+#include "gt/intel_lrc.h"
+#include "gt/intel_engine.h"
+#include "gt/intel_workarounds.h"
+
 #include "intel_device_info.h"
-#include "intel_display.h"
-#include "intel_dpll_mgr.h"
-#include "intel_frontbuffer.h"
-#include "intel_lrc.h"
-#include "intel_opregion.h"
-#include "intel_ringbuffer.h"
+#include "intel_runtime_pm.h"
 #include "intel_uc.h"
 #include "intel_uncore.h"
+#include "intel_wakeref.h"
 #include "intel_wopcm.h"
-#include "intel_workarounds.h"
 
 #include "i915_gem.h"
-#include "i915_gem_context.h"
+#include "gem/i915_gem_context_types.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
 #include "i915_gpu_error.h"
 #include "i915_request.h"
@@ -93,8 +98,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20190417"
-#define DRIVER_TIMESTAMP	1555492067
+#define DRIVER_DATE		"20190619"
+#define DRIVER_TIMESTAMP	1560947544
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -133,7 +138,7 @@ bool i915_error_injected(void);
 	__i915_printk(i915, i915_error_injected() ? KERN_DEBUG : KERN_ERR, \
 		      fmt, ##__VA_ARGS__)
 
-typedef depot_stack_handle_t intel_wakeref_t;
+struct drm_i915_gem_object;
 
 enum hpd_pin {
 	HPD_NONE = 0,
@@ -210,12 +215,6 @@ struct drm_i915_file_private {
 	struct {
 		spinlock_t lock;
 		struct list_head request_list;
-/* 20ms is a fairly arbitrary limit (greater than the average frame time)
- * chosen to prevent the CPU getting more than a frame ahead of the GPU
- * (when using lax throttling for the frontbuffer). We also use it to
- * offer free GPU waitboosts for severely congested workloads.
- */
-#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
 	} mm;
 
 	struct idr context_idr;
@@ -297,7 +296,7 @@ struct drm_i915_display_funcs {
 				    struct intel_crtc_state *cstate);
 	int (*compute_global_watermarks)(struct intel_atomic_state *state);
 	void (*update_wm)(struct intel_crtc *crtc);
-	int (*modeset_calc_cdclk)(struct drm_atomic_state *state);
+	int (*modeset_calc_cdclk)(struct intel_atomic_state *state);
 	/* Returns the active state of the crtc, and if the crtc is active,
 	 * fills out the pipe-config with the hw state. */
 	bool (*get_pipe_config)(struct intel_crtc *,
@@ -342,12 +341,9 @@ struct drm_i915_display_funcs {
 	 * involved with the same commit.
 	 */
 	void (*load_luts)(const struct intel_crtc_state *crtc_state);
+	void (*read_luts)(struct intel_crtc_state *crtc_state);
 };
 
-#define CSR_VERSION(major, minor)	((major) << 16 | (minor))
-#define CSR_VERSION_MAJOR(version)	((version) >> 16)
-#define CSR_VERSION_MINOR(version)	((version) & 0xffff)
-
 struct intel_csr {
 	struct work_struct work;
 	const char *fw_path;
@@ -357,8 +353,8 @@ struct intel_csr {
 	u32 dmc_fw_size; /* dwords */
 	u32 version;
 	u32 mmio_count;
-	i915_reg_t mmioaddr[8];
-	u32 mmiodata[8];
+	i915_reg_t mmioaddr[20];
+	u32 mmiodata[20];
 	u32 dc_state;
 	u32 allowed_dc_mask;
 	intel_wakeref_t wakeref;
@@ -535,15 +531,10 @@ enum intel_pch {
 	PCH_IBX,	/* Ibexpeak PCH */
 	PCH_CPT,	/* Cougarpoint/Pantherpoint PCH */
 	PCH_LPT,	/* Lynxpoint/Wildcatpoint PCH */
-	PCH_SPT,        /* Sunrisepoint PCH */
-	PCH_KBP,        /* Kaby Lake PCH */
+	PCH_SPT,        /* Sunrisepoint/Kaby Lake PCH */
 	PCH_CNP,        /* Cannon/Comet Lake PCH */
 	PCH_ICP,	/* Ice Lake PCH */
-};
-
-enum intel_sbi_destination {
-	SBI_ICLK,
-	SBI_MPHY,
+	PCH_MCC,        /* Mule Creek Canyon PCH */
 };
 
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
@@ -648,6 +639,8 @@ struct intel_rps_ei {
 };
 
 struct intel_rps {
+	struct mutex lock; /* protects enabling and the worker */
+
 	/*
 	 * work, interrupts_enabled and pm_iir are protected by
 	 * dev_priv->irq_lock
@@ -739,111 +732,6 @@ struct intel_ilk_power_mgmt {
 	int r_t;
 };
 
-struct drm_i915_private;
-struct i915_power_well;
-
-struct i915_power_well_ops {
-	/*
-	 * Synchronize the well's hw state to match the current sw state, for
-	 * example enable/disable it based on the current refcount. Called
-	 * during driver init and resume time, possibly after first calling
-	 * the enable/disable handlers.
-	 */
-	void (*sync_hw)(struct drm_i915_private *dev_priv,
-			struct i915_power_well *power_well);
-	/*
-	 * Enable the well and resources that depend on it (for example
-	 * interrupts located on the well). Called after the 0->1 refcount
-	 * transition.
-	 */
-	void (*enable)(struct drm_i915_private *dev_priv,
-		       struct i915_power_well *power_well);
-	/*
-	 * Disable the well and resources that depend on it. Called after
-	 * the 1->0 refcount transition.
-	 */
-	void (*disable)(struct drm_i915_private *dev_priv,
-			struct i915_power_well *power_well);
-	/* Returns the hw enabled state. */
-	bool (*is_enabled)(struct drm_i915_private *dev_priv,
-			   struct i915_power_well *power_well);
-};
-
-struct i915_power_well_regs {
-	i915_reg_t bios;
-	i915_reg_t driver;
-	i915_reg_t kvmr;
-	i915_reg_t debug;
-};
-
-/* Power well structure for haswell */
-struct i915_power_well_desc {
-	const char *name;
-	bool always_on;
-	u64 domains;
-	/* unique identifier for this power well */
-	enum i915_power_well_id id;
-	/*
-	 * Arbitraty data associated with this power well. Platform and power
-	 * well specific.
-	 */
-	union {
-		struct {
-			/*
-			 * request/status flag index in the PUNIT power well
-			 * control/status registers.
-			 */
-			u8 idx;
-		} vlv;
-		struct {
-			enum dpio_phy phy;
-		} bxt;
-		struct {
-			const struct i915_power_well_regs *regs;
-			/*
-			 * request/status flag index in the power well
-			 * constrol/status registers.
-			 */
-			u8 idx;
-			/* Mask of pipes whose IRQ logic is backed by the pw */
-			u8 irq_pipe_mask;
-			/* The pw is backing the VGA functionality */
-			bool has_vga:1;
-			bool has_fuses:1;
-			/*
-			 * The pw is for an ICL+ TypeC PHY port in
-			 * Thunderbolt mode.
-			 */
-			bool is_tc_tbt:1;
-		} hsw;
-	};
-	const struct i915_power_well_ops *ops;
-};
-
-struct i915_power_well {
-	const struct i915_power_well_desc *desc;
-	/* power well enable/disable usage count */
-	int count;
-	/* cached hw enabled state */
-	bool hw_enabled;
-};
-
-struct i915_power_domains {
-	/*
-	 * Power wells needed for initialization at driver init and suspend
-	 * time are on. They are kept on until after the first modeset.
-	 */
-	bool initializing;
-	bool display_core_suspended;
-	int power_well_count;
-
-	intel_wakeref_t wakeref;
-
-	struct mutex lock;
-	int domain_use_count[POWER_DOMAIN_NUM];
-	struct i915_power_well *power_wells;
-};
-
 #define MAX_L3_SLICES 2
 struct intel_l3_parity {
 	u32 *remap_info[MAX_L3_SLICES];
@@ -861,20 +749,15 @@ struct i915_gem_mm {
 	/* Protects bound_list/unbound_list and #drm_i915_gem_object.mm.link */
 	spinlock_t obj_lock;
 
-	/** List of all objects in gtt_space. Used to restore gtt
-	 * mappings on resume */
-	struct list_head bound_list;
 	/**
-	 * List of objects which are not bound to the GTT (thus
-	 * are idle and not used by the GPU). These objects may or may
-	 * not actually have any pages attached.
+	 * List of objects which are purgeable.
 	 */
-	struct list_head unbound_list;
+	struct list_head purge_list;
 
-	/** List of all objects in gtt_space, currently mmaped by userspace.
-	 * All objects within this list must also be on bound_list.
+	/**
+	 * List of objects which have allocated pages and are shrinkable.
 	 */
-	struct list_head userfault_list;
+	struct list_head shrink_list;
 
 	/**
 	 * List of objects which are pending destruction.
@@ -899,15 +782,12 @@ struct i915_gem_mm {
 	struct vfsmount *gemfs;
 
 	/** PPGTT used for aliasing the PPGTT with the GTT */
-	struct i915_hw_ppgtt *aliasing_ppgtt;
+	struct i915_ppgtt *aliasing_ppgtt;
 
 	struct notifier_block oom_notifier;
 	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;
 
-	/** LRU list of objects with fence regs on them. */
-	struct list_head fence_list;
-
 	/**
 	 * Workqueue to fault in userptr pages, flushed by the execbuf
 	 * when required but otherwise left to userspace to try again
@@ -925,10 +805,9 @@ struct i915_gem_mm {
 	/** Bit 6 swizzling required for Y tiling */
 	u32 bit_6_swizzle_y;
 
-	/* accounting, useful for userland debugging */
-	spinlock_t object_stat_lock;
-	u64 object_memory;
-	u32 object_count;
+	/* shrinker accounting, also useful for userland debugging */
+	u64 shrink_memory;
+	u32 shrink_count;
 };
 
 #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
@@ -942,6 +821,9 @@ struct i915_gem_mm {
 #define I915_ENGINE_WEDGED_TIMEOUT  (60 * HZ)  /* Reset but no recovery? */
 
 struct ddi_vbt_port_info {
+	/* Non-NULL if port present. */
+	const struct child_device_config *child;
+
 	int max_tmds_clock;
 
 	/*
@@ -952,7 +834,6 @@ struct ddi_vbt_port_info {
 #define HDMI_LEVEL_SHIFT_UNKNOWN	0xff
 	u8 hdmi_level_shift;
 
-	u8 present:1;
 	u8 supports_dvi:1;
 	u8 supports_hdmi:1;
 	u8 supports_dp:1;
@@ -1154,54 +1035,6 @@ struct skl_wm_params {
 	u32 dbuf_block_size;
 };
 
-/*
- * This struct helps tracking the state needed for runtime PM, which puts the
- * device in PCI D3 state. Notice that when this happens, nothing on the
- * graphics device works, even register access, so we don't get interrupts nor
- * anything else.
- *
- * Every piece of our code that needs to actually touch the hardware needs to
- * either call intel_runtime_pm_get or call intel_display_power_get with the
- * appropriate power domain.
- *
- * Our driver uses the autosuspend delay feature, which means we'll only really
- * suspend if we stay with zero refcount for a certain amount of time. The
- * default value is currently very conservative (see intel_runtime_pm_enable), but
- * it can be changed with the standard runtime PM files from sysfs.
- *
- * The irqs_disabled variable becomes true exactly after we disable the IRQs and
- * goes back to false exactly before we reenable the IRQs. We use this variable
- * to check if someone is trying to enable/disable IRQs while they're supposed
- * to be disabled. This shouldn't happen and we'll print some error messages in
- * case it happens.
- *
- * For more, read the Documentation/power/runtime_pm.txt.
- */
-struct i915_runtime_pm {
-	atomic_t wakeref_count;
-	bool suspended;
-	bool irqs_enabled;
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-	/*
-	 * To aide detection of wakeref leaks and general misuse, we
-	 * track all wakeref holders. With manual markup (i.e. returning
-	 * a cookie to each rpm_get caller which they then supply to their
-	 * paired rpm_put) we can remove corresponding pairs of and keep
-	 * the array trimmed to active wakerefs.
-	 */
-	struct intel_runtime_pm_debug {
-		spinlock_t lock;
-
-		depot_stack_handle_t last_acquire;
-		depot_stack_handle_t last_release;
-
-		depot_stack_handle_t *owners;
-		unsigned long count;
-	} debug;
-#endif
-};
-
 enum intel_pipe_crc_source {
 	INTEL_PIPE_CRC_SOURCE_NONE,
 	INTEL_PIPE_CRC_SOURCE_PLANE1,
@@ -1561,6 +1394,7 @@ struct drm_i915_private {
 
 	/* Sideband mailbox protection */
 	struct mutex sb_lock;
+	struct pm_qos_request sb_qos;
 
 	/** Cached value of IMR to avoid reads in updating the bitfield */
 	union {
@@ -1594,9 +1428,6 @@ struct drm_i915_private {
 	/* protects panel power sequencer state */
 	struct mutex pps_mutex;
 
-	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
-	int num_fence_regs; /* 8 on pre-965, 16 otherwise */
-
 	unsigned int fsb_freq, mem_freq, is_ddr3;
 	unsigned int skl_preferred_vco_freq;
 	unsigned int max_cdclk_freq;
@@ -1709,14 +1540,6 @@ struct drm_i915_private {
 	 */
 	u32 edram_size_mb;
 
-	/*
-	 * Protects RPS/RC6 register access and PCU communication.
-	 * Must be taken after struct_mutex if nested. Note that
-	 * this lock may be held for long periods of time when
-	 * talking to hw - so only take it when talking to hw!
-	 */
-	struct mutex pcu_lock;
-
 	/* gen6+ GT PM state */
 	struct intel_gen6_power_mgmt gt_pm;
 
@@ -1850,7 +1673,14 @@ struct drm_i915_private {
 		} type;
 	} dram_info;
 
-	struct i915_runtime_pm runtime_pm;
+	struct intel_bw_info {
+		int num_planes;
+		int deratedbw[3];
+	} max_bw[6];
+
+	struct drm_private_obj bw_obj;
+
+	struct intel_runtime_pm runtime_pm;
 
 	struct {
 		bool initialized;
@@ -1995,8 +1825,6 @@ struct drm_i915_private {
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
-		void (*cleanup_engine)(struct intel_engine_cs *engine);
-
 		struct i915_gt_timelines {
 			struct mutex mutex; /* protects list, tainted by GPU */
 			struct list_head active_list;
@@ -2006,10 +1834,12 @@ struct drm_i915_private {
 			struct list_head hwsp_free_list;
 		} timelines;
 
-		intel_engine_mask_t active_engines;
 		struct list_head active_rings;
+
+		struct intel_wakeref wakeref;
+
 		struct list_head closed_vma;
-		u32 active_requests;
+		spinlock_t closed_lock; /* guards the list of closed_vma */
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2020,6 +1850,16 @@ struct drm_i915_private {
 		 */
 		intel_wakeref_t awake;
 
+		struct blocking_notifier_head pm_notifications;
+
+		ktime_t last_init_time;
+
+		struct i915_vma *scratch;
+	} gt;
+
+	struct {
+		struct notifier_block pm_notifier;
+
 		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
@@ -2036,12 +1876,8 @@ struct drm_i915_private {
 		 * arrive within a small period of time, we fire
 		 * off the idle_work.
 		 */
-		struct delayed_work idle_work;
-
-		ktime_t last_init_time;
-
-		struct i915_vma *scratch;
-	} gt;
+		struct work_struct idle_work;
+	} gem;
 
 	/* For i945gm vblank irq vs. C3 workaround */
 	struct {
@@ -2162,111 +1998,6 @@ enum hdmi_force_audio {
 	GENMASK(INTEL_FRONTBUFFER_BITS_PER_PIPE * ((pipe) + 1) - 1, \
 		INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))
 
-/*
- * Optimised SGL iterator for GEM objects
- */
-static __always_inline struct sgt_iter {
-	struct scatterlist *sgp;
-	union {
-		unsigned long pfn;
-		dma_addr_t dma;
-	};
-	unsigned int curr;
-	unsigned int max;
-} __sgt_iter(struct scatterlist *sgl, bool dma) {
-	struct sgt_iter s = { .sgp = sgl };
-
-	if (s.sgp) {
-		s.max = s.curr = s.sgp->offset;
-		s.max += s.sgp->length;
-		if (dma)
-			s.dma = sg_dma_address(s.sgp);
-		else
-			s.pfn = page_to_pfn(sg_page(s.sgp));
-	}
-
-	return s;
-}
-
-static inline struct scatterlist *____sg_next(struct scatterlist *sg)
-{
-	++sg;
-	if (unlikely(sg_is_chain(sg)))
-		sg = sg_chain_ptr(sg);
-	return sg;
-}
-
-/**
- * __sg_next - return the next scatterlist entry in a list
- * @sg:		The current sg entry
- *
- * Description:
- *   If the entry is the last, return NULL; otherwise, step to the next
- *   element in the array (@sg@+1). If that's a chain pointer, follow it;
- *   otherwise just return the pointer to the current element.
- **/
-static inline struct scatterlist *__sg_next(struct scatterlist *sg)
-{
-	return sg_is_last(sg) ? NULL : ____sg_next(sg);
-}
-
-/**
- * for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
- * @__dmap:	DMA address (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_dma(__dmap, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
-	     ((__dmap) = (__iter).dma + (__iter).curr);			\
-	     (((__iter).curr += I915_GTT_PAGE_SIZE) >= (__iter).max) ?	\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
-
-/**
- * for_each_sgt_page - iterate over the pages of the given sg_table
- * @__pp:	page pointer (output)
- * @__iter:	'struct sgt_iter' (iterator state, internal)
- * @__sgt:	sg_table to iterate over (input)
- */
-#define for_each_sgt_page(__pp, __iter, __sgt)				\
-	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
-	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
-	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
-	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
-	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
-
-bool i915_sg_trim(struct sg_table *orig_st);
-
-static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
-{
-	unsigned int page_sizes;
-
-	page_sizes = 0;
-	while (sg) {
-		GEM_BUG_ON(sg->offset);
-		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
-		page_sizes |= sg->length;
-		sg = __sg_next(sg);
-	}
-
-	return page_sizes;
-}
-
-static inline unsigned int i915_sg_segment_size(void)
-{
-	unsigned int size = swiotlb_max_segment();
-
-	if (size == 0)
-		return SCATTERLIST_MAX_SEGMENT;
-
-	size = rounddown(size, PAGE_SIZE);
-	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
-	if (size < PAGE_SIZE)
-		size = PAGE_SIZE;
-
-	return size;
-}
-
 #define INTEL_INFO(dev_priv)	(&(dev_priv)->__info)
 #define RUNTIME_INFO(dev_priv)	(&(dev_priv)->__runtime)
 #define DRIVER_CAPS(dev_priv)	(&(dev_priv)->caps)
@@ -2420,9 +2151,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	IS_SUBPLATFORM(dev_priv, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULT)
 #define IS_KBL_ULX(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_KABYLAKE, INTEL_SUBPLATFORM_ULX)
-#define IS_AML_ULX(dev_priv) \
-	(IS_SUBPLATFORM(dev_priv, INTEL_KABYLAKE, INTEL_SUBPLATFORM_AML) || \
-	 IS_SUBPLATFORM(dev_priv, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_AML))
 #define IS_SKL_GT2(dev_priv)	(IS_SKYLAKE(dev_priv) && \
 				 INTEL_INFO(dev_priv)->gt == 2)
 #define IS_SKL_GT3(dev_priv)	(IS_SKYLAKE(dev_priv) && \
@@ -2435,6 +2163,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 				 INTEL_INFO(dev_priv)->gt == 3)
 #define IS_CFL_ULT(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULT)
+#define IS_CFL_ULX(dev_priv) \
+	IS_SUBPLATFORM(dev_priv, INTEL_COFFEELAKE, INTEL_SUBPLATFORM_ULX)
 #define IS_CFL_GT2(dev_priv)	(IS_COFFEELAKE(dev_priv) && \
 				 INTEL_INFO(dev_priv)->gt == 2)
 #define IS_CFL_GT3(dev_priv)	(IS_COFFEELAKE(dev_priv) && \
@@ -2444,8 +2174,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define IS_ICL_WITH_PORT_F(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_ICELAKE, INTEL_SUBPLATFORM_PORTF)
 
-#define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support)
-
 #define SKL_REVID_A0		0x0
 #define SKL_REVID_B0		0x1
 #define SKL_REVID_C0		0x2
@@ -2585,6 +2313,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_RC6p(dev_priv)		 (INTEL_INFO(dev_priv)->has_rc6p)
 #define HAS_RC6pp(dev_priv)		 (false) /* HW was never validated */
 
+#define HAS_RPS(dev_priv)	(INTEL_INFO(dev_priv)->has_rps)
+
 #define HAS_CSR(dev_priv)	(INTEL_INFO(dev_priv)->display.has_csr)
 
 #define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
@@ -2598,7 +2328,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
  * properties, so we have separate macros to test them.
  */
 #define HAS_GUC(dev_priv)	(INTEL_INFO(dev_priv)->has_guc)
-#define HAS_GUC_CT(dev_priv)	(INTEL_INFO(dev_priv)->has_guc_ct)
 #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
 
@@ -2628,15 +2357,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE		0x9D80
 #define INTEL_PCH_CMP_DEVICE_ID_TYPE		0x0280
 #define INTEL_PCH_ICP_DEVICE_ID_TYPE		0x3480
+#define INTEL_PCH_MCC_DEVICE_ID_TYPE		0x4B00
 #define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
 #define INTEL_PCH_P3X_DEVICE_ID_TYPE		0x7000
 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
 
 #define INTEL_PCH_TYPE(dev_priv) ((dev_priv)->pch_type)
 #define INTEL_PCH_ID(dev_priv) ((dev_priv)->pch_id)
+#define HAS_PCH_MCC(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_MCC)
 #define HAS_PCH_ICP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_ICP)
 #define HAS_PCH_CNP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_CNP)
-#define HAS_PCH_KBP(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_KBP)
 #define HAS_PCH_SPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_SPT)
 #define HAS_PCH_LPT(dev_priv) (INTEL_PCH_TYPE(dev_priv) == PCH_LPT)
 #define HAS_PCH_LPT_LP(dev_priv) \
@@ -2708,29 +2438,10 @@ extern void i915_driver_unload(struct drm_device *dev);
 
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
 extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
-extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
-extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
-extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
-int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
-int intel_engines_init(struct drm_i915_private *dev_priv);
-
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
 
-/* intel_hotplug.c */
-void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
-			   u32 pin_mask, u32 long_mask);
-void intel_hpd_init(struct drm_i915_private *dev_priv);
-void intel_hpd_init_work(struct drm_i915_private *dev_priv);
-void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
-enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv,
-				   enum port port);
-bool intel_hpd_disable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
-void intel_hpd_enable(struct drm_i915_private *dev_priv, enum hpd_pin pin);
-
-/* i915_irq.c */
 static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 {
 	unsigned long delay;
@@ -2748,11 +2459,6 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv)
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-extern void intel_irq_init(struct drm_i915_private *dev_priv);
-extern void intel_irq_fini(struct drm_i915_private *dev_priv);
-int intel_irq_install(struct drm_i915_private *dev_priv);
-void intel_irq_uninstall(struct drm_i915_private *dev_priv);
-
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
 	return dev_priv->gvt;
@@ -2763,120 +2469,15 @@ static inline bool intel_vgpu_active(struct drm_i915_private *dev_priv)
 	return dev_priv->vgpu.active;
 }
 
-u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv,
-			      enum pipe pipe);
-void
-i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
-		     u32 status_mask);
-
-void
-i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
-		      u32 status_mask);
-
-void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv);
-void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv);
-void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
-				   u32 mask,
-				   u32 bits);
-void ilk_update_display_irq(struct drm_i915_private *dev_priv,
-			    u32 interrupt_mask,
-			    u32 enabled_irq_mask);
-static inline void
-ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits)
-{
-	ilk_update_display_irq(dev_priv, bits, bits);
-}
-static inline void
-ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits)
-{
-	ilk_update_display_irq(dev_priv, bits, 0);
-}
-void bdw_update_pipe_irq(struct drm_i915_private *dev_priv,
-			 enum pipe pipe,
-			 u32 interrupt_mask,
-			 u32 enabled_irq_mask);
-static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv,
-				       enum pipe pipe, u32 bits)
-{
-	bdw_update_pipe_irq(dev_priv, pipe, bits, bits);
-}
-static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv,
-					enum pipe pipe, u32 bits)
-{
-	bdw_update_pipe_irq(dev_priv, pipe, bits, 0);
-}
-void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
-				  u32 interrupt_mask,
-				  u32 enabled_irq_mask);
-static inline void
-ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
-{
-	ibx_display_interrupt_update(dev_priv, bits, bits);
-}
-static inline void
-ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
-{
-	ibx_display_interrupt_update(dev_priv, bits, 0);
-}
-
 /* i915_gem.c */
-int i915_gem_create_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
-int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
-			     struct drm_file *file_priv);
-int i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file_priv);
-int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file);
-int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			    struct drm_file *file_priv);
-int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file_priv);
-int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
 int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
-int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
-			   struct drm_file *file);
-int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
 void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_init_early(struct drm_i915_private *dev_priv);
 void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
-void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
 
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			 const struct drm_i915_gem_object_ops *ops);
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size);
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-				 const void *data, size_t size);
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file);
-void i915_gem_free_object(struct drm_gem_object *obj);
-
 static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
 {
 	if (!atomic_read(&i915->mm.free_count))
@@ -2903,15 +2504,15 @@ static inline void i915_gem_drain_workqueue(struct drm_i915_private *i915)
 	 * grace period so that we catch work queued via RCU from the first
 	 * pass. As neither drain_workqueue() nor flush_workqueue() report
 	 * a result, we make an assumption that we only don't require more
-	 * than 2 passes to catch all recursive RCU delayed work.
+	 * than 3 passes to catch all _recursive_ RCU delayed work.
 	 *
 	 */
-	int pass = 2;
+	int pass = 3;
 	do {
 		rcu_barrier();
 		i915_gem_drain_freed_objects(i915);
-		drain_workqueue(i915->wq);
 	} while (--pass);
+	drain_workqueue(i915->wq);
 }
 
 struct i915_vma * __must_check
@@ -2922,160 +2523,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 u64 flags);
 
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
-void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
-static inline int __sg_page_count(const struct scatterlist *sg)
-{
-	return sg->length >> PAGE_SHIFT;
-}
-
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n, unsigned int *offset);
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj,
-			 unsigned int n);
-
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n);
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n);
-
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes);
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
-
-static inline int __must_check
-i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	might_lock(&obj->mm.lock);
-
-	if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
-		return 0;
-
-	return __i915_gem_object_get_pages(obj);
-}
-
-static inline bool
-i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
-{
-	return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
-}
-
-static inline void
-__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	atomic_inc(&obj->mm.pages_pin_count);
-}
-
-static inline bool
-i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
-{
-	return atomic_read(&obj->mm.pages_pin_count);
-}
-
-static inline void
-__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	atomic_dec(&obj->mm.pages_pin_count);
-}
-
-static inline void
-i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_unpin_pages(obj);
-}
-
-enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
-	I915_MM_NORMAL = 0,
-	I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
-};
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass);
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
-
-enum i915_map_type {
-	I915_MAP_WB = 0,
-	I915_MAP_WC,
-#define I915_MAP_OVERRIDE BIT(31)
-	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
-	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
-};
-
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915)
-{
-	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
-}
-
-/**
- * i915_gem_object_pin_map - return a contiguous mapping of the entire object
- * @obj: the object to map into kernel address space
- * @type: the type of mapping, used to select pgprot_t
- *
- * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
- * pages and then returns a contiguous mapping of the backing storage into
- * the kernel address space. Based on the @type of mapping, the PTE will be
- * set to either WriteBack or WriteCombine (via pgprot_t).
- *
- * The caller is responsible for calling i915_gem_object_unpin_map() when the
- * mapping is no longer required.
- *
- * Returns the pointer through which to access the mapped object, or an
- * ERR_PTR() on error.
- */
-void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-					   enum i915_map_type type);
-
-void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
-				 unsigned long offset,
-				 unsigned long size);
-static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
-{
-	__i915_gem_object_flush_map(obj, 0, obj->base.size);
-}
-
-/**
- * i915_gem_object_unpin_map - releases an earlier mapping
- * @obj: the object to unmap
- *
- * After pinning the object and mapping its pages, once you are finished
- * with your access, call i915_gem_object_unpin_map() to release the pin
- * upon the mapping. Once the pin count reaches zero, that mapping may be
- * removed.
- */
-static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush);
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush);
-#define CLFLUSH_BEFORE	BIT(0)
-#define CLFLUSH_AFTER	BIT(1)
-#define CLFLUSH_FLAGS	(CLFLUSH_BEFORE | CLFLUSH_AFTER)
-
-static inline void
-i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
 static inline int __must_check
 i915_mutex_lock_interruptible(struct drm_device *dev)
 {
@@ -3123,36 +2573,15 @@ void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
+void i915_gem_fini_hw(struct drm_i915_private *dev_priv);
 void i915_gem_fini(struct drm_i915_private *dev_priv);
-void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags, long timeout);
 void i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 vm_fault_t i915_gem_fault(struct vm_fault *vmf);
-int i915_gem_object_wait(struct drm_i915_gem_object *obj,
-			 unsigned int flags,
-			 long timeout);
-int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-				  unsigned int flags,
-				  const struct i915_sched_attr *attr);
-#define I915_PRIORITY_DISPLAY I915_USER_PRIORITY(I915_PRIORITY_MAX)
-
-int __must_check
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
-int __must_check
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
-struct i915_vma * __must_check
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags);
-void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma);
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
-				int align);
+
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
@@ -3165,25 +2594,6 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				struct drm_gem_object *gem_obj, int flags);
 
-static inline struct i915_hw_ppgtt *
-i915_vm_to_ppgtt(struct i915_address_space *vm)
-{
-	return container_of(vm, struct i915_hw_ppgtt, vm);
-}
-
-/* i915_gem_fence_reg.c */
-struct drm_i915_fence_reg *
-i915_reserve_fence(struct drm_i915_private *dev_priv);
-void i915_unreserve_fence(struct drm_i915_fence_reg *fence);
-
-void i915_gem_restore_fences(struct drm_i915_private *dev_priv);
-
-void i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv);
-void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
-				       struct sg_table *pages);
-void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
-					 struct sg_table *pages);
-
 static inline struct i915_gem_context *
 __i915_gem_context_lookup_rcu(struct drm_i915_file_private *file_priv, u32 id)
 {
@@ -3266,11 +2676,12 @@ unsigned long i915_gem_shrink(struct drm_i915_private *i915,
 			      unsigned long target,
 			      unsigned long *nr_scanned,
 			      unsigned flags);
-#define I915_SHRINK_PURGEABLE 0x1
-#define I915_SHRINK_UNBOUND 0x2
-#define I915_SHRINK_BOUND 0x4
-#define I915_SHRINK_ACTIVE 0x8
-#define I915_SHRINK_VMAPS 0x10
+#define I915_SHRINK_UNBOUND	BIT(0)
+#define I915_SHRINK_BOUND	BIT(1)
+#define I915_SHRINK_ACTIVE	BIT(2)
+#define I915_SHRINK_VMAPS	BIT(3)
+#define I915_SHRINK_WRITEBACK	BIT(4)
+
 unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
 void i915_gem_shrinker_register(struct drm_i915_private *i915);
 void i915_gem_shrinker_unregister(struct drm_i915_private *i915);
@@ -3291,18 +2702,6 @@ u32 i915_gem_fence_size(struct drm_i915_private *dev_priv, u32 size,
 u32 i915_gem_fence_alignment(struct drm_i915_private *dev_priv, u32 size,
 			     unsigned int tiling, unsigned int stride);
 
-/* i915_debugfs.c */
-#ifdef CONFIG_DEBUG_FS
-int i915_debugfs_register(struct drm_i915_private *dev_priv);
-int i915_debugfs_connector_add(struct drm_connector *connector);
-void intel_display_crc_init(struct drm_i915_private *dev_priv);
-#else
-static inline int i915_debugfs_register(struct drm_i915_private *dev_priv) {return 0;}
-static inline int i915_debugfs_connector_add(struct drm_connector *connector)
-{ return 0; }
-static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
-#endif
-
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
@@ -3330,56 +2729,6 @@ extern int i915_restore_state(struct drm_i915_private *dev_priv);
 void i915_setup_sysfs(struct drm_i915_private *dev_priv);
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv);
 
-/* intel_lpe_audio.c */
-int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
-void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
-void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
-void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-			    enum pipe pipe, enum port port,
-			    const void *eld, int ls_clock, bool dp_output);
-
-/* intel_i2c.c */
-extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
-extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv);
-extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
-				     unsigned int pin);
-extern int intel_gmbus_output_aksv(struct i2c_adapter *adapter);
-
-extern struct i2c_adapter *
-intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin);
-extern void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed);
-extern void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit);
-static inline bool intel_gmbus_is_forced_bit(struct i2c_adapter *adapter)
-{
-	return container_of(adapter, struct intel_gmbus, adapter)->force_bit;
-}
-extern void intel_i2c_reset(struct drm_i915_private *dev_priv);
-
-/* intel_bios.c */
-void intel_bios_init(struct drm_i915_private *dev_priv);
-void intel_bios_cleanup(struct drm_i915_private *dev_priv);
-bool intel_bios_is_valid_vbt(const void *buf, size_t size);
-bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv);
-bool intel_bios_is_lvds_present(struct drm_i915_private *dev_priv, u8 *i2c_pin);
-bool intel_bios_is_port_present(struct drm_i915_private *dev_priv, enum port port);
-bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port);
-bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port);
-bool intel_bios_is_dsi_present(struct drm_i915_private *dev_priv, enum port *port);
-bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv,
-				     enum port port);
-bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv,
-				enum port port);
-enum aux_ch intel_bios_port_aux_ch(struct drm_i915_private *dev_priv, enum port port);
-
-/* intel_acpi.c */
-#ifdef CONFIG_ACPI
-extern void intel_register_dsm_handler(void);
-extern void intel_unregister_dsm_handler(void);
-#else
-static inline void intel_register_dsm_handler(void) { return; }
-static inline void intel_unregister_dsm_handler(void) { return; }
-#endif /* CONFIG_ACPI */
-
 /* intel_device_info.c */
 static inline struct intel_device_info *
 mkwrite_device_info(struct drm_i915_private *dev_priv)
@@ -3387,20 +2736,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)INTEL_INFO(dev_priv);
 }
 
-static inline struct intel_sseu
-intel_device_default_sseu(struct drm_i915_private *i915)
-{
-	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
-	struct intel_sseu value = {
-		.slice_mask = sseu->slice_mask,
-		.subslice_mask = sseu->subslice_mask[0],
-		.min_eus_per_subslice = sseu->max_eus_per_subslice,
-		.max_eus_per_subslice = sseu->max_eus_per_subslice,
-	};
-
-	return value;
-}
-
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
 extern int intel_modeset_init(struct drm_device *dev);
@@ -3410,158 +2745,23 @@ extern int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv,
 extern void intel_display_resume(struct drm_device *dev);
 extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
 extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
-extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
 extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
-extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
-extern void intel_rps_mark_interactive(struct drm_i915_private *i915,
-				       bool interactive);
-extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
-				  bool enable);
-void intel_dsc_enable(struct intel_encoder *encoder,
-		      const struct intel_crtc_state *crtc_state);
-void intel_dsc_disable(const struct intel_crtc_state *crtc_state);
 
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 
-/* overlay */
-extern struct intel_overlay_error_state *
-intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
-extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
-					    struct intel_overlay_error_state *error);
-
 extern struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_i915_private *dev_priv);
 extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
 					    struct intel_display_error_state *error);
 
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val);
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv, u32 mbox,
-				    u32 val, int fast_timeout_us,
-				    int slow_timeout_ms);
-#define sandybridge_pcode_write(dev_priv, mbox, val)	\
-	sandybridge_pcode_write_timeout(dev_priv, mbox, val, 500, 0)
-
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms);
-
-/* intel_sideband.c */
-u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr);
-int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val);
-u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr);
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg);
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val);
-u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg);
-void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val);
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination);
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
-		     enum intel_sbi_destination destination);
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg);
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val);
-
-/* intel_dpio_phy.c */
-void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
-			     enum dpio_phy *phy, enum dpio_channel *ch);
-void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv,
-				  enum port port, u32 margin, u32 scale,
-				  u32 enable, u32 deemphasis);
-void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy);
-void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy);
-bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv,
-			    enum dpio_phy phy);
-bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
-			      enum dpio_phy phy);
-u8 bxt_ddi_phy_calc_lane_lat_optim_mask(u8 lane_count);
-void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder,
-				     u8 lane_lat_optim_mask);
-u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder);
-
-void chv_set_phy_signal_level(struct intel_encoder *encoder,
-			      u32 deemph_reg_value, u32 margin_reg_value,
-			      bool uniq_trans_scale);
-void chv_data_lane_soft_reset(struct intel_encoder *encoder,
-			      const struct intel_crtc_state *crtc_state,
-			      bool reset);
-void chv_phy_pre_pll_enable(struct intel_encoder *encoder,
-			    const struct intel_crtc_state *crtc_state);
-void chv_phy_pre_encoder_enable(struct intel_encoder *encoder,
-				const struct intel_crtc_state *crtc_state);
-void chv_phy_release_cl2_override(struct intel_encoder *encoder);
-void chv_phy_post_pll_disable(struct intel_encoder *encoder,
-			      const struct intel_crtc_state *old_crtc_state);
-
-void vlv_set_phy_signal_level(struct intel_encoder *encoder,
-			      u32 demph_reg_value, u32 preemph_reg_value,
-			      u32 uniqtranscale_reg_value, u32 tx3_demph);
-void vlv_phy_pre_pll_enable(struct intel_encoder *encoder,
-			    const struct intel_crtc_state *crtc_state);
-void vlv_phy_pre_encoder_enable(struct intel_encoder *encoder,
-				const struct intel_crtc_state *crtc_state);
-void vlv_phy_reset_lanes(struct intel_encoder *encoder,
-			 const struct intel_crtc_state *old_crtc_state);
-
-/* intel_combo_phy.c */
-void icl_combo_phys_init(struct drm_i915_private *dev_priv);
-void icl_combo_phys_uninit(struct drm_i915_private *dev_priv);
-void cnl_combo_phys_init(struct drm_i915_private *dev_priv);
-void cnl_combo_phys_uninit(struct drm_i915_private *dev_priv);
-
-int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
-int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
-u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
-			   const i915_reg_t reg);
-
-u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1);
-
-static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
-					 const i915_reg_t reg)
-{
-	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
-}
-
 #define __I915_REG_OP(op__, dev_priv__, ...) \
 	intel_uncore_##op__(&(dev_priv__)->uncore, __VA_ARGS__)
 
-#define I915_READ8(reg__)	  __I915_REG_OP(read8, dev_priv, (reg__))
-#define I915_WRITE8(reg__, val__) __I915_REG_OP(write8, dev_priv, (reg__), (val__))
-
-#define I915_READ16(reg__)	   __I915_REG_OP(read16, dev_priv, (reg__))
-#define I915_WRITE16(reg__, val__) __I915_REG_OP(write16, dev_priv, (reg__), (val__))
-#define I915_READ16_NOTRACE(reg__)	   __I915_REG_OP(read16_notrace, dev_priv, (reg__))
-#define I915_WRITE16_NOTRACE(reg__, val__) __I915_REG_OP(write16_notrace, dev_priv, (reg__), (val__))
-
 #define I915_READ(reg__)	 __I915_REG_OP(read, dev_priv, (reg__))
 #define I915_WRITE(reg__, val__) __I915_REG_OP(write, dev_priv, (reg__), (val__))
-#define I915_READ_NOTRACE(reg__)	 __I915_REG_OP(read_notrace, dev_priv, (reg__))
-#define I915_WRITE_NOTRACE(reg__, val__) __I915_REG_OP(write_notrace, dev_priv, (reg__), (val__))
-
-/* Be very careful with read/write 64-bit values. On 32-bit machines, they
- * will be implemented using 2 32-bit writes in an arbitrary order with
- * an arbitrary delay between them. This can cause the hardware to
- * act upon the intermediate value, possibly leading to corruption and
- * machine death. For this reason we do not support I915_WRITE64, or
- * dev_priv->uncore.funcs.mmio_writeq.
- *
- * When reading a 64-bit value as two 32-bit values, the delay may cause
- * the two reads to mismatch, e.g. a timestamp overflowing. Also note that
- * occasionally a 64-bit register does not actualy support a full readq
- * and must be read using two 32-bit reads.
- *
- * You have been warned.
- */
-#define I915_READ64(reg__)	__I915_REG_OP(read64, dev_priv, (reg__))
-#define I915_READ64_2x32(lower_reg__, upper_reg__) \
-	__I915_REG_OP(read64_2x32, dev_priv, (lower_reg__), (upper_reg__))
 
 #define POSTING_READ(reg__)	__I915_REG_OP(posting_read, dev_priv, (reg__))
-#define POSTING_READ16(reg__)	__I915_REG_OP(posting_read16, dev_priv, (reg__))
 
 /* These are untraced mmio-accessors that are only valid to be used inside
  * critical sections, such as inside IRQ handlers, where forcewake is explicitly
@@ -3591,68 +2791,12 @@ static inline u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
  */
 #define I915_READ_FW(reg__) __I915_REG_OP(read_fw, dev_priv, (reg__))
 #define I915_WRITE_FW(reg__, val__) __I915_REG_OP(write_fw, dev_priv, (reg__), (val__))
-#define I915_WRITE64_FW(reg__, val__) __I915_REG_OP(write64_fw, dev_priv, (reg__), (val__))
-#define POSTING_READ_FW(reg__) __I915_REG_OP(posting_read_fw, dev_priv, (reg__))
 
 /* "Broadcast RGB" property */
 #define INTEL_BROADCAST_RGB_AUTO 0
 #define INTEL_BROADCAST_RGB_FULL 1
 #define INTEL_BROADCAST_RGB_LIMITED 2
 
-static inline i915_reg_t i915_vgacntrl_reg(struct drm_i915_private *dev_priv)
-{
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		return VLV_VGACNTRL;
-	else if (INTEL_GEN(dev_priv) >= 5)
-		return CPU_VGACNTRL;
-	else
-		return VGACNTRL;
-}
-
-static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
-{
-	unsigned long j = msecs_to_jiffies(m);
-
-	return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
-}
-
-static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
-{
-	/* nsecs_to_jiffies64() does not guard against overflow */
-	if (NSEC_PER_SEC % HZ &&
-	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
-		return MAX_JIFFY_OFFSET;
-
-        return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
-}
-
-/*
- * If you need to wait X milliseconds between events A and B, but event B
- * doesn't happen exactly after event A, you record the timestamp (jiffies) of
- * when event A happened, then just before event B you call this function and
- * pass the timestamp as the first argument, and X as the second argument.
- */
-static inline void
-wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
-{
-	unsigned long target_jiffies, tmp_jiffies, remaining_jiffies;
-
-	/*
-	 * Don't re-read the value of "jiffies" every time since it may change
-	 * behind our back and break the math.
-	 */
-	tmp_jiffies = jiffies;
-	target_jiffies = timestamp_jiffies +
-			 msecs_to_jiffies_timeout(to_wait_ms);
-
-	if (time_after(target_jiffies, tmp_jiffies)) {
-		remaining_jiffies = target_jiffies - tmp_jiffies;
-		while (remaining_jiffies)
-			remaining_jiffies =
-			    schedule_timeout_uninterruptible(remaining_jiffies);
-	}
-}
-
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
 
@@ -3690,4 +2834,21 @@ static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
 	return i915_ggtt_offset(i915->gt.scratch);
 }
 
+static inline enum i915_map_type
+i915_coherent_map_type(struct drm_i915_private *i915)
+{
+	return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+}
+
+static inline void add_taint_for_CI(unsigned int taint)
+{
+	/*
+	 * The system is "ok", just about surviving for the user, but
+	 * CI results are now unreliable as the HW is very suspect.
+	 * CI checks the taint state after every test and will reboot
+	 * the machine if the kernel is tainted.
+	 */
+	add_taint(taint, LOCKDEP_STILL_OK);
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_fixed.h b/drivers/gpu/drm/i915/i915_fixed.h
index 591dd89ba7af..6621595fe74c 100644
--- a/drivers/gpu/drm/i915/i915_fixed.h
+++ b/drivers/gpu/drm/i915/i915_fixed.h
@@ -71,7 +71,7 @@ static inline u32 mul_round_up_u32_fixed16(u32 val, uint_fixed_16_16_t mul)
 {
 	u64 tmp;
 
-	tmp = (u64)val * mul.val;
+	tmp = mul_u32_u32(val, mul.val);
 	tmp = DIV_ROUND_UP_ULL(tmp, 1 << 16);
 	WARN_ON(tmp > U32_MAX);
 
@@ -83,7 +83,7 @@ static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
 {
 	u64 tmp;
 
-	tmp = (u64)val.val * mul.val;
+	tmp = mul_u32_u32(val.val, mul.val);
 	tmp = tmp >> 16;
 
 	return clamp_u64_to_fixed16(tmp);
@@ -114,7 +114,7 @@ static inline uint_fixed_16_16_t mul_u32_fixed16(u32 val, uint_fixed_16_16_t mul
 {
 	u64 tmp;
 
-	tmp = (u64)val * mul.val;
+	tmp = mul_u32_u32(val, mul.val);
 
 	return clamp_u64_to_fixed16(tmp);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad01c92aaf74..190ad54fb072 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -26,7 +26,6 @@
  */
 
 #include <drm/drm_vma_manager.h>
-#include <drm/drm_pci.h>
 #include <drm/i915_drm.h>
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
@@ -39,32 +38,27 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include "display/intel_display.h"
+#include "display/intel_frontbuffer.h"
+
+#include "gem/i915_gem_clflush.h"
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gem_pm.h"
+#include "gem/i915_gemfs.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_mocs.h"
+#include "gt/intel_reset.h"
+#include "gt/intel_workarounds.h"
+
 #include "i915_drv.h"
-#include "i915_gem_clflush.h"
-#include "i915_gemfs.h"
-#include "i915_globals.h"
-#include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 
 #include "intel_drv.h"
-#include "intel_frontbuffer.h"
-#include "intel_mocs.h"
 #include "intel_pm.h"
-#include "intel_workarounds.h"
-
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-
-static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	if (obj->cache_dirty)
-		return false;
-
-	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
-		return true;
-
-	return obj->pin_global; /* currently in use by HW, keep flushed */
-}
 
 static int
 insert_mappable_node(struct i915_ggtt *ggtt,
@@ -83,124 +77,6 @@ remove_mappable_node(struct drm_mm_node *node)
 	drm_mm_remove_node(node);
 }
 
-/* some bookkeeping */
-static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
-				  u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count++;
-	dev_priv->mm.object_memory += size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
-				     u64 size)
-{
-	spin_lock(&dev_priv->mm.object_stat_lock);
-	dev_priv->mm.object_count--;
-	dev_priv->mm.object_memory -= size;
-	spin_unlock(&dev_priv->mm.object_stat_lock);
-}
-
-static void __i915_gem_park(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
-	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
-
-	if (!i915->gt.awake)
-		return;
-
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(i915->drm.irq);
-
-	intel_engines_park(i915);
-	i915_timelines_park(i915);
-
-	i915_pmu_gt_parked(i915);
-	i915_vma_parked(i915);
-
-	wakeref = fetch_and_zero(&i915->gt.awake);
-	GEM_BUG_ON(!wakeref);
-
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_idle(i915);
-
-	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
-
-	i915_globals_park();
-}
-
-void i915_gem_park(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(i915->gt.active_requests);
-
-	if (!i915->gt.awake)
-		return;
-
-	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
-	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
-}
-
-void i915_gem_unpark(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915->gt.active_requests);
-	assert_rpm_wakelock_held(i915);
-
-	if (i915->gt.awake)
-		return;
-
-	/*
-	 * It seems that the DMC likes to transition between the DC states a lot
-	 * when there are no connected displays (no active power domains) during
-	 * command submission.
-	 *
-	 * This activity has negative impact on the performance of the chip with
-	 * huge latencies observed in the interrupt handler and elsewhere.
-	 *
-	 * Work around it by grabbing a GT IRQ power domain whilst there is any
-	 * GT activity, preventing any DC state transitions.
-	 */
-	i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
-	GEM_BUG_ON(!i915->gt.awake);
-
-	i915_globals_unpark();
-
-	intel_enable_gt_powersave(i915);
-	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
-	i915_pmu_gt_unparked(i915);
-
-	intel_engines_unpark(i915);
-
-	i915_queue_hangcheck(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
 int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
@@ -225,178 +101,14 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	drm_dma_handle_t *phys;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	char *vaddr;
-	int i;
-	int err;
-
-	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
-		return -EINVAL;
-
-	/* Always aligning to the object size, allows a single allocation
-	 * to handle all possible callers, and given typical object sizes,
-	 * the alignment of the buddy allocation will naturally match.
-	 */
-	phys = drm_pci_alloc(obj->base.dev,
-			     roundup_pow_of_two(obj->base.size),
-			     roundup_pow_of_two(obj->base.size));
-	if (!phys)
-		return -ENOMEM;
-
-	vaddr = phys->vaddr;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-			err = PTR_ERR(page);
-			goto err_phys;
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		drm_clflush_virt_range(vaddr, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		put_page(page);
-		vaddr += PAGE_SIZE;
-	}
-
-	i915_gem_chipset_flush(to_i915(obj->base.dev));
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (!st) {
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
-		kfree(st);
-		err = -ENOMEM;
-		goto err_phys;
-	}
-
-	sg = st->sgl;
-	sg->offset = 0;
-	sg->length = obj->base.size;
-
-	sg_dma_address(sg) = phys->busaddr;
-	sg_dma_len(sg) = obj->base.size;
-
-	obj->phys_handle = phys;
-
-	__i915_gem_object_set_pages(obj, st, sg->length);
-
-	return 0;
-
-err_phys:
-	drm_pci_free(obj->base.dev, phys);
-
-	return err;
-}
-
-static void __start_cpu_write(struct drm_i915_gem_object *obj)
-{
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	if (cpu_write_needs_clflush(obj))
-		obj->cache_dirty = true;
-}
-
-void
-__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				struct sg_table *pages,
-				bool needs_clflush)
-{
-	GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
-
-	if (obj->mm.madv == I915_MADV_DONTNEED)
-		obj->mm.dirty = false;
-
-	if (needs_clflush &&
-	    (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-	    !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
-		drm_clflush_sg(pages);
-
-	__start_cpu_write(obj);
-}
-
-static void
-i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
-			       struct sg_table *pages)
-{
-	__i915_gem_object_release_shmem(obj, pages, false);
-
-	if (obj->mm.dirty) {
-		struct address_space *mapping = obj->base.filp->f_mapping;
-		char *vaddr = obj->phys_handle->vaddr;
-		int i;
-
-		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page;
-			char *dst;
-
-			page = shmem_read_mapping_page(mapping, i);
-			if (IS_ERR(page))
-				continue;
-
-			dst = kmap_atomic(page);
-			drm_clflush_virt_range(vaddr, PAGE_SIZE);
-			memcpy(dst, vaddr, PAGE_SIZE);
-			kunmap_atomic(dst);
-
-			set_page_dirty(page);
-			if (obj->mm.madv == I915_MADV_WILLNEED)
-				mark_page_accessed(page);
-			put_page(page);
-			vaddr += PAGE_SIZE;
-		}
-		obj->mm.dirty = false;
-	}
-
-	sg_free_table(pages);
-	kfree(pages);
-
-	drm_pci_free(obj->base.dev, obj->phys_handle);
-}
-
-static void
-i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_unpin_pages(obj);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
-	.get_pages = i915_gem_object_get_pages_phys,
-	.put_pages = i915_gem_object_put_pages_phys,
-	.release = i915_gem_object_release_phys,
-};
-
-static const struct drm_i915_gem_object_ops i915_gem_object_ops;
-
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 	LIST_HEAD(still_in_list);
-	int ret;
+	int ret = 0;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	/* Closed vma are removed from the obj->vma_list - but they may
-	 * still have an active binding on the object. To remove those we
-	 * must wait for all rendering to complete to the object (as unbinding
-	 * must anyway), and retire the requests.
-	 */
-	ret = i915_gem_object_set_to_cpu_domain(obj, false);
-	if (ret)
-		return ret;
-
 	spin_lock(&obj->vma.lock);
 	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
 						       struct i915_vma,
@@ -414,190 +126,6 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	return ret;
 }
 
-static long
-i915_gem_object_wait_fence(struct dma_fence *fence,
-			   unsigned int flags,
-			   long timeout)
-{
-	struct i915_request *rq;
-
-	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return timeout;
-
-	if (!dma_fence_is_i915(fence))
-		return dma_fence_wait_timeout(fence,
-					      flags & I915_WAIT_INTERRUPTIBLE,
-					      timeout);
-
-	rq = to_request(fence);
-	if (i915_request_completed(rq))
-		goto out;
-
-	timeout = i915_request_wait(rq, flags, timeout);
-
-out:
-	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
-		i915_request_retire_upto(rq);
-
-	return timeout;
-}
-
-static long
-i915_gem_object_wait_reservation(struct reservation_object *resv,
-				 unsigned int flags,
-				 long timeout)
-{
-	unsigned int seq = __read_seqcount_begin(&resv->seq);
-	struct dma_fence *excl;
-	bool prune_fences = false;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			timeout = i915_gem_object_wait_fence(shared[i],
-							     flags, timeout);
-			if (timeout < 0)
-				break;
-
-			dma_fence_put(shared[i]);
-		}
-
-		for (; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-
-		/*
-		 * If both shared fences and an exclusive fence exist,
-		 * then by construction the shared fences must be later
-		 * than the exclusive fence. If we successfully wait for
-		 * all the shared fences, we know that the exclusive fence
-		 * must all be signaled. If all the shared fences are
-		 * signaled, we can prune the array and recover the
-		 * floating references on the fences/requests.
-		 */
-		prune_fences = count && timeout >= 0;
-	} else {
-		excl = reservation_object_get_excl_rcu(resv);
-	}
-
-	if (excl && timeout >= 0)
-		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
-
-	dma_fence_put(excl);
-
-	/*
-	 * Opportunistically prune the fences iff we know they have *all* been
-	 * signaled and that the reservation object has not been changed (i.e.
-	 * no new fences have been added).
-	 */
-	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
-		if (reservation_object_trylock(resv)) {
-			if (!__read_seqcount_retry(&resv->seq, seq))
-				reservation_object_add_excl_fence(resv, NULL);
-			reservation_object_unlock(resv);
-		}
-	}
-
-	return timeout;
-}
-
-static void __fence_set_priority(struct dma_fence *fence,
-				 const struct i915_sched_attr *attr)
-{
-	struct i915_request *rq;
-	struct intel_engine_cs *engine;
-
-	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
-		return;
-
-	rq = to_request(fence);
-	engine = rq->engine;
-
-	local_bh_disable();
-	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
-	if (engine->schedule)
-		engine->schedule(rq, attr);
-	rcu_read_unlock();
-	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
-}
-
-static void fence_set_priority(struct dma_fence *fence,
-			       const struct i915_sched_attr *attr)
-{
-	/* Recurse once into a fence-array */
-	if (dma_fence_is_array(fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(fence);
-		int i;
-
-		for (i = 0; i < array->num_fences; i++)
-			__fence_set_priority(array->fences[i], attr);
-	} else {
-		__fence_set_priority(fence, attr);
-	}
-}
-
-int
-i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
-			      unsigned int flags,
-			      const struct i915_sched_attr *attr)
-{
-	struct dma_fence *excl;
-
-	if (flags & I915_WAIT_ALL) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-		int ret;
-
-		ret = reservation_object_get_fences_rcu(obj->resv,
-							&excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			fence_set_priority(shared[i], attr);
-			dma_fence_put(shared[i]);
-		}
-
-		kfree(shared);
-	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
-	}
-
-	if (excl) {
-		fence_set_priority(excl, attr);
-		dma_fence_put(excl);
-	}
-	return 0;
-}
-
-/**
- * Waits for rendering to the object to be completed
- * @obj: i915 gem object
- * @flags: how to wait (under a lock, for all rendering or just for writes etc)
- * @timeout: how long to wait
- */
-int
-i915_gem_object_wait(struct drm_i915_gem_object *obj,
-		     unsigned int flags,
-		     long timeout)
-{
-	might_sleep();
-	GEM_BUG_ON(timeout < 0);
-
-	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
-	return timeout < 0 ? timeout : 0;
-}
-
 static int
 i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pwrite *args,
@@ -636,7 +164,7 @@ i915_gem_create(struct drm_file *file,
 		return -EINVAL;
 
 	/* Allocate the new object */
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -656,19 +184,36 @@ i915_gem_dumb_create(struct drm_file *file,
 		     struct drm_device *dev,
 		     struct drm_mode_create_dumb *args)
 {
+	int cpp = DIV_ROUND_UP(args->bpp, 8);
+	u32 format;
+
+	switch (cpp) {
+	case 1:
+		format = DRM_FORMAT_C8;
+		break;
+	case 2:
+		format = DRM_FORMAT_RGB565;
+		break;
+	case 4:
+		format = DRM_FORMAT_XRGB8888;
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* have to work out size/pitch and return them */
-	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
+	args->pitch = ALIGN(args->width * cpp, 64);
+
+	/* align stride to page size so that we can remap */
+	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
+						    DRM_FORMAT_MOD_LINEAR))
+		args->pitch = ALIGN(args->pitch, 4096);
+
 	args->size = args->pitch * args->height;
 	return i915_gem_create(file, to_i915(dev),
 			       &args->size, &args->handle);
 }
 
-static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
-{
-	return !(obj->cache_level == I915_CACHE_NONE ||
-		 obj->cache_level == I915_CACHE_WT);
-}
-
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -688,13 +233,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       &args->size, &args->handle);
 }
 
-static inline enum fb_op_origin
-fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 {
 	intel_wakeref_t wakeref;
@@ -725,171 +263,14 @@ void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
 
 	i915_gem_chipset_flush(dev_priv);
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
-		spin_lock_irq(&dev_priv->uncore.lock);
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
+		struct intel_uncore *uncore = &dev_priv->uncore;
 
-		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
-
-		spin_unlock_irq(&dev_priv->uncore.lock);
-	}
-}
-
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	struct i915_vma *vma;
-
-	if (!(obj->write_domain & flush_domains))
-		return;
-
-	switch (obj->write_domain) {
-	case I915_GEM_DOMAIN_GTT:
-		i915_gem_flush_ggtt_writes(dev_priv);
-
-		intel_fb_obj_flush(obj,
-				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-		for_each_ggtt_vma(vma, obj) {
-			if (vma->iomap)
-				continue;
-
-			i915_vma_unset_ggtt_write(vma);
-		}
-		break;
-
-	case I915_GEM_DOMAIN_WC:
-		wmb();
-		break;
-
-	case I915_GEM_DOMAIN_CPU:
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		break;
-
-	case I915_GEM_DOMAIN_RENDER:
-		if (gpu_write_needs_clflush(obj))
-			obj->cache_dirty = true;
-		break;
-	}
-
-	obj->write_domain = 0;
-}
-
-/*
- * Pins the specified object's pages and synchronizes the object with
- * GPU accesses. Sets needs_clflush to non-zero if the caller should
- * flush the object from the CPU cache.
- */
-int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
-				    unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, false);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
-	}
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu read domain, set ourself into the gtt
-	 * read domain and manually flush cachelines (if required). This
-	 * optimizes for the case when the gpu will dirty the data
-	 * anyway again before the next pread happens.
-	 */
-	if (!obj->cache_dirty &&
-	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
-		*needs_clflush = CLFLUSH_BEFORE;
-
-out:
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
-}
-
-int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
-				     unsigned int *needs_clflush)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	*needs_clflush = 0;
-	if (!i915_gem_object_has_struct_page(obj))
-		return -ENODEV;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_ALL,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
-	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-		else
-			goto out;
+		spin_lock_irq(&uncore->lock);
+		intel_uncore_posting_read_fw(uncore,
+					     RING_HEAD(RENDER_RING_BASE));
+		spin_unlock_irq(&uncore->lock);
 	}
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're not in the cpu write domain, set ourself into the
-	 * gtt write domain and manually flush cachelines (as required).
-	 * This optimizes for the case when the gpu will use the data
-	 * right away and we therefore have to clflush anyway.
-	 */
-	if (!obj->cache_dirty) {
-		*needs_clflush |= CLFLUSH_AFTER;
-
-		/*
-		 * Same trick applies to invalidate partially written
-		 * cachelines read before writing.
-		 */
-		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
-			*needs_clflush |= CLFLUSH_BEFORE;
-	}
-
-out:
-	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
-	obj->mm.dirty = true;
-	/* return with the pages pinned */
-	return 0;
-
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
-	return ret;
 }
 
 static int
@@ -915,20 +296,21 @@ static int
 i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		     struct drm_i915_gem_pread *args)
 {
-	char __user *user_data;
-	u64 remain;
 	unsigned int needs_clflush;
 	unsigned int idx, offset;
+	struct dma_fence *fence;
+	char __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
+	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-	if (ret)
-		return ret;
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
 
 	remain = args->size;
 	user_data = u64_to_user_ptr(args->data_ptr);
@@ -947,7 +329,7 @@ i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
 		offset = 0;
 	}
 
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
 	return ret;
 }
 
@@ -983,8 +365,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
-	struct i915_vma *vma;
+	struct dma_fence *fence;
 	void __user *user_data;
+	struct i915_vma *vma;
 	u64 remain, offset;
 	int ret;
 
@@ -992,7 +375,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 				       PIN_MAPPABLE |
 				       PIN_NONFAULT |
@@ -1013,11 +396,24 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
@@ -1055,8 +451,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		offset += page_length;
 	}
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -1065,7 +462,7 @@ out_unpin:
 		i915_vma_unpin(vma);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return ret;
@@ -1165,8 +562,10 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
+	struct dma_fence *fence;
 	struct i915_vma *vma;
 	u64 remain, offset;
 	void __user *user_data;
@@ -1184,14 +583,14 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		 * This easily dwarfs any performance advantage from
 		 * using the cache bypass of indirect GGTT access.
 		 */
-		wakeref = intel_runtime_pm_get_if_in_use(i915);
+		wakeref = intel_runtime_pm_get_if_in_use(rpm);
 		if (!wakeref) {
 			ret = -EFAULT;
 			goto out_unlock;
 		}
 	} else {
 		/* No backing pages, no fallback, we must force GGTT access */
-		wakeref = intel_runtime_pm_get(i915);
+		wakeref = intel_runtime_pm_get(rpm);
 	}
 
 	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
@@ -1214,11 +613,24 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(!node.allocated);
 	}
 
-	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	ret = i915_gem_object_lock_interruptible(obj);
 	if (ret)
 		goto out_unpin;
 
-	mutex_unlock(&i915->drm.struct_mutex);
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		i915_gem_object_unlock(obj);
+		goto out_unpin;
+	}
+
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_unlock(obj);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
 
 	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
 
@@ -1263,8 +675,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 	}
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
 
-	mutex_lock(&i915->drm.struct_mutex);
+	i915_gem_object_unlock_fence(obj, fence);
 out_unpin:
+	mutex_lock(&i915->drm.struct_mutex);
 	if (node.allocated) {
 		wmb();
 		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
@@ -1273,7 +686,7 @@ out_unpin:
 		i915_vma_unpin(vma);
 	}
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(rpm, wakeref);
 out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	return ret;
@@ -1310,22 +723,22 @@ static int
 i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 		      const struct drm_i915_gem_pwrite *args)
 {
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	void __user *user_data;
-	u64 remain;
 	unsigned int partial_cacheline_write;
 	unsigned int needs_clflush;
 	unsigned int offset, idx;
+	struct dma_fence *fence;
+	void __user *user_data;
+	u64 remain;
 	int ret;
 
-	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
+	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (ret)
-		return ret;
+	fence = i915_gem_object_lock_fence(obj);
+	i915_gem_object_finish_access(obj);
+	if (!fence)
+		return -ENOMEM;
 
 	/* If we don't overwrite a cacheline completely we need to be
 	 * careful to have up-to-date data by first clflushing. Don't
@@ -1354,7 +767,8 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
 	}
 
 	intel_fb_obj_flush(obj, ORIGIN_CPU);
-	i915_gem_obj_finish_shmem_access(obj);
+	i915_gem_object_unlock_fence(obj, fence);
+
 	return ret;
 }
 
@@ -1443,143 +857,6 @@ err:
 	return ret;
 }
 
-static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct list_head *list;
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	mutex_lock(&i915->ggtt.vm.mutex);
-	for_each_ggtt_vma(vma, obj) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
-	}
-	mutex_unlock(&i915->ggtt.vm.mutex);
-
-	spin_lock(&i915->mm.obj_lock);
-	list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
-	list_move_tail(&obj->mm.link, list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-/**
- * Called when user space prepares to use an object with the CPU, either
- * through the mmap ioctl's mapping or a GTT mapping.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- */
-int
-i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file)
-{
-	struct drm_i915_gem_set_domain *args = data;
-	struct drm_i915_gem_object *obj;
-	u32 read_domains = args->read_domains;
-	u32 write_domain = args->write_domain;
-	int err;
-
-	/* Only handle setting domains to types used by the CPU. */
-	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
-		return -EINVAL;
-
-	/*
-	 * Having something in the write domain implies it's in the read
-	 * domain, and only that read domain.  Enforce that in the request.
-	 */
-	if (write_domain && read_domains != write_domain)
-		return -EINVAL;
-
-	if (!read_domains)
-		return 0;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * Already in the desired write domain? Nothing for us to do!
-	 *
-	 * We apply a little bit of cunning here to catch a broader set of
-	 * no-ops. If obj->write_domain is set, we must be in the same
-	 * obj->read_domains, and only that domain. Therefore, if that
-	 * obj->write_domain matches the request read_domains, we are
-	 * already in the same read/write domain and can skip the operation,
-	 * without having to further check the requested write_domain.
-	 */
-	if (READ_ONCE(obj->write_domain) == read_domains) {
-		err = 0;
-		goto out;
-	}
-
-	/*
-	 * Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
-	/*
-	 * Proxy objects do not control access to the backing storage, ergo
-	 * they cannot be used as a means to manipulate the cache domain
-	 * tracking for that backing storage. The proxy object is always
-	 * considered to be outside of any cache domain.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		err = -ENXIO;
-		goto out;
-	}
-
-	/*
-	 * Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	err = i915_gem_object_pin_pages(obj);
-	if (err)
-		goto out;
-
-	err = i915_mutex_lock_interruptible(dev);
-	if (err)
-		goto out_unpin;
-
-	if (read_domains & I915_GEM_DOMAIN_WC)
-		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
-	else if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
-	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
-
-	/* And bump the LRU for this access */
-	i915_gem_object_bump_inactive_ggtt(obj);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj,
-					fb_write_origin(obj, write_domain));
-
-out_unpin:
-	i915_gem_object_unpin_pages(obj);
-out:
-	i915_gem_object_put(obj);
-	return err;
-}
-
 /**
  * Called when user space has done writes to this buffer
  * @dev: drm device
@@ -1609,421 +886,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static inline bool
-__vma_matches(struct vm_area_struct *vma, struct file *filp,
-	      unsigned long addr, unsigned long size)
-{
-	if (vma->vm_file != filp)
-		return false;
-
-	return vma->vm_start == addr &&
-	       (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
-}
-
-/**
- * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
- *			 it is mapped to.
- * @dev: drm device
- * @data: ioctl data blob
- * @file: drm file
- *
- * While the mapping holds a reference on the contents of the object, it doesn't
- * imply a ref on the object itself.
- *
- * IMPORTANT:
- *
- * DRM driver writers who look a this function as an example for how to do GEM
- * mmap support, please don't implement mmap support like here. The modern way
- * to implement DRM mmap support is with an mmap offset ioctl (like
- * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
- * That way debug tooling like valgrind will understand what's going on, hiding
- * the mmap call in a driver private ioctl will break that. The i915 driver only
- * does cpu mmaps this way because we didn't know better.
- */
-int
-i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_mmap *args = data;
-	struct drm_i915_gem_object *obj;
-	unsigned long addr;
-
-	if (args->flags & ~(I915_MMAP_WC))
-		return -EINVAL;
-
-	if (args->flags & I915_MMAP_WC && !boot_cpu_has(X86_FEATURE_PAT))
-		return -ENODEV;
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/* prime objects have no backing filp to GEM mmap
-	 * pages from.
-	 */
-	if (!obj->base.filp) {
-		addr = -ENXIO;
-		goto err;
-	}
-
-	if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
-		addr = -EINVAL;
-		goto err;
-	}
-
-	addr = vm_mmap(obj->base.filp, 0, args->size,
-		       PROT_READ | PROT_WRITE, MAP_SHARED,
-		       args->offset);
-	if (IS_ERR_VALUE(addr))
-		goto err;
-
-	if (args->flags & I915_MMAP_WC) {
-		struct mm_struct *mm = current->mm;
-		struct vm_area_struct *vma;
-
-		if (down_write_killable(&mm->mmap_sem)) {
-			addr = -EINTR;
-			goto err;
-		}
-		vma = find_vma(mm, addr);
-		if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
-			vma->vm_page_prot =
-				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
-		else
-			addr = -ENOMEM;
-		up_write(&mm->mmap_sem);
-		if (IS_ERR_VALUE(addr))
-			goto err;
-
-		/* This may race, but that's ok, it only gets set */
-		WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU);
-	}
-	i915_gem_object_put(obj);
-
-	args->addr_ptr = (u64)addr;
-	return 0;
-
-err:
-	i915_gem_object_put(obj);
-	return addr;
-}
-
-static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
-}
-
-/**
- * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
- *
- * A history of the GTT mmap interface:
- *
- * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to
- *     aligned and suitable for fencing, and still fit into the available
- *     mappable space left by the pinned display objects. A classic problem
- *     we called the page-fault-of-doom where we would ping-pong between
- *     two objects that could not fit inside the GTT and so the memcpy
- *     would page one object in at the expense of the other between every
- *     single byte.
- *
- * 1 - Objects can be any size, and have any compatible fencing (X Y, or none
- *     as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
- *     object is too large for the available space (or simply too large
- *     for the mappable aperture!), a view is created instead and faulted
- *     into userspace. (This view is aligned and sized appropriately for
- *     fenced access.)
- *
- * 2 - Recognise WC as a separate cache domain so that we can flush the
- *     delayed writes via GTT before performing direct access via WC.
- *
- * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
- *     pagefault; swapin remains transparent.
- *
- * Restrictions:
- *
- *  * snoopable objects cannot be accessed via the GTT. It can cause machine
- *    hangs on some architectures, corruption on others. An attempt to service
- *    a GTT page fault from a snoopable object will generate a SIGBUS.
- *
- *  * the object must be able to fit into RAM (physical memory, though no
- *    limited to the mappable aperture).
- *
- *
- * Caveats:
- *
- *  * a new GTT page fault will synchronize rendering from the GPU and flush
- *    all data to system memory. Subsequent access will not be synchronized.
- *
- *  * all mappings are revoked on runtime device suspend.
- *
- *  * there are only 8, 16 or 32 fence registers to share between all users
- *    (older machines require fence register for display and blitter access
- *    as well). Contention of the fence registers will cause the previous users
- *    to be unmapped and any new access will generate new page faults.
- *
- *  * running out of memory while servicing a fault may generate a SIGBUS,
- *    rather than the expected SIGSEGV.
- */
-int i915_gem_mmap_gtt_version(void)
-{
-	return 3;
-}
-
-static inline struct i915_ggtt_view
-compute_partial_view(const struct drm_i915_gem_object *obj,
-		     pgoff_t page_offset,
-		     unsigned int chunk)
-{
-	struct i915_ggtt_view view;
-
-	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
-
-	view.type = I915_GGTT_VIEW_PARTIAL;
-	view.partial.offset = rounddown(page_offset, chunk);
-	view.partial.size =
-		min_t(unsigned int, chunk,
-		      (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
-
-	/* If the partial covers the entire object, just create a normal VMA. */
-	if (chunk >= obj->base.size >> PAGE_SHIFT)
-		view.type = I915_GGTT_VIEW_NORMAL;
-
-	return view;
-}
-
-/**
- * i915_gem_fault - fault a page into the GTT
- * @vmf: fault info
- *
- * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
- * from userspace.  The fault handler takes care of binding the object to
- * the GTT (if needed), allocating and programming a fence register (again,
- * only if needed based on whether the old reg is still valid or the object
- * is tiled) and inserting a new PTE into the faulting process.
- *
- * Note that the faulting process may involve evicting existing objects
- * from the GTT and/or fence registers to make room.  So performance may
- * suffer if the GTT working set is large or there are few fence registers
- * left.
- *
- * The current feature set supported by i915_gem_fault() and thus GTT mmaps
- * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
- */
-vm_fault_t i915_gem_fault(struct vm_fault *vmf)
-{
-#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
-	struct vm_area_struct *area = vmf->vma;
-	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool write = area->vm_flags & VM_WRITE;
-	intel_wakeref_t wakeref;
-	struct i915_vma *vma;
-	pgoff_t page_offset;
-	int srcu;
-	int ret;
-
-	/* Sanity check that we allow writing into this object */
-	if (i915_gem_object_is_readonly(obj) && write)
-		return VM_FAULT_SIGBUS;
-
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
-
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
-
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	srcu = i915_reset_trylock(dev_priv);
-	if (srcu < 0) {
-		ret = srcu;
-		goto err_rpm;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto err_reset;
-
-	/* Access to snoopable pages through the GTT is incoherent. */
-	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) {
-		ret = -EFAULT;
-		goto err_unlock;
-	}
-
-	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
-				       PIN_MAPPABLE |
-				       PIN_NONBLOCK |
-				       PIN_NONFAULT);
-	if (IS_ERR(vma)) {
-		/* Use a partial view if it is bigger than available space */
-		struct i915_ggtt_view view =
-			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
-		unsigned int flags;
-
-		flags = PIN_MAPPABLE;
-		if (view.type == I915_GGTT_VIEW_NORMAL)
-			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
-
-		/*
-		 * Userspace is now writing through an untracked VMA, abandon
-		 * all hope that the hardware is able to track future writes.
-		 */
-		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
-
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		if (IS_ERR(vma) && !view.type) {
-			flags = PIN_MAPPABLE;
-			view.type = I915_GGTT_VIEW_PARTIAL;
-			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
-		}
-	}
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		goto err_unlock;
-	}
-
-	ret = i915_vma_pin_fence(vma);
-	if (ret)
-		goto err_unpin;
-
-	/* Finally, remap it using the new GTT offset */
-	ret = remap_io_mapping(area,
-			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
-			       min_t(u64, vma->size, area->vm_end - area->vm_start),
-			       &ggtt->iomap);
-	if (ret)
-		goto err_fence;
-
-	/* Mark as being mmapped into userspace for later revocation */
-	assert_rpm_wakelock_held(dev_priv);
-	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
-		list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
-	GEM_BUG_ON(!obj->userfault_count);
-
-	i915_vma_set_ggtt_write(vma);
-
-err_fence:
-	i915_vma_unpin_fence(vma);
-err_unpin:
-	__i915_vma_unpin(vma);
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-err_reset:
-	i915_reset_unlock(dev_priv, srcu);
-err_rpm:
-	intel_runtime_pm_put(dev_priv, wakeref);
-	i915_gem_object_unpin_pages(obj);
-err:
-	switch (ret) {
-	case -EIO:
-		/*
-		 * We eat errors when the gpu is terminally wedged to avoid
-		 * userspace unduly crashing (gl has no provisions for mmaps to
-		 * fail). But any other -EIO isn't ours (e.g. swap in failure)
-		 * and so needs to be reported.
-		 */
-		if (!i915_terminally_wedged(dev_priv))
-			return VM_FAULT_SIGBUS;
-		/* else: fall through */
-	case -EAGAIN:
-		/*
-		 * EAGAIN means the gpu is hung and we'll wait for the error
-		 * handler to reset everything when re-faulting in
-		 * i915_mutex_lock_interruptible.
-		 */
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		/*
-		 * EBUSY is ok: this just means that another thread
-		 * already did the job.
-		 */
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -ENOSPC:
-	case -EFAULT:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ONCE(ret, "unhandled error in i915_gem_fault: %i\n", ret);
-		return VM_FAULT_SIGBUS;
-	}
-}
-
-static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct i915_vma *vma;
-
-	GEM_BUG_ON(!obj->userfault_count);
-
-	obj->userfault_count = 0;
-	list_del(&obj->userfault_link);
-	drm_vma_node_unmap(&obj->base.vma_node,
-			   obj->base.dev->anon_inode->i_mapping);
-
-	for_each_ggtt_vma(vma, obj)
-		i915_vma_unset_userfault(vma);
-}
-
-/**
- * i915_gem_release_mmap - remove physical page mappings
- * @obj: obj in question
- *
- * Preserve the reservation of the mmapping with the DRM core code, but
- * relinquish ownership of the pages back to the system.
- *
- * It is vital that we remove the page mapping if we have mapped a tiled
- * object through the GTT and then lose the fence register due to
- * resource pressure. Similarly if the object has been moved out of the
- * aperture, than pages mapped into userspace must be revoked. Removing the
- * mapping will then trigger a page fault on the next user access, allowing
- * fixup by i915_gem_fault().
- */
-void
-i915_gem_release_mmap(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	intel_wakeref_t wakeref;
-
-	/* Serialisation between user GTT access and our code depends upon
-	 * revoking the CPU's PTE whilst the mutex is held. The next user
-	 * pagefault then has to wait until we release the mutex.
-	 *
-	 * Note that RPM complicates somewhat by adding an additional
-	 * requirement that operations to the GGTT be made holding the RPM
-	 * wakeref.
-	 */
-	lockdep_assert_held(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
-
-	if (!obj->userfault_count)
-		goto out;
-
-	__i915_gem_object_release_mmap(obj);
-
-	/* Ensure that the CPU's PTE are revoked and there are not outstanding
-	 * memory transactions from userspace before we return. The TLB
-	 * flushing implied above by changing the PTE above *should* be
-	 * sufficient, an extra barrier here just provides us with a bit
-	 * of paranoid documentation about our requirement to serialise
-	 * memory writes before touching registers / GSM.
-	 */
-	wmb();
-
-out:
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
+void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 {
 	struct drm_i915_gem_object *obj, *on;
 	int i;
@@ -2036,17 +899,19 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	 */
 
 	list_for_each_entry_safe(obj, on,
-				 &dev_priv->mm.userfault_list, userfault_link)
+				 &i915->ggtt.userfault_list, userfault_link)
 		__i915_gem_object_release_mmap(obj);
 
-	/* The fence will be lost when the device powers down. If any were
+	/*
+	 * The fence will be lost when the device powers down. If any were
 	 * in use by hardware (i.e. they are pinned), we should not be powering
 	 * down! All other fences will be reacquired by the user upon waking.
 	 */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+	for (i = 0; i < i915->ggtt.num_fences; i++) {
+		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
 
-		/* Ideally we want to assert that the fence register is not
+		/*
+		 * Ideally we want to assert that the fence register is not
 		 * live at this point (i.e. that no piece of code will be
 		 * trying to write through fence + GTT, as that both violates
 		 * our tracking of activity and associated locking/barriers,
@@ -2065,1056 +930,6 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	int err;
-
-	err = drm_gem_create_mmap_offset(&obj->base);
-	if (likely(!err))
-		return 0;
-
-	/* Attempt to reap some mmap space from dead objects */
-	do {
-		err = i915_gem_wait_for_idle(dev_priv,
-					     I915_WAIT_INTERRUPTIBLE,
-					     MAX_SCHEDULE_TIMEOUT);
-		if (err)
-			break;
-
-		i915_gem_drain_freed_objects(dev_priv);
-		err = drm_gem_create_mmap_offset(&obj->base);
-		if (!err)
-			break;
-
-	} while (flush_delayed_work(&dev_priv->gt.retire_work));
-
-	return err;
-}
-
-static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	drm_gem_free_mmap_offset(&obj->base);
-}
-
-int
-i915_gem_mmap_gtt(struct drm_file *file,
-		  struct drm_device *dev,
-		  u32 handle,
-		  u64 *offset)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	obj = i915_gem_object_lookup(file, handle);
-	if (!obj)
-		return -ENOENT;
-
-	ret = i915_gem_object_create_mmap_offset(obj);
-	if (ret == 0)
-		*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/**
- * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
- * @dev: DRM device
- * @data: GTT mapping ioctl data
- * @file: GEM object info
- *
- * Simply returns the fake offset to userspace so it can mmap it.
- * The mmap call will end up in drm_gem_mmap(), which will set things
- * up so we can get faults in the handler above.
- *
- * The fault handler will take care of binding the object into the GTT
- * (since it may have been evicted to make room for something), allocating
- * a fence register, and mapping the appropriate aperture address into
- * userspace.
- */
-int
-i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file)
-{
-	struct drm_i915_gem_mmap_gtt *args = data;
-
-	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
-}
-
-/* Immediately discard the backing storage */
-static void
-i915_gem_object_truncate(struct drm_i915_gem_object *obj)
-{
-	i915_gem_object_free_mmap_offset(obj);
-
-	if (obj->base.filp == NULL)
-		return;
-
-	/* Our goal here is to return as much of the memory as
-	 * is possible back to the system as we are called from OOM.
-	 * To do this we must instruct the shmfs to drop all of its
-	 * backing pages, *now*.
-	 */
-	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
-	obj->mm.madv = __I915_MADV_PURGED;
-	obj->mm.pages = ERR_PTR(-EFAULT);
-}
-
-/* Try to discard unwanted pages */
-void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping;
-
-	lockdep_assert_held(&obj->mm.lock);
-	GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-	switch (obj->mm.madv) {
-	case I915_MADV_DONTNEED:
-		i915_gem_object_truncate(obj);
-	case __I915_MADV_PURGED:
-		return;
-	}
-
-	if (obj->base.filp == NULL)
-		return;
-
-	mapping = obj->base.filp->f_mapping,
-	invalidate_mapping_pages(mapping, 0, (loff_t)-1);
-}
-
-/*
- * Move pages to appropriate lru and release the pagevec, decrementing the
- * ref count of those pages.
- */
-static void check_release_pagevec(struct pagevec *pvec)
-{
-	check_move_unevictable_pages(pvec);
-	__pagevec_release(pvec);
-	cond_resched();
-}
-
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
-			      struct sg_table *pages)
-{
-	struct sgt_iter sgt_iter;
-	struct pagevec pvec;
-	struct page *page;
-
-	__i915_gem_object_release_shmem(obj, pages, true);
-	i915_gem_gtt_finish_pages(obj, pages);
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_save_bit_17_swizzle(obj, pages);
-
-	mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
-
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, pages) {
-		if (obj->mm.dirty)
-			set_page_dirty(page);
-
-		if (obj->mm.madv == I915_MADV_WILLNEED)
-			mark_page_accessed(page);
-
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	obj->mm.dirty = false;
-
-	sg_free_table(pages);
-	kfree(pages);
-}
-
-static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
-{
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-
-	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
-		radix_tree_delete(&obj->mm.get_page.radix, iter.index);
-	rcu_read_unlock();
-}
-
-static struct sg_table *
-__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *pages;
-
-	pages = fetch_and_zero(&obj->mm.pages);
-	if (IS_ERR_OR_NULL(pages))
-		return pages;
-
-	spin_lock(&i915->mm.obj_lock);
-	list_del(&obj->mm.link);
-	spin_unlock(&i915->mm.obj_lock);
-
-	if (obj->mm.mapping) {
-		void *ptr;
-
-		ptr = page_mask_bits(obj->mm.mapping);
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		obj->mm.mapping = NULL;
-	}
-
-	__i915_gem_object_reset_page_iter(obj);
-	obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
-
-	return pages;
-}
-
-int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
-				enum i915_mm_subclass subclass)
-{
-	struct sg_table *pages;
-	int ret;
-
-	if (i915_gem_object_has_pinned_pages(obj))
-		return -EBUSY;
-
-	GEM_BUG_ON(obj->bind_count);
-
-	/* May be called by shrinker from within get_pages() (on another bo) */
-	mutex_lock_nested(&obj->mm.lock, subclass);
-	if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
-		ret = -EBUSY;
-		goto unlock;
-	}
-
-	/*
-	 * ->put_pages might need to allocate memory for the bit17 swizzle
-	 * array, hence protect them from being reaped by removing them from gtt
-	 * lists early.
-	 */
-	pages = __i915_gem_object_unset_pages(obj);
-
-	/*
-	 * XXX Temporary hijinx to avoid updating all backends to handle
-	 * NULL pages. In the future, when we have more asynchronous
-	 * get_pages backends we should be better able to handle the
-	 * cancellation of the async task in a more uniform manner.
-	 */
-	if (!pages && !i915_gem_object_needs_async_cancel(obj))
-		pages = ERR_PTR(-EINVAL);
-
-	if (!IS_ERR(pages))
-		obj->ops->put_pages(obj, pages);
-
-	ret = 0;
-unlock:
-	mutex_unlock(&obj->mm.lock);
-
-	return ret;
-}
-
-bool i915_sg_trim(struct sg_table *orig_st)
-{
-	struct sg_table new_st;
-	struct scatterlist *sg, *new_sg;
-	unsigned int i;
-
-	if (orig_st->nents == orig_st->orig_nents)
-		return false;
-
-	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
-		return false;
-
-	new_sg = new_st.sgl;
-	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
-		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
-		sg_dma_address(new_sg) = sg_dma_address(sg);
-		sg_dma_len(new_sg) = sg_dma_len(sg);
-
-		new_sg = sg_next(new_sg);
-	}
-	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
-
-	sg_free_table(orig_st);
-
-	*orig_st = new_st;
-	return true;
-}
-
-static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-	const unsigned long page_count = obj->base.size / PAGE_SIZE;
-	unsigned long i;
-	struct address_space *mapping;
-	struct sg_table *st;
-	struct scatterlist *sg;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	unsigned long last_pfn = 0;	/* suppress gcc warning */
-	unsigned int max_segment = i915_sg_segment_size();
-	unsigned int sg_page_sizes;
-	struct pagevec pvec;
-	gfp_t noreclaim;
-	int ret;
-
-	/*
-	 * Assert that the object is not currently in any GPU domain. As it
-	 * wasn't in the GTT, there shouldn't be any way it could have been in
-	 * a GPU cache
-	 */
-	GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-	GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
-	/*
-	 * If there's no chance of allocating enough pages for the whole
-	 * object, bail early.
-	 */
-	if (page_count > totalram_pages())
-		return -ENOMEM;
-
-	st = kmalloc(sizeof(*st), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-
-rebuild_st:
-	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
-		kfree(st);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Get the list of pages out of our struct file.  They'll be pinned
-	 * at this point until we release them.
-	 *
-	 * Fail silently without starting the shrinker
-	 */
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_unevictable(mapping);
-	noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
-	noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
-
-	sg = st->sgl;
-	st->nents = 0;
-	sg_page_sizes = 0;
-	for (i = 0; i < page_count; i++) {
-		const unsigned int shrink[] = {
-			I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE,
-			0,
-		}, *s = shrink;
-		gfp_t gfp = noreclaim;
-
-		do {
-			cond_resched();
-			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-			if (!IS_ERR(page))
-				break;
-
-			if (!*s) {
-				ret = PTR_ERR(page);
-				goto err_sg;
-			}
-
-			i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++);
-
-			/*
-			 * We've tried hard to allocate the memory by reaping
-			 * our own buffer, now let the real VM do its job and
-			 * go down in flames if truly OOM.
-			 *
-			 * However, since graphics tend to be disposable,
-			 * defer the oom here by reporting the ENOMEM back
-			 * to userspace.
-			 */
-			if (!*s) {
-				/* reclaim and warn, but no oom */
-				gfp = mapping_gfp_mask(mapping);
-
-				/*
-				 * Our bo are always dirty and so we require
-				 * kswapd to reclaim our pages (direct reclaim
-				 * does not effectively begin pageout of our
-				 * buffers on its own). However, direct reclaim
-				 * only waits for kswapd when under allocation
-				 * congestion. So as a result __GFP_RECLAIM is
-				 * unreliable and fails to actually reclaim our
-				 * dirty pages -- unless you try over and over
-				 * again with !__GFP_NORETRY. However, we still
-				 * want to fail this allocation rather than
-				 * trigger the out-of-memory killer and for
-				 * this we want __GFP_RETRY_MAYFAIL.
-				 */
-				gfp |= __GFP_RETRY_MAYFAIL;
-			}
-		} while (1);
-
-		if (!i ||
-		    sg->length >= max_segment ||
-		    page_to_pfn(page) != last_pfn + 1) {
-			if (i) {
-				sg_page_sizes |= sg->length;
-				sg = sg_next(sg);
-			}
-			st->nents++;
-			sg_set_page(sg, page, PAGE_SIZE, 0);
-		} else {
-			sg->length += PAGE_SIZE;
-		}
-		last_pfn = page_to_pfn(page);
-
-		/* Check that the i965g/gm workaround works. */
-		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
-	}
-	if (sg) { /* loop terminated early; short sg table */
-		sg_page_sizes |= sg->length;
-		sg_mark_end(sg);
-	}
-
-	/* Trim unused sg entries to avoid wasting memory. */
-	i915_sg_trim(st);
-
-	ret = i915_gem_gtt_prepare_pages(obj, st);
-	if (ret) {
-		/*
-		 * DMA remapping failed? One possible cause is that
-		 * it could not reserve enough large entries, asking
-		 * for PAGE_SIZE chunks instead may be helpful.
-		 */
-		if (max_segment > PAGE_SIZE) {
-			for_each_sgt_page(page, sgt_iter, st)
-				put_page(page);
-			sg_free_table(st);
-
-			max_segment = PAGE_SIZE;
-			goto rebuild_st;
-		} else {
-			dev_warn(&dev_priv->drm.pdev->dev,
-				 "Failed to DMA remap %lu pages\n",
-				 page_count);
-			goto err_pages;
-		}
-	}
-
-	if (i915_gem_object_needs_bit17_swizzle(obj))
-		i915_gem_object_do_bit_17_swizzle(obj, st);
-
-	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
-
-	return 0;
-
-err_sg:
-	sg_mark_end(sg);
-err_pages:
-	mapping_clear_unevictable(mapping);
-	pagevec_init(&pvec);
-	for_each_sgt_page(page, sgt_iter, st) {
-		if (!pagevec_add(&pvec, page))
-			check_release_pagevec(&pvec);
-	}
-	if (pagevec_count(&pvec))
-		check_release_pagevec(&pvec);
-	sg_free_table(st);
-	kfree(st);
-
-	/*
-	 * shmemfs first checks if there is enough memory to allocate the page
-	 * and reports ENOSPC should there be insufficient, along with the usual
-	 * ENOMEM for a genuine allocation failure.
-	 *
-	 * We use ENOSPC in our driver to mean that we have run out of aperture
-	 * space and so want to translate the error from shmemfs back to our
-	 * usual understanding of ENOMEM.
-	 */
-	if (ret == -ENOSPC)
-		ret = -ENOMEM;
-
-	return ret;
-}
-
-void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
-				 struct sg_table *pages,
-				 unsigned int sg_page_sizes)
-{
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned long supported = INTEL_INFO(i915)->page_sizes;
-	int i;
-
-	lockdep_assert_held(&obj->mm.lock);
-
-	/* Make the pages coherent with the GPU (flushing any swapin). */
-	if (obj->cache_dirty) {
-		obj->write_domain = 0;
-		if (i915_gem_object_has_struct_page(obj))
-			drm_clflush_sg(pages);
-		obj->cache_dirty = false;
-	}
-
-	obj->mm.get_page.sg_pos = pages->sgl;
-	obj->mm.get_page.sg_idx = 0;
-
-	obj->mm.pages = pages;
-
-	if (i915_gem_object_is_tiled(obj) &&
-	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
-		GEM_BUG_ON(obj->mm.quirked);
-		__i915_gem_object_pin_pages(obj);
-		obj->mm.quirked = true;
-	}
-
-	GEM_BUG_ON(!sg_page_sizes);
-	obj->mm.page_sizes.phys = sg_page_sizes;
-
-	/*
-	 * Calculate the supported page-sizes which fit into the given
-	 * sg_page_sizes. This will give us the page-sizes which we may be able
-	 * to use opportunistically when later inserting into the GTT. For
-	 * example if phys=2G, then in theory we should be able to use 1G, 2M,
-	 * 64K or 4K pages, although in practice this will depend on a number of
-	 * other factors.
-	 */
-	obj->mm.page_sizes.sg = 0;
-	for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
-		if (obj->mm.page_sizes.phys & ~0u << i)
-			obj->mm.page_sizes.sg |= BIT(i);
-	}
-	GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
-
-	spin_lock(&i915->mm.obj_lock);
-	list_add(&obj->mm.link, &i915->mm.unbound_list);
-	spin_unlock(&i915->mm.obj_lock);
-}
-
-static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
-		DRM_DEBUG("Attempting to obtain a purgeable object\n");
-		return -EFAULT;
-	}
-
-	err = obj->ops->get_pages(obj);
-	GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
-
-	return err;
-}
-
-/* Ensure that the associated pages are gathered from the backing storage
- * and pinned into our object. i915_gem_object_pin_pages() may be called
- * multiple times before they are released by a single call to
- * i915_gem_object_unpin_pages() - once the pages are no longer referenced
- * either as a result of memory pressure (reaping pages under the shrinker)
- * or as the object is itself released.
- */
-int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
-{
-	int err;
-
-	err = mutex_lock_interruptible(&obj->mm.lock);
-	if (err)
-		return err;
-
-	if (unlikely(!i915_gem_object_has_pages(obj))) {
-		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-		err = ____i915_gem_object_get_pages(obj);
-		if (err)
-			goto unlock;
-
-		smp_mb__before_atomic();
-	}
-	atomic_inc(&obj->mm.pages_pin_count);
-
-unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
-/* The 'mapping' part of i915_gem_object_pin_map() below */
-static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
-				 enum i915_map_type type)
-{
-	unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
-	struct sg_table *sgt = obj->mm.pages;
-	struct sgt_iter sgt_iter;
-	struct page *page;
-	struct page *stack_pages[32];
-	struct page **pages = stack_pages;
-	unsigned long i = 0;
-	pgprot_t pgprot;
-	void *addr;
-
-	/* A single page can always be kmapped */
-	if (n_pages == 1 && type == I915_MAP_WB)
-		return kmap(sg_page(sgt->sgl));
-
-	if (n_pages > ARRAY_SIZE(stack_pages)) {
-		/* Too big for stack -- allocate temporary array instead */
-		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
-		if (!pages)
-			return NULL;
-	}
-
-	for_each_sgt_page(page, sgt_iter, sgt)
-		pages[i++] = page;
-
-	/* Check that we have the expected number of pages */
-	GEM_BUG_ON(i != n_pages);
-
-	switch (type) {
-	default:
-		MISSING_CASE(type);
-		/* fallthrough to use PAGE_KERNEL anyway */
-	case I915_MAP_WB:
-		pgprot = PAGE_KERNEL;
-		break;
-	case I915_MAP_WC:
-		pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
-		break;
-	}
-	addr = vmap(pages, n_pages, 0, pgprot);
-
-	if (pages != stack_pages)
-		kvfree(pages);
-
-	return addr;
-}
-
-/* get, pin, and map the pages of the object into kernel space */
-void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
-			      enum i915_map_type type)
-{
-	enum i915_map_type has_type;
-	bool pinned;
-	void *ptr;
-	int ret;
-
-	if (unlikely(!i915_gem_object_has_struct_page(obj)))
-		return ERR_PTR(-ENXIO);
-
-	ret = mutex_lock_interruptible(&obj->mm.lock);
-	if (ret)
-		return ERR_PTR(ret);
-
-	pinned = !(type & I915_MAP_OVERRIDE);
-	type &= ~I915_MAP_OVERRIDE;
-
-	if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
-		if (unlikely(!i915_gem_object_has_pages(obj))) {
-			GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
-
-			ret = ____i915_gem_object_get_pages(obj);
-			if (ret)
-				goto err_unlock;
-
-			smp_mb__before_atomic();
-		}
-		atomic_inc(&obj->mm.pages_pin_count);
-		pinned = false;
-	}
-	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (ptr && has_type != type) {
-		if (pinned) {
-			ret = -EBUSY;
-			goto err_unpin;
-		}
-
-		if (is_vmalloc_addr(ptr))
-			vunmap(ptr);
-		else
-			kunmap(kmap_to_page(ptr));
-
-		ptr = obj->mm.mapping = NULL;
-	}
-
-	if (!ptr) {
-		ptr = i915_gem_object_map(obj, type);
-		if (!ptr) {
-			ret = -ENOMEM;
-			goto err_unpin;
-		}
-
-		obj->mm.mapping = page_pack_bits(ptr, type);
-	}
-
-out_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return ptr;
-
-err_unpin:
-	atomic_dec(&obj->mm.pages_pin_count);
-err_unlock:
-	ptr = ERR_PTR(ret);
-	goto out_unlock;
-}
-
-void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
-				 unsigned long offset,
-				 unsigned long size)
-{
-	enum i915_map_type has_type;
-	void *ptr;
-
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-	GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
-				     offset, size, obj->base.size));
-
-	obj->mm.dirty = true;
-
-	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
-		return;
-
-	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
-	if (has_type == I915_MAP_WC)
-		return;
-
-	drm_clflush_virt_range(ptr + offset, size);
-	if (size == obj->base.size) {
-		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
-		obj->cache_dirty = false;
-	}
-}
-
-static int
-i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
-			   const struct drm_i915_gem_pwrite *arg)
-{
-	struct address_space *mapping = obj->base.filp->f_mapping;
-	char __user *user_data = u64_to_user_ptr(arg->data_ptr);
-	u64 remain, offset;
-	unsigned int pg;
-
-	/* Caller already validated user args */
-	GEM_BUG_ON(!access_ok(user_data, arg->size));
-
-	/*
-	 * Before we instantiate/pin the backing store for our use, we
-	 * can prepopulate the shmemfs filp efficiently using a write into
-	 * the pagecache. We avoid the penalty of instantiating all the
-	 * pages, important if the user is just writing to a few and never
-	 * uses the object on the GPU, and using a direct write into shmemfs
-	 * allows it to avoid the cost of retrieving a page (either swapin
-	 * or clearing-before-use) before it is overwritten.
-	 */
-	if (i915_gem_object_has_pages(obj))
-		return -ENODEV;
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return -EFAULT;
-
-	/*
-	 * Before the pages are instantiated the object is treated as being
-	 * in the CPU domain. The pages will be clflushed as required before
-	 * use, and we can freely write into the pages directly. If userspace
-	 * races pwrite with any other operation; corruption will ensue -
-	 * that is userspace's prerogative!
-	 */
-
-	remain = arg->size;
-	offset = arg->offset;
-	pg = offset_in_page(offset);
-
-	do {
-		unsigned int len, unwritten;
-		struct page *page;
-		void *data, *vaddr;
-		int err;
-		char c;
-
-		len = PAGE_SIZE - pg;
-		if (len > remain)
-			len = remain;
-
-		/* Prefault the user page to reduce potential recursion */
-		err = __get_user(c, user_data);
-		if (err)
-			return err;
-
-		err = __get_user(c, user_data + len - 1);
-		if (err)
-			return err;
-
-		err = pagecache_write_begin(obj->base.filp, mapping,
-					    offset, len, 0,
-					    &page, &data);
-		if (err < 0)
-			return err;
-
-		vaddr = kmap_atomic(page);
-		unwritten = __copy_from_user_inatomic(vaddr + pg,
-						      user_data,
-						      len);
-		kunmap_atomic(vaddr);
-
-		err = pagecache_write_end(obj->base.filp, mapping,
-					  offset, len, len - unwritten,
-					  page, data);
-		if (err < 0)
-			return err;
-
-		/* We don't handle -EFAULT, leave it to the caller to check */
-		if (unwritten)
-			return -ENODEV;
-
-		remain -= len;
-		user_data += len;
-		offset += len;
-		pg = 0;
-	} while (remain);
-
-	return 0;
-}
-
-static void
-i915_gem_retire_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.retire_work.work);
-	struct drm_device *dev = &dev_priv->drm;
-
-	/* Come back later if the device is busy... */
-	if (mutex_trylock(&dev->struct_mutex)) {
-		i915_retire_requests(dev_priv);
-		mutex_unlock(&dev->struct_mutex);
-	}
-
-	/*
-	 * Keep the retire handler running until we are finally idle.
-	 * We do not need to do this test under locking as in the worst-case
-	 * we queue the retire worker once too often.
-	 */
-	if (READ_ONCE(dev_priv->gt.awake))
-		queue_delayed_work(dev_priv->wq,
-				   &dev_priv->gt.retire_work,
-				   round_jiffies_up_relative(HZ));
-}
-
-static bool switch_to_kernel_context_sync(struct drm_i915_private *i915,
-					  unsigned long mask)
-{
-	bool result = true;
-
-	/*
-	 * Even if we fail to switch, give whatever is running a small chance
-	 * to save itself before we report the failure. Yes, this may be a
-	 * false positive due to e.g. ENOMEM, caveat emptor!
-	 */
-	if (i915_gem_switch_to_kernel_context(i915, mask))
-		result = false;
-
-	if (i915_gem_wait_for_idle(i915,
-				   I915_WAIT_LOCKED |
-				   I915_WAIT_FOR_IDLE_BOOST,
-				   I915_GEM_IDLE_TIMEOUT))
-		result = false;
-
-	if (!result) {
-		if (i915_modparams.reset) { /* XXX hide warning from gem_eio */
-			dev_err(i915->drm.dev,
-				"Failed to idle engines, declaring wedged!\n");
-			GEM_TRACE_DUMP();
-		}
-
-		/* Forcibly cancel outstanding work and leave the gpu quiet. */
-		i915_gem_set_wedged(i915);
-	}
-
-	i915_retire_requests(i915); /* ensure we flush after wedging */
-	return result;
-}
-
-static bool load_power_context(struct drm_i915_private *i915)
-{
-	/* Force loading the kernel context on all engines */
-	if (!switch_to_kernel_context_sync(i915, ALL_ENGINES))
-		return false;
-
-	/*
-	 * Immediately park the GPU so that we enable powersaving and
-	 * treat it as idle. The next time we issue a request, we will
-	 * unpark and start using the engine->pinned_default_state, otherwise
-	 * it is in limbo and an early reset may fail.
-	 */
-	__i915_gem_park(i915);
-
-	return true;
-}
-
-static void
-i915_gem_idle_work_handler(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gt.idle_work.work);
-	bool rearm_hangcheck;
-
-	if (!READ_ONCE(i915->gt.awake))
-		return;
-
-	if (READ_ONCE(i915->gt.active_requests))
-		return;
-
-	rearm_hangcheck =
-		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-
-	if (!mutex_trylock(&i915->drm.struct_mutex)) {
-		/* Currently busy, come back later */
-		mod_delayed_work(i915->wq,
-				 &i915->gt.idle_work,
-				 msecs_to_jiffies(50));
-		goto out_rearm;
-	}
-
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. Should anything unfortunate happen
-	 * while we are idle (such as the GPU being power cycled), no users
-	 * will be harmed.
-	 */
-	if (!work_pending(&i915->gt.idle_work.work) &&
-	    !i915->gt.active_requests) {
-		++i915->gt.active_requests; /* don't requeue idle */
-
-		switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
-		if (!--i915->gt.active_requests) {
-			__i915_gem_park(i915);
-			rearm_hangcheck = false;
-		}
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-
-out_rearm:
-	if (rearm_hangcheck) {
-		GEM_BUG_ON(!i915->gt.awake);
-		i915_queue_hangcheck(i915);
-	}
-}
-
-void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(gem->dev);
-	struct drm_i915_gem_object *obj = to_intel_bo(gem);
-	struct drm_i915_file_private *fpriv = file->driver_priv;
-	struct i915_lut_handle *lut, *ln;
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
-		struct i915_gem_context *ctx = lut->ctx;
-		struct i915_vma *vma;
-
-		GEM_BUG_ON(ctx->file_priv == ERR_PTR(-EBADF));
-		if (ctx->file_priv != fpriv)
-			continue;
-
-		vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
-		GEM_BUG_ON(vma->obj != obj);
-
-		/* We allow the process to have multiple handles to the same
-		 * vma, in the same fd namespace, by virtue of flink/open.
-		 */
-		GEM_BUG_ON(!vma->open_count);
-		if (!--vma->open_count && !i915_vma_is_ggtt(vma))
-			i915_vma_close(vma);
-
-		list_del(&lut->obj_link);
-		list_del(&lut->ctx_link);
-
-		i915_lut_handle_free(lut);
-		__i915_gem_object_release_unless_active(obj);
-	}
-
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
-static unsigned long to_wait_timeout(s64 timeout_ns)
-{
-	if (timeout_ns < 0)
-		return MAX_SCHEDULE_TIMEOUT;
-
-	if (timeout_ns == 0)
-		return 0;
-
-	return nsecs_to_jiffies_timeout(timeout_ns);
-}
-
-/**
- * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
- * @dev: drm device pointer
- * @data: ioctl data blob
- * @file: drm file pointer
- *
- * Returns 0 if successful, else an error is returned with the remaining time in
- * the timeout parameter.
- *  -ETIME: object is still busy after timeout
- *  -ERESTARTSYS: signal interrupted the wait
- *  -ENONENT: object doesn't exist
- * Also possible, but rare:
- *  -EAGAIN: incomplete, restart syscall
- *  -ENOMEM: damn
- *  -ENODEV: Internal IRQ fail
- *  -E?: The add request failed
- *
- * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
- * non-zero timeout parameter the wait ioctl will wait for the given number of
- * nanoseconds on an object becoming unbusy. Since the wait itself does so
- * without holding struct_mutex the object may become re-busied before this
- * function completes. A similar but shorter * race condition exists in the busy
- * ioctl
- */
-int
-i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
-{
-	struct drm_i915_gem_wait *args = data;
-	struct drm_i915_gem_object *obj;
-	ktime_t start;
-	long ret;
-
-	if (args->flags != 0)
-		return -EINVAL;
-
-	obj = i915_gem_object_lookup(file, args->bo_handle);
-	if (!obj)
-		return -ENOENT;
-
-	start = ktime_get();
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   I915_WAIT_ALL,
-				   to_wait_timeout(args->timeout_ns));
-
-	if (args->timeout_ns > 0) {
-		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
-		if (args->timeout_ns < 0)
-			args->timeout_ns = 0;
-
-		/*
-		 * Apparently ktime isn't accurate enough and occasionally has a
-		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
-		 * things up to make the test happy. We allow up to 1 jiffy.
-		 *
-		 * This is a regression from the timespec->ktime conversion.
-		 */
-		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
-			args->timeout_ns = 0;
-
-		/* Asked to wait beyond the jiffie/scheduler precision? */
-		if (ret == -ETIME && args->timeout_ns)
-			ret = -EAGAIN;
-	}
-
-	i915_gem_object_put(obj);
-	return ret;
-}
-
 static int wait_for_engines(struct drm_i915_private *i915)
 {
 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
@@ -3135,9 +950,6 @@ wait_for_timelines(struct drm_i915_private *i915,
 	struct i915_gt_timelines *gt = &i915->gt.timelines;
 	struct i915_timeline *tl;
 
-	if (!READ_ONCE(i915->gt.active_requests))
-		return timeout;
-
 	mutex_lock(&gt->mutex);
 	list_for_each_entry(tl, &gt->active_list, link) {
 		struct i915_request *rq;
@@ -3177,9 +989,10 @@ wait_for_timelines(struct drm_i915_private *i915,
 int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			   unsigned int flags, long timeout)
 {
-	GEM_TRACE("flags=%x (%s), timeout=%ld%s\n",
+	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
 		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
-		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "");
+		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
+		  yesno(i915->gt.awake));
 
 	/* If the device is asleep, we have no requests outstanding */
 	if (!READ_ONCE(i915->gt.awake))
@@ -3204,565 +1017,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 	return 0;
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
-{
-	/*
-	 * We manually flush the CPU domain so that we can override and
-	 * force the flush for the display, and perform it asyncrhonously.
-	 */
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-	if (obj->cache_dirty)
-		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-	obj->write_domain = 0;
-}
-
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-	if (!READ_ONCE(obj->pin_global))
-		return;
-
-	mutex_lock(&obj->base.dev->struct_mutex);
-	__i915_gem_object_flush_for_display(obj);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
-/**
- * Moves a single object to the WC read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_WC)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * WC domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_WC;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_WC;
-		obj->write_domain = I915_GEM_DOMAIN_WC;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Moves a single object to the GTT read, and possibly write domain.
- * @obj: object to act on
- * @write: ask for write access or read only
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
-		return 0;
-
-	/* Flush and acquire obj->pages so that we are coherent through
-	 * direct access in memory with previous cached writes through
-	 * shmemfs and that our cache domain tracking remains valid.
-	 * For example, if the obj->filp was moved to swap without us
-	 * being notified and releasing the pages, we would mistakenly
-	 * continue to assume that the obj remained out of the CPU cached
-	 * domain.
-	 */
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
-
-	/* Serialise direct access to this object with the barriers for
-	 * coherent writes from the GPU, by effectively invalidating the
-	 * GTT domain upon first access.
-	 */
-	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
-		mb();
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-	if (write) {
-		obj->read_domains = I915_GEM_DOMAIN_GTT;
-		obj->write_domain = I915_GEM_DOMAIN_GTT;
-		obj->mm.dirty = true;
-	}
-
-	i915_gem_object_unpin_pages(obj);
-	return 0;
-}
-
-/**
- * Changes the cache-level of an object across all VMA.
- * @obj: object to act on
- * @cache_level: new cache level to set for the object
- *
- * After this function returns, the object will be in the new cache-level
- * across all GTT and the contents of the backing storage will be coherent,
- * with respect to the new cache-level. In order to keep the backing storage
- * coherent for all users, we only allow a single cache level to be set
- * globally on the object and prevent it from being changed whilst the
- * hardware is reading from the object. That is if the object is currently
- * on the scanout it will be set to uncached (or equivalent display
- * cache coherency) and all non-MOCS GPU access will also be uncached so
- * that all direct access to the scanout remains coherent.
- */
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-				    enum i915_cache_level cache_level)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (obj->cache_level == cache_level)
-		return 0;
-
-	/* Inspect the list of currently bound VMA and unbind any that would
-	 * be invalid given the new cache-level. This is principally to
-	 * catch the issue of the CS prefetch crossing page boundaries and
-	 * reading an invalid PTE on older architectures.
-	 */
-restart:
-	list_for_each_entry(vma, &obj->vma.list, obj_link) {
-		if (!drm_mm_node_allocated(&vma->node))
-			continue;
-
-		if (i915_vma_is_pinned(vma)) {
-			DRM_DEBUG("can not change the cache level of pinned objects\n");
-			return -EBUSY;
-		}
-
-		if (!i915_vma_is_closed(vma) &&
-		    i915_gem_valid_gtt_space(vma, cache_level))
-			continue;
-
-		ret = i915_vma_unbind(vma);
-		if (ret)
-			return ret;
-
-		/* As unbinding may affect other elements in the
-		 * obj->vma_list (due to side-effects from retiring
-		 * an active vma), play safe and restart the iterator.
-		 */
-		goto restart;
-	}
-
-	/* We can reuse the existing drm_mm nodes but need to change the
-	 * cache-level on the PTE. We could simply unbind them all and
-	 * rebind with the correct cache-level on next use. However since
-	 * we already have a valid slot, dma mapping, pages etc, we may as
-	 * rewrite the PTE in the belief that doing so tramples upon less
-	 * state and so involves less work.
-	 */
-	if (obj->bind_count) {
-		/* Before we change the PTE, the GPU must not be accessing it.
-		 * If we wait upon the object, we know that all the bound
-		 * VMA are no longer active.
-		 */
-		ret = i915_gem_object_wait(obj,
-					   I915_WAIT_INTERRUPTIBLE |
-					   I915_WAIT_LOCKED |
-					   I915_WAIT_ALL,
-					   MAX_SCHEDULE_TIMEOUT);
-		if (ret)
-			return ret;
-
-		if (!HAS_LLC(to_i915(obj->base.dev)) &&
-		    cache_level != I915_CACHE_NONE) {
-			/* Access to snoopable pages through the GTT is
-			 * incoherent and on some machines causes a hard
-			 * lockup. Relinquish the CPU mmaping to force
-			 * userspace to refault in the pages and we can
-			 * then double check if the GTT mapping is still
-			 * valid for that pointer access.
-			 */
-			i915_gem_release_mmap(obj);
-
-			/* As we no longer need a fence for GTT access,
-			 * we can relinquish it now (and so prevent having
-			 * to steal a fence from someone else on the next
-			 * fence request). Note GPU activity would have
-			 * dropped the fence as all snoopable access is
-			 * supposed to be linear.
-			 */
-			for_each_ggtt_vma(vma, obj) {
-				ret = i915_vma_put_fence(vma);
-				if (ret)
-					return ret;
-			}
-		} else {
-			/* We either have incoherent backing store and
-			 * so no GTT access or the architecture is fully
-			 * coherent. In such cases, existing GTT mmaps
-			 * ignore the cache bit in the PTE and we can
-			 * rewrite it without confusing the GPU or having
-			 * to force userspace to fault back in its mmaps.
-			 */
-		}
-
-		list_for_each_entry(vma, &obj->vma.list, obj_link) {
-			if (!drm_mm_node_allocated(&vma->node))
-				continue;
-
-			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
-			if (ret)
-				return ret;
-		}
-	}
-
-	list_for_each_entry(vma, &obj->vma.list, obj_link)
-		vma->node.color = cache_level;
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-	obj->cache_dirty = true; /* Always invalidate stale cachelines */
-
-	return 0;
-}
-
-int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	int err = 0;
-
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
-		goto out;
-	}
-
-	switch (obj->cache_level) {
-	case I915_CACHE_LLC:
-	case I915_CACHE_L3_LLC:
-		args->caching = I915_CACHING_CACHED;
-		break;
-
-	case I915_CACHE_WT:
-		args->caching = I915_CACHING_DISPLAY;
-		break;
-
-	default:
-		args->caching = I915_CACHING_NONE;
-		break;
-	}
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
-			       struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_caching *args = data;
-	struct drm_i915_gem_object *obj;
-	enum i915_cache_level level;
-	int ret = 0;
-
-	switch (args->caching) {
-	case I915_CACHING_NONE:
-		level = I915_CACHE_NONE;
-		break;
-	case I915_CACHING_CACHED:
-		/*
-		 * Due to a HW issue on BXT A stepping, GPU stores via a
-		 * snooped mapping may leave stale data in a corresponding CPU
-		 * cacheline, whereas normally such cachelines would get
-		 * invalidated.
-		 */
-		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
-			return -ENODEV;
-
-		level = I915_CACHE_LLC;
-		break;
-	case I915_CACHING_DISPLAY:
-		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	obj = i915_gem_object_lookup(file, args->handle);
-	if (!obj)
-		return -ENOENT;
-
-	/*
-	 * The caching mode of proxy object is handled by its generator, and
-	 * not allowed to be changed by userspace.
-	 */
-	if (i915_gem_object_is_proxy(obj)) {
-		ret = -ENXIO;
-		goto out;
-	}
-
-	if (obj->cache_level == level)
-		goto out;
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		goto out;
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		goto out;
-
-	ret = i915_gem_object_set_cache_level(obj, level);
-	mutex_unlock(&dev->struct_mutex);
-
-out:
-	i915_gem_object_put(obj);
-	return ret;
-}
-
-/*
- * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
- * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
- * (for pageflips). We only flush the caches while preparing the buffer for
- * display, the callers are responsible for frontbuffer flush.
- */
-struct i915_vma *
-i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
-				     u32 alignment,
-				     const struct i915_ggtt_view *view,
-				     unsigned int flags)
-{
-	struct i915_vma *vma;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	/* Mark the global pin early so that we account for the
-	 * display coherency whilst setting up the cache domains.
-	 */
-	obj->pin_global++;
-
-	/* The display engine is not coherent with the LLC cache on gen6.  As
-	 * a result, we make sure that the pinning that is about to occur is
-	 * done with uncached PTEs. This is lowest common denominator for all
-	 * chipsets.
-	 *
-	 * However for gen6+, we could do better by using the GFDT bit instead
-	 * of uncaching, which would allow us to flush all the LLC-cached data
-	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
-	 */
-	ret = i915_gem_object_set_cache_level(obj,
-					      HAS_WT(to_i915(obj->base.dev)) ?
-					      I915_CACHE_WT : I915_CACHE_NONE);
-	if (ret) {
-		vma = ERR_PTR(ret);
-		goto err_unpin_global;
-	}
-
-	/* As the user may map the buffer once pinned in the display plane
-	 * (e.g. libkms for the bootup splash), we have to ensure that we
-	 * always use map_and_fenceable for all scanout buffers. However,
-	 * it may simply be too big to fit into mappable, in which case
-	 * put it anyway and hope that userspace can cope (but always first
-	 * try to preserve the existing ABI).
-	 */
-	vma = ERR_PTR(-ENOSPC);
-	if ((flags & PIN_MAPPABLE) == 0 &&
-	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
-					       flags |
-					       PIN_MAPPABLE |
-					       PIN_NONBLOCK);
-	if (IS_ERR(vma))
-		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
-	if (IS_ERR(vma))
-		goto err_unpin_global;
-
-	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
-
-	__i915_gem_object_flush_for_display(obj);
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-
-	return vma;
-
-err_unpin_global:
-	obj->pin_global--;
-	return vma;
-}
-
-void
-i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
-{
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
-
-	if (WARN_ON(vma->obj->pin_global == 0))
-		return;
-
-	if (--vma->obj->pin_global == 0)
-		vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
-
-	/* Bump the LRU to try and avoid premature eviction whilst flipping  */
-	i915_gem_object_bump_inactive_ggtt(vma->obj);
-
-	i915_vma_unpin(vma);
-}
-
-/**
- * Moves a single object to the CPU read, and possibly write domain.
- * @obj: object to act on
- * @write: requesting write or read-only access
- *
- * This function returns when the move is complete, including waiting on
- * flushes to occur.
- */
-int
-i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
-{
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_LOCKED |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
-	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
-
-	/* Flush the CPU cache if it's still invalid. */
-	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-		obj->read_domains |= I915_GEM_DOMAIN_CPU;
-	}
-
-	/* It should now be out of any other write domains, and we can update
-	 * the domain values for our changes.
-	 */
-	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
-
-	/* If we're writing through the CPU, then the GPU read domains will
-	 * need to be invalidated at next use.
-	 */
-	if (write)
-		__start_cpu_write(obj);
-
-	return 0;
-}
-
-/* Throttle our rendering by waiting until the ring has completed our requests
- * emitted over 20 msec ago.
- *
- * Note that if we were to use the current jiffies each time around the loop,
- * we wouldn't escape the function with any frames outstanding if the time to
- * render a frame was over 20ms.
- *
- * This should get us reasonable parallelism between CPU and GPU but also
- * relatively low latency when blocking on a particular request to finish.
- */
-static int
-i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
-	struct i915_request *request, *target = NULL;
-	long ret;
-
-	/* ABI: return -EIO if already wedged */
-	ret = i915_terminally_wedged(dev_priv);
-	if (ret)
-		return ret;
-
-	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
-			break;
-
-		if (target) {
-			list_del(&target->client_link);
-			target->file_priv = NULL;
-		}
-
-		target = request;
-	}
-	if (target)
-		i915_request_get(target);
-	spin_unlock(&file_priv->mm.lock);
-
-	if (target == NULL)
-		return 0;
-
-	ret = i915_request_wait(target,
-				I915_WAIT_INTERRUPTIBLE,
-				MAX_SCHEDULE_TIMEOUT);
-	i915_request_put(target);
-
-	return ret < 0 ? ret : 0;
-}
-
 struct i915_vma *
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
@@ -3842,146 +1096,11 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 	return vma;
 }
 
-static __always_inline u32 __busy_read_flag(u8 id)
-{
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
-		return 0xffff0000u;
-
-	GEM_BUG_ON(id >= 16);
-	return 0x10000u << id;
-}
-
-static __always_inline u32 __busy_write_id(u8 id)
-{
-	/*
-	 * The uABI guarantees an active writer is also amongst the read
-	 * engines. This would be true if we accessed the activity tracking
-	 * under the lock, but as we perform the lookup of the object and
-	 * its activity locklessly we can not guarantee that the last_write
-	 * being active implies that we have set the same engine flag from
-	 * last_read - hence we always set both read and write busy for
-	 * last_write.
-	 */
-	if (id == (u8)I915_ENGINE_CLASS_INVALID)
-		return 0xffffffffu;
-
-	return (id + 1) | __busy_read_flag(id);
-}
-
-static __always_inline unsigned int
-__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
-{
-	const struct i915_request *rq;
-
-	/*
-	 * We have to check the current hw status of the fence as the uABI
-	 * guarantees forward progress. We could rely on the idle worker
-	 * to eventually flush us, but to minimise latency just ask the
-	 * hardware.
-	 *
-	 * Note we only report on the status of native fences.
-	 */
-	if (!dma_fence_is_i915(fence))
-		return 0;
-
-	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, const struct i915_request, fence);
-	if (i915_request_completed(rq))
-		return 0;
-
-	/* Beware type-expansion follies! */
-	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
-	return flag(rq->engine->uabi_class);
-}
-
-static __always_inline unsigned int
-busy_check_reader(const struct dma_fence *fence)
-{
-	return __busy_set_if_active(fence, __busy_read_flag);
-}
-
-static __always_inline unsigned int
-busy_check_writer(const struct dma_fence *fence)
-{
-	if (!fence)
-		return 0;
-
-	return __busy_set_if_active(fence, __busy_write_id);
-}
-
-int
-i915_gem_busy_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file)
-{
-	struct drm_i915_gem_busy *args = data;
-	struct drm_i915_gem_object *obj;
-	struct reservation_object_list *list;
-	unsigned int seq;
-	int err;
-
-	err = -ENOENT;
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj)
-		goto out;
-
-	/*
-	 * A discrepancy here is that we do not report the status of
-	 * non-i915 fences, i.e. even though we may report the object as idle,
-	 * a call to set-domain may still stall waiting for foreign rendering.
-	 * This also means that wait-ioctl may report an object as busy,
-	 * where busy-ioctl considers it idle.
-	 *
-	 * We trade the ability to warn of foreign fences to report on which
-	 * i915 engines are active for the object.
-	 *
-	 * Alternatively, we can trade that extra information on read/write
-	 * activity with
-	 *	args->busy =
-	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
-	 * to report the overall busyness. This is what the wait-ioctl does.
-	 *
-	 */
-retry:
-	seq = raw_read_seqcount(&obj->resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
-
-	/* Translate shared fences to READ set of engines */
-	list = rcu_dereference(obj->resv->fence);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
-			args->busy |= busy_check_reader(fence);
-		}
-	}
-
-	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
-		goto retry;
-
-	err = 0;
-out:
-	rcu_read_unlock();
-	return err;
-}
-
-int
-i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	return i915_gem_ring_throttle(dev, file_priv);
-}
-
 int
 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_madvise *args = data;
 	struct drm_i915_gem_object *obj;
 	int err;
@@ -4004,7 +1123,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 
 	if (i915_gem_object_has_pages(obj) &&
 	    i915_gem_object_is_tiled(obj) &&
-	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
+	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		if (obj->mm.madv == I915_MADV_WILLNEED) {
 			GEM_BUG_ON(!obj->mm.quirked);
 			__i915_gem_object_unpin_pages(obj);
@@ -4020,6 +1139,24 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 	if (obj->mm.madv != __I915_MADV_PURGED)
 		obj->mm.madv = args->madv;
 
+	if (i915_gem_object_has_pages(obj)) {
+		struct list_head *list;
+
+		if (i915_gem_object_is_shrinkable(obj)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+			if (obj->mm.madv != I915_MADV_WILLNEED)
+				list = &i915->mm.purge_list;
+			else
+				list = &i915->mm.shrink_list;
+			list_move_tail(&obj->mm.link, list);
+
+			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+		}
+	}
+
 	/* if the object is no longer attached, discard its backing storage */
 	if (obj->mm.madv == I915_MADV_DONTNEED &&
 	    !i915_gem_object_has_pages(obj))
@@ -4033,355 +1170,13 @@ out:
 	return err;
 }
 
-static void
-frontbuffer_retire(struct i915_active_request *active,
-		   struct i915_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, typeof(*obj), frontbuffer_write);
-
-	intel_fb_obj_flush(obj, ORIGIN_CS);
-}
-
-void i915_gem_object_init(struct drm_i915_gem_object *obj,
-			  const struct drm_i915_gem_object_ops *ops)
-{
-	mutex_init(&obj->mm.lock);
-
-	spin_lock_init(&obj->vma.lock);
-	INIT_LIST_HEAD(&obj->vma.list);
-
-	INIT_LIST_HEAD(&obj->lut_list);
-	INIT_LIST_HEAD(&obj->batch_pool_link);
-
-	init_rcu_head(&obj->rcu);
-
-	obj->ops = ops;
-
-	reservation_object_init(&obj->__builtin_resv);
-	obj->resv = &obj->__builtin_resv;
-
-	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
-	i915_active_request_init(&obj->frontbuffer_write,
-				 NULL, frontbuffer_retire);
-
-	obj->mm.madv = I915_MADV_WILLNEED;
-	INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
-	mutex_init(&obj->mm.get_page.lock);
-
-	i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size);
-}
-
-static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
-		 I915_GEM_OBJECT_IS_SHRINKABLE,
-
-	.get_pages = i915_gem_object_get_pages_gtt,
-	.put_pages = i915_gem_object_put_pages_gtt,
-
-	.pwrite = i915_gem_object_pwrite_gtt,
-};
-
-static int i915_gem_object_create_shmem(struct drm_device *dev,
-					struct drm_gem_object *obj,
-					size_t size)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	unsigned long flags = VM_NORESERVE;
-	struct file *filp;
-
-	drm_gem_private_object_init(dev, obj, size);
-
-	if (i915->mm.gemfs)
-		filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
-						 flags);
-	else
-		filp = shmem_file_setup("i915", size, flags);
-
-	if (IS_ERR(filp))
-		return PTR_ERR(filp);
-
-	obj->filp = filp;
-
-	return 0;
-}
-
-struct drm_i915_gem_object *
-i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	struct address_space *mapping;
-	unsigned int cache_level;
-	gfp_t mask;
-	int ret;
-
-	/* There is a prevalence of the assumption that we fit the object's
-	 * page count inside a 32bit _signed_ variable. Let's document this and
-	 * catch if we ever need to fix it. In the meantime, if you do spot
-	 * such a local variable, please consider fixing!
-	 */
-	if (size >> PAGE_SHIFT > INT_MAX)
-		return ERR_PTR(-E2BIG);
-
-	if (overflows_type(size, obj->base.size))
-		return ERR_PTR(-E2BIG);
-
-	obj = i915_gem_object_alloc();
-	if (obj == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_object_create_shmem(&dev_priv->drm, &obj->base, size);
-	if (ret)
-		goto fail;
-
-	mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
-	if (IS_I965GM(dev_priv) || IS_I965G(dev_priv)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		mask &= ~__GFP_HIGHMEM;
-		mask |= __GFP_DMA32;
-	}
-
-	mapping = obj->base.filp->f_mapping;
-	mapping_set_gfp_mask(mapping, mask);
-	GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
-
-	i915_gem_object_init(obj, &i915_gem_object_ops);
-
-	obj->write_domain = I915_GEM_DOMAIN_CPU;
-	obj->read_domains = I915_GEM_DOMAIN_CPU;
-
-	if (HAS_LLC(dev_priv))
-		/* On some devices, we can have the GPU use the LLC (the CPU
-		 * cache) for about a 10% performance improvement
-		 * compared to uncached.  Graphics requests other than
-		 * display scanout are coherent with the CPU in
-		 * accessing this cache.  This means in this mode we
-		 * don't need to clflush on the CPU side, and on the
-		 * GPU side we only need to flush internal caches to
-		 * get data visible to the CPU.
-		 *
-		 * However, we maintain the display planes as UC, and so
-		 * need to rebind when first used as such.
-		 */
-		cache_level = I915_CACHE_LLC;
-	else
-		cache_level = I915_CACHE_NONE;
-
-	i915_gem_object_set_cache_coherency(obj, cache_level);
-
-	trace_i915_gem_object_create(obj);
-
-	return obj;
-
-fail:
-	i915_gem_object_free(obj);
-	return ERR_PTR(ret);
-}
-
-static bool discard_backing_storage(struct drm_i915_gem_object *obj)
-{
-	/* If we are the last user of the backing storage (be it shmemfs
-	 * pages or stolen etc), we know that the pages are going to be
-	 * immediately released. In this case, we can then skip copying
-	 * back the contents from the GPU.
-	 */
-
-	if (obj->mm.madv != I915_MADV_WILLNEED)
-		return false;
-
-	if (obj->base.filp == NULL)
-		return true;
-
-	/* At first glance, this looks racy, but then again so would be
-	 * userspace racing mmap against close. However, the first external
-	 * reference to the filp can only be obtained through the
-	 * i915_gem_mmap_ioctl() which safeguards us against the user
-	 * acquiring such a reference whilst we are in the middle of
-	 * freeing the object.
-	 */
-	return file_count(obj->base.filp) == 1;
-}
-
-static void __i915_gem_free_objects(struct drm_i915_private *i915,
-				    struct llist_node *freed)
-{
-	struct drm_i915_gem_object *obj, *on;
-	intel_wakeref_t wakeref;
-
-	wakeref = intel_runtime_pm_get(i915);
-	llist_for_each_entry_safe(obj, on, freed, freed) {
-		struct i915_vma *vma, *vn;
-
-		trace_i915_gem_object_destroy(obj);
-
-		mutex_lock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
-			GEM_BUG_ON(i915_vma_is_active(vma));
-			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_destroy(vma);
-		}
-		GEM_BUG_ON(!list_empty(&obj->vma.list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
-
-		/* This serializes freeing with the shrinker. Since the free
-		 * is delayed, first by RCU then by the workqueue, we want the
-		 * shrinker to be able to free pages of unreferenced objects,
-		 * or else we may oom whilst there are plenty of deferred
-		 * freed objects.
-		 */
-		if (i915_gem_object_has_pages(obj)) {
-			spin_lock(&i915->mm.obj_lock);
-			list_del_init(&obj->mm.link);
-			spin_unlock(&i915->mm.obj_lock);
-		}
-
-		mutex_unlock(&i915->drm.struct_mutex);
-
-		GEM_BUG_ON(obj->bind_count);
-		GEM_BUG_ON(obj->userfault_count);
-		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
-		GEM_BUG_ON(!list_empty(&obj->lut_list));
-
-		if (obj->ops->release)
-			obj->ops->release(obj);
-
-		if (WARN_ON(i915_gem_object_has_pinned_pages(obj)))
-			atomic_set(&obj->mm.pages_pin_count, 0);
-		__i915_gem_object_put_pages(obj, I915_MM_NORMAL);
-		GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-		if (obj->base.import_attach)
-			drm_prime_gem_destroy(&obj->base, NULL);
-
-		reservation_object_fini(&obj->__builtin_resv);
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(i915, obj->base.size);
-
-		bitmap_free(obj->bit_17);
-		i915_gem_object_free(obj);
-
-		GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
-		atomic_dec(&i915->mm.free_count);
-
-		if (on)
-			cond_resched();
-	}
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-static void i915_gem_flush_free_objects(struct drm_i915_private *i915)
-{
-	struct llist_node *freed;
-
-	/* Free the oldest, most stale object to keep the free_list short */
-	freed = NULL;
-	if (!llist_empty(&i915->mm.free_list)) { /* quick test for hotpath */
-		/* Only one consumer of llist_del_first() allowed */
-		spin_lock(&i915->mm.free_lock);
-		freed = llist_del_first(&i915->mm.free_list);
-		spin_unlock(&i915->mm.free_lock);
-	}
-	if (unlikely(freed)) {
-		freed->next = NULL;
-		__i915_gem_free_objects(i915, freed);
-	}
-}
-
-static void __i915_gem_free_work(struct work_struct *work)
-{
-	struct drm_i915_private *i915 =
-		container_of(work, struct drm_i915_private, mm.free_work);
-	struct llist_node *freed;
-
-	/*
-	 * All file-owned VMA should have been released by this point through
-	 * i915_gem_close_object(), or earlier by i915_gem_context_close().
-	 * However, the object may also be bound into the global GTT (e.g.
-	 * older GPUs without per-process support, or for direct access through
-	 * the GTT either for the user or for scanout). Those VMA still need to
-	 * unbound now.
-	 */
-
-	spin_lock(&i915->mm.free_lock);
-	while ((freed = llist_del_all(&i915->mm.free_list))) {
-		spin_unlock(&i915->mm.free_lock);
-
-		__i915_gem_free_objects(i915, freed);
-		if (need_resched())
-			return;
-
-		spin_lock(&i915->mm.free_lock);
-	}
-	spin_unlock(&i915->mm.free_lock);
-}
-
-static void __i915_gem_free_object_rcu(struct rcu_head *head)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(head, typeof(*obj), rcu);
-	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-
-	/*
-	 * We reuse obj->rcu for the freed list, so we had better not treat
-	 * it like a rcu_head from this point forwards. And we expect all
-	 * objects to be freed via this path.
-	 */
-	destroy_rcu_head(&obj->rcu);
-
-	/*
-	 * Since we require blocking on struct_mutex to unbind the freed
-	 * object from the GPU before releasing resources back to the
-	 * system, we can not do that directly from the RCU callback (which may
-	 * be a softirq context), but must instead then defer that work onto a
-	 * kthread. We use the RCU callback rather than move the freed object
-	 * directly onto the work queue so that we can mix between using the
-	 * worker and performing frees directly from subsequent allocations for
-	 * crude but effective memory throttling.
-	 */
-	if (llist_add(&obj->freed, &i915->mm.free_list))
-		queue_work(i915->wq, &i915->mm.free_work);
-}
-
-void i915_gem_free_object(struct drm_gem_object *gem_obj)
-{
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
-	if (obj->mm.quirked)
-		__i915_gem_object_unpin_pages(obj);
-
-	if (discard_backing_storage(obj))
-		obj->mm.madv = I915_MADV_DONTNEED;
-
-	/*
-	 * Before we free the object, make sure any pure RCU-only
-	 * read-side critical sections are complete, e.g.
-	 * i915_gem_busy_ioctl(). For the corresponding synchronized
-	 * lookup see i915_gem_object_lookup_rcu().
-	 */
-	atomic_inc(&to_i915(obj->base.dev)->mm.free_count);
-	call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!i915_gem_object_has_active_reference(obj) &&
-	    i915_gem_object_is_active(obj))
-		i915_gem_object_set_active_reference(obj);
-	else
-		i915_gem_object_put(obj);
-}
-
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
 	GEM_TRACE("\n");
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
 
 	/*
@@ -4401,141 +1196,10 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * it may impact the display and we are uncertain about the stability
 	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	intel_engines_sanitize(i915, false);
-
-	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(i915, wakeref);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	i915_gem_contexts_lost(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
-}
-
-void i915_gem_suspend(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref;
-
-	GEM_TRACE("\n");
-
-	wakeref = intel_runtime_pm_get(i915);
-
-	flush_workqueue(i915->wq);
-
-	mutex_lock(&i915->drm.struct_mutex);
-
-	/*
-	 * We have to flush all the executing contexts to main memory so
-	 * that they can saved in the hibernation image. To ensure the last
-	 * context image is coherent, we have to switch away from it. That
-	 * leaves the i915->kernel_context still active when
-	 * we actually suspend, and its image in memory may not match the GPU
-	 * state. Fortunately, the kernel_context is disposable and we do
-	 * not rely on its state.
-	 */
-	switch_to_kernel_context_sync(i915, i915->gt.active_engines);
-
-	mutex_unlock(&i915->drm.struct_mutex);
-	i915_reset_flush(i915);
-
-	drain_delayed_work(&i915->gt.retire_work);
-
-	/*
-	 * As the idle_work is rearming if it detects a race, play safe and
-	 * repeat the flush until it is definitely idle.
-	 */
-	drain_delayed_work(&i915->gt.idle_work);
-
-	/*
-	 * Assert that we successfully flushed all the work and
-	 * reset the GPU back to its idle, low power state.
-	 */
-	GEM_BUG_ON(i915->gt.awake);
-
-	intel_uc_suspend(i915);
-
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-void i915_gem_suspend_late(struct drm_i915_private *i915)
-{
-	struct drm_i915_gem_object *obj;
-	struct list_head *phases[] = {
-		&i915->mm.unbound_list,
-		&i915->mm.bound_list,
-		NULL
-	}, **phase;
-
-	/*
-	 * Neither the BIOS, ourselves or any other kernel
-	 * expects the system to be in execlists mode on startup,
-	 * so we need to reset the GPU back to legacy mode. And the only
-	 * known way to disable logical contexts is through a GPU reset.
-	 *
-	 * So in order to leave the system in a known default configuration,
-	 * always reset the GPU upon unload and suspend. Afterwards we then
-	 * clean up the GEM state tracking, flushing off the requests and
-	 * leaving the system in a known idle state.
-	 *
-	 * Note that is of the upmost importance that the GPU is idle and
-	 * all stray writes are flushed *before* we dismantle the backing
-	 * storage for the pinned objects.
-	 *
-	 * However, since we are uncertain that resetting the GPU on older
-	 * machines is a good idea, we don't - just in case it leaves the
-	 * machine in an unusable condition.
-	 */
-
-	mutex_lock(&i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
-			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
-	}
-	mutex_unlock(&i915->drm.struct_mutex);
+	intel_gt_sanitize(i915, false);
 
-	intel_uc_sanitize(i915);
-	i915_gem_sanitize(i915);
-}
-
-void i915_gem_resume(struct drm_i915_private *i915)
-{
-	GEM_TRACE("\n");
-
-	WARN_ON(i915->gt.awake);
-
-	mutex_lock(&i915->drm.struct_mutex);
-	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
-
-	i915_gem_restore_gtt_mappings(i915);
-	i915_gem_restore_fences(i915);
-
-	/*
-	 * As we didn't flush the kernel context before suspend, we cannot
-	 * guarantee that the context image is complete. So let's just reset
-	 * it and start again.
-	 */
-	intel_gt_resume(i915);
-
-	if (i915_gem_init_hw(i915))
-		goto err_wedged;
-
-	intel_uc_resume(i915);
-
-	/* Always reload a context for powersaving. */
-	if (!load_power_context(i915))
-		goto err_wedged;
-
-out_unlock:
 	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
-	mutex_unlock(&i915->drm.struct_mutex);
-	return;
-
-err_wedged:
-	if (!i915_reset_failed(i915)) {
-		dev_err(i915->drm.dev,
-			"Failed to re-initialize GPU, declaring it wedged!\n");
-		i915_gem_set_wedged(i915);
-	}
-	goto out_unlock;
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
@@ -4586,27 +1250,6 @@ static void init_unused_rings(struct drm_i915_private *dev_priv)
 	}
 }
 
-static int __i915_gem_restart_engines(void *data)
-{
-	struct drm_i915_private *i915 = data;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int err;
-
-	for_each_engine(engine, i915, id) {
-		err = engine->init_hw(engine);
-		if (err) {
-			DRM_ERROR("Failed to restart %s (%d)\n",
-				  engine->name, err);
-			return err;
-		}
-	}
-
-	intel_engines_set_scheduler_caps(i915);
-
-	return 0;
-}
-
 int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -4665,12 +1308,13 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
 	intel_mocs_init_l3cc_table(dev_priv);
 
 	/* Only when the HW is re-initialised, can we replay the requests */
-	ret = __i915_gem_restart_engines(dev_priv);
+	ret = intel_engines_resume(dev_priv);
 	if (ret)
 		goto cleanup_uc;
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 
+	intel_engines_set_scheduler_caps(dev_priv);
 	return 0;
 
 cleanup_uc:
@@ -4683,8 +1327,9 @@ out:
 
 static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 {
-	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	struct i915_gem_engines *e;
 	enum intel_engine_id id;
 	int err = 0;
 
@@ -4701,18 +1346,21 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	e = i915_gem_context_lock_engines(ctx);
+
 	for_each_engine(engine, i915, id) {
+		struct intel_context *ce = e->engines[id];
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
+		rq = intel_context_create_request(ce);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto out_ctx;
+			goto err_active;
 		}
 
 		err = 0;
-		if (engine->init_context)
-			err = engine->init_context(rq);
+		if (rq->engine->init_context)
+			err = rq->engine->init_context(rq);
 
 		i915_request_add(rq);
 		if (err)
@@ -4720,21 +1368,16 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 	/* Flush the default context image to memory, and enable powersaving. */
-	if (!load_power_context(i915)) {
+	if (!i915_gem_load_power_context(i915)) {
 		err = -EIO;
 		goto err_active;
 	}
 
 	for_each_engine(engine, i915, id) {
-		struct intel_context *ce;
-		struct i915_vma *state;
+		struct intel_context *ce = e->engines[id];
+		struct i915_vma *state = ce->state;
 		void *vaddr;
 
-		ce = intel_context_lookup(ctx, engine);
-		if (!ce)
-			continue;
-
-		state = ce->state;
 		if (!state)
 			continue;
 
@@ -4752,7 +1395,9 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 		if (err)
 			goto err_active;
 
+		i915_gem_object_lock(state->obj);
 		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+		i915_gem_object_unlock(state->obj);
 		if (err)
 			goto err_active;
 
@@ -4790,6 +1435,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 	}
 
 out_ctx:
+	i915_gem_context_unlock_engines(ctx);
 	i915_gem_context_set_closed(ctx);
 	i915_gem_context_put(ctx);
 	return err;
@@ -4842,6 +1488,23 @@ static void i915_gem_fini_scratch(struct drm_i915_private *i915)
 	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
 }
 
+static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		return 0;
+
+	for_each_engine(engine, i915, id) {
+		if (intel_engine_verify_workarounds(engine, "load"))
+			err = -EIO;
+	}
+
+	return err;
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -4853,11 +1516,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
-	if (HAS_LOGICAL_RING_CONTEXTS(dev_priv))
-		dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup;
-	else
-		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
-
 	i915_timelines_init(dev_priv);
 
 	ret = i915_gem_init_userptr(dev_priv);
@@ -4894,6 +1552,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 		goto err_ggtt;
 	}
 
+	ret = intel_engines_setup(dev_priv);
+	if (ret) {
+		GEM_BUG_ON(ret == -EIO);
+		goto err_unlock;
+	}
+
 	ret = i915_gem_contexts_init(dev_priv);
 	if (ret) {
 		GEM_BUG_ON(ret == -EIO);
@@ -4927,6 +1591,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 */
 	intel_init_clock_gating(dev_priv);
 
+	ret = intel_engines_verify_workarounds(dev_priv);
+	if (ret)
+		goto err_init_hw;
+
 	ret = __intel_engines_record_defaults(dev_priv);
 	if (ret)
 		goto err_init_hw;
@@ -4955,6 +1623,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_set_wedged(dev_priv);
 	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
@@ -4967,7 +1636,7 @@ err_uc_init:
 err_pm:
 	if (ret != -EIO) {
 		intel_cleanup_gt_powersave(dev_priv);
-		i915_gem_cleanup_engines(dev_priv);
+		intel_engines_cleanup(dev_priv);
 	}
 err_context:
 	if (ret != -EIO)
@@ -5014,8 +1683,12 @@ err_uc_misc:
 	return ret;
 }
 
-void i915_gem_fini(struct drm_i915_private *dev_priv)
+void i915_gem_fini_hw(struct drm_i915_private *dev_priv)
 {
+	GEM_BUG_ON(dev_priv->gt.awake);
+
+	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
+
 	i915_gem_suspend_late(dev_priv);
 	intel_disable_gt_powersave(dev_priv);
 
@@ -5025,7 +1698,15 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	intel_uc_fini_hw(dev_priv);
 	intel_uc_fini(dev_priv);
-	i915_gem_cleanup_engines(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	i915_gem_drain_freed_objects(dev_priv);
+}
+
+void i915_gem_fini(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_engines_cleanup(dev_priv);
 	i915_gem_contexts_fini(dev_priv);
 	i915_gem_fini_scratch(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -5048,77 +1729,32 @@ void i915_gem_init_mmio(struct drm_i915_private *i915)
 	i915_gem_sanitize(i915);
 }
 
-void
-i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-
-	for_each_engine(engine, dev_priv, id)
-		dev_priv->gt.cleanup_engine(engine);
-}
-
-void
-i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
-{
-	int i;
-
-	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
-	    !IS_CHERRYVIEW(dev_priv))
-		dev_priv->num_fence_regs = 32;
-	else if (INTEL_GEN(dev_priv) >= 4 ||
-		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
-		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
-		dev_priv->num_fence_regs = 16;
-	else
-		dev_priv->num_fence_regs = 8;
-
-	if (intel_vgpu_active(dev_priv))
-		dev_priv->num_fence_regs =
-				I915_READ(vgtif_reg(avail_rs.fence_num));
-
-	/* Initialize fence registers to zero */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
-		fence->i915 = dev_priv;
-		fence->id = i;
-		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
-	}
-	i915_gem_restore_fences(dev_priv);
-
-	i915_gem_detect_bit_6_swizzle(dev_priv);
-}
-
 static void i915_gem_init__mm(struct drm_i915_private *i915)
 {
-	spin_lock_init(&i915->mm.object_stat_lock);
 	spin_lock_init(&i915->mm.obj_lock);
 	spin_lock_init(&i915->mm.free_lock);
 
 	init_llist_head(&i915->mm.free_list);
 
-	INIT_LIST_HEAD(&i915->mm.unbound_list);
-	INIT_LIST_HEAD(&i915->mm.bound_list);
-	INIT_LIST_HEAD(&i915->mm.fence_list);
-	INIT_LIST_HEAD(&i915->mm.userfault_list);
+	INIT_LIST_HEAD(&i915->mm.purge_list);
+	INIT_LIST_HEAD(&i915->mm.shrink_list);
 
-	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+	i915_gem_init__objects(i915);
 }
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
 	int err;
 
+	intel_gt_pm_init(dev_priv);
+
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
+	spin_lock_init(&dev_priv->gt.closed_lock);
 
 	i915_gem_init__mm(dev_priv);
+	i915_gem_init__pm(dev_priv);
 
-	INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
-			  i915_gem_retire_work_handler);
-	INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
-			  i915_gem_idle_work_handler);
 	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
 	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
 	mutex_init(&dev_priv->gpu_error.wedge_mutex);
@@ -5140,7 +1776,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 	i915_gem_drain_freed_objects(dev_priv);
 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
-	WARN_ON(dev_priv->mm.object_count);
+	WARN_ON(dev_priv->mm.shrink_count);
 
 	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);
 
@@ -5160,11 +1796,7 @@ int i915_gem_freeze(struct drm_i915_private *dev_priv)
 int i915_gem_freeze_late(struct drm_i915_private *i915)
 {
 	struct drm_i915_gem_object *obj;
-	struct list_head *phases[] = {
-		&i915->mm.unbound_list,
-		&i915->mm.bound_list,
-		NULL
-	}, **phase;
+	intel_wakeref_t wakeref;
 
 	/*
 	 * Called just before we write the hibernation image.
@@ -5181,15 +1813,18 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 	 * the objects as well, see i915_gem_freeze()
 	 */
 
-	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	i915_gem_shrink(i915, -1UL, NULL, ~0);
 	i915_gem_drain_freed_objects(i915);
 
-	mutex_lock(&i915->drm.struct_mutex);
-	for (phase = phases; *phase; phase++) {
-		list_for_each_entry(obj, *phase, mm.link)
-			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
+	list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
+		i915_gem_object_lock(obj);
+		WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
+		i915_gem_object_unlock(obj);
 	}
-	mutex_unlock(&i915->drm.struct_mutex);
+
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	return 0;
 }
@@ -5270,276 +1905,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
 	}
 }
 
-/* Allocate a new GEM object and fill it with the supplied data */
-struct drm_i915_gem_object *
-i915_gem_object_create_from_data(struct drm_i915_private *dev_priv,
-			         const void *data, size_t size)
-{
-	struct drm_i915_gem_object *obj;
-	struct file *file;
-	size_t offset;
-	int err;
-
-	obj = i915_gem_object_create(dev_priv, round_up(size, PAGE_SIZE));
-	if (IS_ERR(obj))
-		return obj;
-
-	GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
-
-	file = obj->base.filp;
-	offset = 0;
-	do {
-		unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
-		struct page *page;
-		void *pgdata, *vaddr;
-
-		err = pagecache_write_begin(file, file->f_mapping,
-					    offset, len, 0,
-					    &page, &pgdata);
-		if (err < 0)
-			goto fail;
-
-		vaddr = kmap(page);
-		memcpy(vaddr, data, len);
-		kunmap(page);
-
-		err = pagecache_write_end(file, file->f_mapping,
-					  offset, len, len,
-					  page, pgdata);
-		if (err < 0)
-			goto fail;
-
-		size -= len;
-		data += len;
-		offset += len;
-	} while (size);
-
-	return obj;
-
-fail:
-	i915_gem_object_put(obj);
-	return ERR_PTR(err);
-}
-
-struct scatterlist *
-i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
-		       unsigned int n,
-		       unsigned int *offset)
-{
-	struct i915_gem_object_page_iter *iter = &obj->mm.get_page;
-	struct scatterlist *sg;
-	unsigned int idx, count;
-
-	might_sleep();
-	GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
-	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-
-	/* As we iterate forward through the sg, we record each entry in a
-	 * radixtree for quick repeated (backwards) lookups. If we have seen
-	 * this index previously, we will have an entry for it.
-	 *
-	 * Initial lookup is O(N), but this is amortized to O(1) for
-	 * sequential page access (where each new request is consecutive
-	 * to the previous one). Repeated lookups are O(lg(obj->base.size)),
-	 * i.e. O(1) with a large constant!
-	 */
-	if (n < READ_ONCE(iter->sg_idx))
-		goto lookup;
-
-	mutex_lock(&iter->lock);
-
-	/* We prefer to reuse the last sg so that repeated lookup of this
-	 * (or the subsequent) sg are fast - comparing against the last
-	 * sg is faster than going through the radixtree.
-	 */
-
-	sg = iter->sg_pos;
-	idx = iter->sg_idx;
-	count = __sg_page_count(sg);
-
-	while (idx + count <= n) {
-		void *entry;
-		unsigned long i;
-		int ret;
-
-		/* If we cannot allocate and insert this entry, or the
-		 * individual pages from this range, cancel updating the
-		 * sg_idx so that on this lookup we are forced to linearly
-		 * scan onwards, but on future lookups we will try the
-		 * insertion again (in which case we need to be careful of
-		 * the error return reporting that we have already inserted
-		 * this index).
-		 */
-		ret = radix_tree_insert(&iter->radix, idx, sg);
-		if (ret && ret != -EEXIST)
-			goto scan;
-
-		entry = xa_mk_value(idx);
-		for (i = 1; i < count; i++) {
-			ret = radix_tree_insert(&iter->radix, idx + i, entry);
-			if (ret && ret != -EEXIST)
-				goto scan;
-		}
-
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-scan:
-	iter->sg_pos = sg;
-	iter->sg_idx = idx;
-
-	mutex_unlock(&iter->lock);
-
-	if (unlikely(n < idx)) /* insertion completed by another thread */
-		goto lookup;
-
-	/* In case we failed to insert the entry into the radixtree, we need
-	 * to look beyond the current sg.
-	 */
-	while (idx + count <= n) {
-		idx += count;
-		sg = ____sg_next(sg);
-		count = __sg_page_count(sg);
-	}
-
-	*offset = n - idx;
-	return sg;
-
-lookup:
-	rcu_read_lock();
-
-	sg = radix_tree_lookup(&iter->radix, n);
-	GEM_BUG_ON(!sg);
-
-	/* If this index is in the middle of multi-page sg entry,
-	 * the radix tree will contain a value entry that points
-	 * to the start of that range. We will return the pointer to
-	 * the base page and the offset of this page within the
-	 * sg entry's range.
-	 */
-	*offset = 0;
-	if (unlikely(xa_is_value(sg))) {
-		unsigned long base = xa_to_value(sg);
-
-		sg = radix_tree_lookup(&iter->radix, base);
-		GEM_BUG_ON(!sg);
-
-		*offset = n - base;
-	}
-
-	rcu_read_unlock();
-
-	return sg;
-}
-
-struct page *
-i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return nth_page(sg_page(sg), offset);
-}
-
-/* Like i915_gem_object_get_page(), but mark the returned page dirty */
-struct page *
-i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj,
-			       unsigned int n)
-{
-	struct page *page;
-
-	page = i915_gem_object_get_page(obj, n);
-	if (!obj->mm.dirty)
-		set_page_dirty(page);
-
-	return page;
-}
-
-dma_addr_t
-i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj,
-				unsigned long n)
-{
-	struct scatterlist *sg;
-	unsigned int offset;
-
-	sg = i915_gem_object_get_sg(obj, n, &offset);
-	return sg_dma_address(sg) + (offset << PAGE_SHIFT);
-}
-
-int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
-{
-	struct sg_table *pages;
-	int err;
-
-	if (align > obj->base.size)
-		return -EINVAL;
-
-	if (obj->ops == &i915_gem_phys_ops)
-		return 0;
-
-	if (obj->ops != &i915_gem_object_ops)
-		return -EINVAL;
-
-	err = i915_gem_object_unbind(obj);
-	if (err)
-		return err;
-
-	mutex_lock(&obj->mm.lock);
-
-	if (obj->mm.madv != I915_MADV_WILLNEED) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.quirked) {
-		err = -EFAULT;
-		goto err_unlock;
-	}
-
-	if (obj->mm.mapping) {
-		err = -EBUSY;
-		goto err_unlock;
-	}
-
-	pages = __i915_gem_object_unset_pages(obj);
-
-	obj->ops = &i915_gem_phys_ops;
-
-	err = ____i915_gem_object_get_pages(obj);
-	if (err)
-		goto err_xfer;
-
-	/* Perma-pin (until release) the physical set of pages */
-	__i915_gem_object_pin_pages(obj);
-
-	if (!IS_ERR_OR_NULL(pages))
-		i915_gem_object_ops.put_pages(obj, pages);
-	mutex_unlock(&obj->mm.lock);
-	return 0;
-
-err_xfer:
-	obj->ops = &i915_gem_object_ops;
-	if (!IS_ERR_OR_NULL(pages)) {
-		unsigned int sg_page_sizes = i915_sg_page_sizes(pages->sgl);
-
-		__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
-	}
-err_unlock:
-	mutex_unlock(&obj->mm.lock);
-	return err;
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/scatterlist.c"
 #include "selftests/mock_gem_device.c"
-#include "selftests/huge_gem_object.c"
-#include "selftests/huge_pages.c"
-#include "selftests/i915_gem_object.c"
-#include "selftests/i915_gem_coherency.c"
 #include "selftests/i915_gem.c"
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 9074eb1e843f..fe82d3571072 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -75,9 +75,6 @@ struct drm_i915_private;
 
 #define I915_GEM_IDLE_TIMEOUT (HZ / 5)
 
-void i915_gem_park(struct drm_i915_private *i915);
-void i915_gem_unpark(struct drm_i915_private *i915);
-
 static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
 {
 	if (!atomic_fetch_inc(&t->count))
@@ -94,4 +91,9 @@ static inline bool __tasklet_enable(struct tasklet_struct *t)
 	return atomic_dec_and_test(&t->count);
 }
 
+static inline bool __tasklet_is_scheduled(struct tasklet_struct *t)
+{
+	return test_bit(TASKLET_STATE_SCHED, &t->state);
+}
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index f3890b664e3f..25a3e4d09a2f 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -55,7 +55,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 		list_for_each_entry_safe(obj, next,
 					 &pool->cache_list[n],
 					 batch_pool_link)
-			__i915_gem_object_release_unless_active(obj);
+			i915_gem_object_put(obj);
 
 		INIT_LIST_HEAD(&pool->cache_list[n]);
 	}
@@ -96,7 +96,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	list_for_each_entry(obj, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
 		if (i915_gem_object_is_active(obj)) {
-			struct reservation_object *resv = obj->resv;
+			struct reservation_object *resv = obj->base.resv;
 
 			if (!reservation_object_test_signaled_rcu(resv, true))
 				break;
@@ -119,7 +119,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 			}
 		}
 
-		GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv,
+		GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->base.resv,
 								 true));
 
 		if (obj->base.size >= size)
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 56947daaaf65..feeeeeaa54d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -9,6 +9,7 @@
 
 #include <linux/types.h>
 
+struct drm_i915_gem_object;
 struct intel_engine_cs;
 
 struct i915_gem_batch_pool {
@@ -19,7 +20,7 @@ struct i915_gem_batch_pool {
 void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
 			      struct intel_engine_cs *engine);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
-struct drm_i915_gem_object*
+struct drm_i915_gem_object *
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
 
 #endif /* I915_GEM_BATCH_POOL_H */
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h
deleted file mode 100644
index f390247561b3..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEM_CLFLUSH_H__
-#define __I915_GEM_CLFLUSH_H__
-
-struct drm_i915_private;
-struct drm_i915_gem_object;
-
-bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     unsigned int flags);
-#define I915_CLFLUSH_FORCE BIT(0)
-#define I915_CLFLUSH_SYNC BIT(1)
-
-#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 060f5903544a..a5783c4cb98b 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -28,6 +28,8 @@
 
 #include <drm/i915_drm.h>
 
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 #include "i915_trace.h"
@@ -36,15 +38,8 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
-static bool ggtt_is_idle(struct drm_i915_private *i915)
-{
-	return !i915->gt.active_requests;
-}
-
 static int ggtt_flush(struct drm_i915_private *i915)
 {
-	int err;
-
 	/*
 	 * Not everything in the GGTT is tracked via vma (otherwise we
 	 * could evict as required with minimal stalling) so we are forced
@@ -52,19 +47,10 @@ static int ggtt_flush(struct drm_i915_private *i915)
 	 * the hopes that we can then remove contexts and the like only
 	 * bound by their active reference.
 	 */
-	err = i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines);
-	if (err)
-		return err;
-
-	err = i915_gem_wait_for_idle(i915,
-				     I915_WAIT_INTERRUPTIBLE |
-				     I915_WAIT_LOCKED,
-				     MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		return err;
-
-	GEM_BUG_ON(!ggtt_is_idle(i915));
-	return 0;
+	return i915_gem_wait_for_idle(i915,
+				      I915_WAIT_INTERRUPTIBLE |
+				      I915_WAIT_LOCKED,
+				      MAX_SCHEDULE_TIMEOUT);
 }
 
 static bool
@@ -222,24 +208,17 @@ search_again:
 	 * us a termination condition, when the last retired context is
 	 * the kernel's there is no more we can evict.
 	 */
-	if (!ggtt_is_idle(dev_priv)) {
-		if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
-			return -EBUSY;
+	if (I915_SELFTEST_ONLY(igt_evict_ctl.fail_if_busy))
+		return -EBUSY;
 
-		ret = ggtt_flush(dev_priv);
-		if (ret)
-			return ret;
+	ret = ggtt_flush(dev_priv);
+	if (ret)
+		return ret;
 
-		cond_resched();
-		goto search_again;
-	}
+	cond_resched();
 
-	/*
-	 * If we still have pending pageflip completions, drop
-	 * back to userspace to give our workqueues time to
-	 * acquire our locks and unpin the old scanouts.
-	 */
-	return intel_has_pending_fb_unpin(dev_priv) ? -EAGAIN : -ENOSPC;
+	flags |= PIN_NONBLOCK;
+	goto search_again;
 
 found:
 	/* drm_mm doesn't allow any other other operations while
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index 3084f52e3372..0bf53ac1c835 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -22,7 +22,10 @@
  */
 
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_vgpu.h"
 
 /**
  * DOC: fence register handling
@@ -56,7 +59,7 @@
 
 #define pipelined 0
 
-static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
+static void i965_write_fence_reg(struct i915_fence_reg *fence,
 				 struct i915_vma *vma)
 {
 	i915_reg_t fence_reg_lo, fence_reg_hi;
@@ -92,9 +95,10 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 
-		/* To w/a incoherency with non-atomic 64-bit register updates,
+		/*
+		 * To w/a incoherency with non-atomic 64-bit register updates,
 		 * we split the 64-bit update into two 32-bit writes. In order
 		 * for a partial fence not to be evaluated between writes, we
 		 * precede the update with write to turn off the fence register,
@@ -103,16 +107,16 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence,
 		 * For extra levels of paranoia, we make sure each step lands
 		 * before applying the next step.
 		 */
-		I915_WRITE(fence_reg_lo, 0);
-		POSTING_READ(fence_reg_lo);
+		intel_uncore_write_fw(uncore, fence_reg_lo, 0);
+		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
 
-		I915_WRITE(fence_reg_hi, upper_32_bits(val));
-		I915_WRITE(fence_reg_lo, lower_32_bits(val));
-		POSTING_READ(fence_reg_lo);
+		intel_uncore_write_fw(uncore, fence_reg_hi, upper_32_bits(val));
+		intel_uncore_write_fw(uncore, fence_reg_lo, lower_32_bits(val));
+		intel_uncore_posting_read_fw(uncore, fence_reg_lo);
 	}
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
+static void i915_write_fence_reg(struct i915_fence_reg *fence,
 				 struct i915_vma *vma)
 {
 	u32 val;
@@ -144,15 +148,15 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
-		I915_WRITE(reg, val);
-		POSTING_READ(reg);
+		intel_uncore_write_fw(uncore, reg, val);
+		intel_uncore_posting_read_fw(uncore, reg);
 	}
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
+static void i830_write_fence_reg(struct i915_fence_reg *fence,
 				 struct i915_vma *vma)
 {
 	u32 val;
@@ -176,18 +180,19 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence,
 	}
 
 	if (!pipelined) {
-		struct drm_i915_private *dev_priv = fence->i915;
+		struct intel_uncore *uncore = &fence->i915->uncore;
 		i915_reg_t reg = FENCE_REG(fence->id);
 
-		I915_WRITE(reg, val);
-		POSTING_READ(reg);
+		intel_uncore_write_fw(uncore, reg, val);
+		intel_uncore_posting_read_fw(uncore, reg);
 	}
 }
 
-static void fence_write(struct drm_i915_fence_reg *fence,
+static void fence_write(struct i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
-	/* Previous access through the fence register is marshalled by
+	/*
+	 * Previous access through the fence register is marshalled by
 	 * the mb() inside the fault handlers (i915_gem_release_mmaps)
 	 * and explicitly managed for internal users.
 	 */
@@ -199,14 +204,15 @@ static void fence_write(struct drm_i915_fence_reg *fence,
 	else
 		i965_write_fence_reg(fence, vma);
 
-	/* Access through the fenced region afterwards is
+	/*
+	 * Access through the fenced region afterwards is
 	 * ordered by the posting reads whilst writing the registers.
 	 */
 
 	fence->dirty = false;
 }
 
-static int fence_update(struct drm_i915_fence_reg *fence,
+static int fence_update(struct i915_fence_reg *fence,
 			struct i915_vma *vma)
 {
 	intel_wakeref_t wakeref;
@@ -251,7 +257,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 			old->fence = NULL;
 		}
 
-		list_move(&fence->link, &fence->i915->mm.fence_list);
+		list_move(&fence->link, &fence->i915->ggtt.fence_list);
 	}
 
 	/*
@@ -264,7 +270,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 	 * be cleared before we can use any other fences to ensure that
 	 * the new fences do not overlap the elided clears, confusing HW.
 	 */
-	wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
+	wakeref = intel_runtime_pm_get_if_in_use(&fence->i915->runtime_pm);
 	if (!wakeref) {
 		GEM_BUG_ON(vma);
 		return 0;
@@ -275,10 +281,10 @@ static int fence_update(struct drm_i915_fence_reg *fence,
 
 	if (vma) {
 		vma->fence = fence;
-		list_move_tail(&fence->link, &fence->i915->mm.fence_list);
+		list_move_tail(&fence->link, &fence->i915->ggtt.fence_list);
 	}
 
-	intel_runtime_pm_put(fence->i915, wakeref);
+	intel_runtime_pm_put(&fence->i915->runtime_pm, wakeref);
 	return 0;
 }
 
@@ -295,7 +301,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
  */
 int i915_vma_put_fence(struct i915_vma *vma)
 {
-	struct drm_i915_fence_reg *fence = vma->fence;
+	struct i915_fence_reg *fence = vma->fence;
 
 	if (!fence)
 		return 0;
@@ -306,11 +312,11 @@ int i915_vma_put_fence(struct i915_vma *vma)
 	return fence_update(fence, NULL);
 }
 
-static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
+static struct i915_fence_reg *fence_find(struct drm_i915_private *i915)
 {
-	struct drm_i915_fence_reg *fence;
+	struct i915_fence_reg *fence;
 
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link) {
+	list_for_each_entry(fence, &i915->ggtt.fence_list, link) {
 		GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
 
 		if (fence->pin_count)
@@ -320,7 +326,7 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
 	}
 
 	/* Wait for completion of pending flips which consume fences */
-	if (intel_has_pending_fb_unpin(dev_priv))
+	if (intel_has_pending_fb_unpin(i915))
 		return ERR_PTR(-EAGAIN);
 
 	return ERR_PTR(-EDEADLK);
@@ -344,17 +350,17 @@ static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv)
  *
  * 0 on success, negative error code on failure.
  */
-int
-i915_vma_pin_fence(struct i915_vma *vma)
+int i915_vma_pin_fence(struct i915_vma *vma)
 {
-	struct drm_i915_fence_reg *fence;
+	struct i915_fence_reg *fence;
 	struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 	int err;
 
-	/* Note that we revoke fences on runtime suspend. Therefore the user
+	/*
+	 * Note that we revoke fences on runtime suspend. Therefore the user
 	 * must keep the device awake whilst using the fence.
 	 */
-	assert_rpm_wakelock_held(vma->vm->i915);
+	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
 
 	/* Just update our place in the LRU if our fence is getting reused. */
 	if (vma->fence) {
@@ -363,7 +369,7 @@ i915_vma_pin_fence(struct i915_vma *vma)
 		fence->pin_count++;
 		if (!fence->dirty) {
 			list_move_tail(&fence->link,
-				       &fence->i915->mm.fence_list);
+				       &fence->i915->ggtt.fence_list);
 			return 0;
 		}
 	} else if (set) {
@@ -393,28 +399,27 @@ out_unpin:
 
 /**
  * i915_reserve_fence - Reserve a fence for vGPU
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * This function walks the fence regs looking for a free one and remove
  * it from the fence_list. It is used to reserve fence for vGPU to use.
  */
-struct drm_i915_fence_reg *
-i915_reserve_fence(struct drm_i915_private *dev_priv)
+struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915)
 {
-	struct drm_i915_fence_reg *fence;
+	struct i915_fence_reg *fence;
 	int count;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	/* Keep at least one fence available for the display engine. */
 	count = 0;
-	list_for_each_entry(fence, &dev_priv->mm.fence_list, link)
+	list_for_each_entry(fence, &i915->ggtt.fence_list, link)
 		count += !fence->pin_count;
 	if (count <= 1)
 		return ERR_PTR(-ENOSPC);
 
-	fence = fence_find(dev_priv);
+	fence = fence_find(i915);
 	if (IS_ERR(fence))
 		return fence;
 
@@ -435,28 +440,28 @@ i915_reserve_fence(struct drm_i915_private *dev_priv)
  *
  * This function add a reserved fence register from vGPU to the fence_list.
  */
-void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
+void i915_unreserve_fence(struct i915_fence_reg *fence)
 {
 	lockdep_assert_held(&fence->i915->drm.struct_mutex);
 
-	list_add(&fence->link, &fence->i915->mm.fence_list);
+	list_add(&fence->link, &fence->i915->ggtt.fence_list);
 }
 
 /**
  * i915_gem_restore_fences - restore fence state
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Restore the hw fence state to match the software tracking again, to be called
  * after a gpu reset and on resume. Note that on runtime suspend we only cancel
  * the fences, to be reacquired by the user later.
  */
-void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
+void i915_gem_restore_fences(struct drm_i915_private *i915)
 {
 	int i;
 
 	rcu_read_lock(); /* keep obj alive as we dereference */
-	for (i = 0; i < dev_priv->num_fence_regs; i++) {
-		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
+	for (i = 0; i < i915->ggtt.num_fences; i++) {
+		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
 		struct i915_vma *vma = READ_ONCE(reg->vma);
 
 		GEM_BUG_ON(vma && vma->fence != reg);
@@ -523,18 +528,18 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
 
 /**
  * i915_gem_detect_bit_6_swizzle - detect bit 6 swizzling pattern
- * @dev_priv: i915 device private
+ * @i915: i915 device private
  *
  * Detects bit 6 swizzling of address lookup between IGD access and CPU
  * access through main memory.
  */
-void
-i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
+static void detect_bit_6_swizzle(struct drm_i915_private *i915)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	u32 swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	u32 swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
-	if (INTEL_GEN(dev_priv) >= 8 || IS_VALLEYVIEW(dev_priv)) {
+	if (INTEL_GEN(i915) >= 8 || IS_VALLEYVIEW(i915)) {
 		/*
 		 * On BDW+, swizzling is not used. We leave the CPU memory
 		 * controller in charge of optimizing memory accesses without
@@ -544,9 +549,9 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (INTEL_GEN(dev_priv) >= 6) {
-		if (dev_priv->preserve_bios_swizzle) {
-			if (I915_READ(DISP_ARB_CTL) &
+	} else if (INTEL_GEN(i915) >= 6) {
+		if (i915->preserve_bios_swizzle) {
+			if (intel_uncore_read(uncore, DISP_ARB_CTL) &
 			    DISP_TILE_SURFACE_SWIZZLING) {
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 				swizzle_y = I915_BIT_6_SWIZZLE_9;
@@ -556,15 +561,17 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 			}
 		} else {
 			u32 dimm_c0, dimm_c1;
-			dimm_c0 = I915_READ(MAD_DIMM_C0);
-			dimm_c1 = I915_READ(MAD_DIMM_C1);
+			dimm_c0 = intel_uncore_read(uncore, MAD_DIMM_C0);
+			dimm_c1 = intel_uncore_read(uncore, MAD_DIMM_C1);
 			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
 			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			/* Enable swizzling when the channels are populated
+			/*
+			 * Enable swizzling when the channels are populated
 			 * with identically sized dimms. We don't need to check
 			 * the 3rd channel because no cpu with gpu attached
 			 * ships in that configuration. Also, swizzling only
-			 * makes sense for 2 channels anyway. */
+			 * makes sense for 2 channels anyway.
+			 */
 			if (dimm_c0 == dimm_c1) {
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 				swizzle_y = I915_BIT_6_SWIZZLE_9;
@@ -573,20 +580,23 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 			}
 		}
-	} else if (IS_GEN(dev_priv, 5)) {
-		/* On Ironlake whatever DRAM config, GPU always do
+	} else if (IS_GEN(i915, 5)) {
+		/*
+		 * On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 		swizzle_y = I915_BIT_6_SWIZZLE_9;
-	} else if (IS_GEN(dev_priv, 2)) {
-		/* As far as we know, the 865 doesn't have these bit 6
+	} else if (IS_GEN(i915, 2)) {
+		/*
+		 * As far as we know, the 865 doesn't have these bit 6
 		 * swizzling issues.
 		 */
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_G45(dev_priv) || IS_I965G(dev_priv) || IS_G33(dev_priv)) {
-		/* The 965, G33, and newer, have a very flexible memory
+	} else if (IS_G45(i915) || IS_I965G(i915) || IS_G33(i915)) {
+		/*
+		 * The 965, G33, and newer, have a very flexible memory
 		 * configuration.  It will enable dual-channel mode
 		 * (interleaving) on as much memory as it can, and the GPU
 		 * will additionally sometimes enable different bit 6
@@ -612,14 +622,16 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 * banks of memory are paired and unswizzled on the
 		 * uneven portion, so leave that as unknown.
 		 */
-		if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
+		if (intel_uncore_read(uncore, C0DRB3) ==
+		    intel_uncore_read(uncore, C1DRB3)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 			swizzle_y = I915_BIT_6_SWIZZLE_9;
 		}
 	} else {
-		u32 dcc;
+		u32 dcc = intel_uncore_read(uncore, DCC);
 
-		/* On 9xx chipsets, channel interleave by the CPU is
+		/*
+		 * On 9xx chipsets, channel interleave by the CPU is
 		 * determined by DCC.  For single-channel, neither the CPU
 		 * nor the GPU do swizzling.  For dual channel interleaved,
 		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
@@ -627,7 +639,6 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 * can be based on either bit 11 (haven't seen this yet) or
 		 * bit 17 (common).
 		 */
-		dcc = I915_READ(DCC);
 		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
 		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
@@ -636,7 +647,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 			break;
 		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
 			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
-				/* This is the base swizzling by the GPU for
+				/*
+				 * This is the base swizzling by the GPU for
 				 * tiled buffers.
 				 */
 				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
@@ -654,8 +666,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		}
 
 		/* check for L-shaped memory aka modified enhanced addressing */
-		if (IS_GEN(dev_priv, 4) &&
-		    !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
+		if (IS_GEN(i915, 4) &&
+		    !(intel_uncore_read(uncore, DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 		}
@@ -670,7 +682,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 
 	if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
 	    swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) {
-		/* Userspace likes to explode if it sees unknown swizzling,
+		/*
+		 * Userspace likes to explode if it sees unknown swizzling,
 		 * so lie. We will finish the lie when reporting through
 		 * the get-tiling-ioctl by reporting the physical swizzle
 		 * mode as unknown instead.
@@ -679,13 +692,13 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
 		 * bit17 dependent, and so we need to also prevent the pages
 		 * from being moved.
 		 */
-		dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
+		i915->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
 		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 	}
 
-	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
-	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
+	i915->mm.bit_6_swizzle_x = swizzle_x;
+	i915->mm.bit_6_swizzle_y = swizzle_y;
 }
 
 /*
@@ -693,8 +706,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
  * bit 17 of its physical address and therefore being interpreted differently
  * by the GPU.
  */
-static void
-i915_gem_swizzle_page(struct page *page)
+static void i915_gem_swizzle_page(struct page *page)
 {
 	char temp[64];
 	char *vaddr;
@@ -783,3 +795,42 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
 		i++;
 	}
 }
+
+void i915_ggtt_init_fences(struct i915_ggtt *ggtt)
+{
+	struct drm_i915_private *i915 = ggtt->vm.i915;
+	int num_fences;
+	int i;
+
+	INIT_LIST_HEAD(&ggtt->fence_list);
+	INIT_LIST_HEAD(&ggtt->userfault_list);
+	intel_wakeref_auto_init(&ggtt->userfault_wakeref, &i915->runtime_pm);
+
+	detect_bit_6_swizzle(i915);
+
+	if (INTEL_GEN(i915) >= 7 &&
+	    !(IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)))
+		num_fences = 32;
+	else if (INTEL_GEN(i915) >= 4 ||
+		 IS_I945G(i915) || IS_I945GM(i915) ||
+		 IS_G33(i915) || IS_PINEVIEW(i915))
+		num_fences = 16;
+	else
+		num_fences = 8;
+
+	if (intel_vgpu_active(i915))
+		num_fences = intel_uncore_read(&i915->uncore,
+					       vgtif_reg(avail_rs.fence_num));
+
+	/* Initialize fence registers to zero */
+	for (i = 0; i < num_fences; i++) {
+		struct i915_fence_reg *fence = &ggtt->fence_regs[i];
+
+		fence->i915 = i915;
+		fence->id = i;
+		list_add_tail(&fence->link, &ggtt->fence_list);
+	}
+	ggtt->num_fences = num_fences;
+
+	i915_gem_restore_fences(i915);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
index 09dcaf14121b..d2da98828179 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
@@ -26,13 +26,17 @@
 #define __I915_FENCE_REG_H__
 
 #include <linux/list.h>
+#include <linux/types.h>
 
+struct drm_i915_gem_object;
 struct drm_i915_private;
+struct i915_ggtt;
 struct i915_vma;
+struct sg_table;
 
 #define I965_FENCE_PAGE 4096UL
 
-struct drm_i915_fence_reg {
+struct i915_fence_reg {
 	struct list_head link;
 	struct drm_i915_private *i915;
 	struct i915_vma *vma;
@@ -49,4 +53,17 @@ struct drm_i915_fence_reg {
 	bool dirty;
 };
 
+/* i915_gem_fence_reg.c */
+struct i915_fence_reg *i915_reserve_fence(struct drm_i915_private *i915);
+void i915_unreserve_fence(struct i915_fence_reg *fence);
+
+void i915_gem_restore_fences(struct drm_i915_private *i915);
+
+void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj,
+				       struct sg_table *pages);
+void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
+					 struct sg_table *pages);
+
+void i915_ggtt_init_fences(struct i915_ggtt *ggtt);
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8f460cc4cc1f..8ab820145ea6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -35,12 +35,13 @@
 
 #include <drm/i915_drm.h>
 
+#include "display/intel_frontbuffer.h"
+
 #include "i915_drv.h"
-#include "i915_vgpu.h"
-#include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_trace.h"
+#include "i915_vgpu.h"
 #include "intel_drv.h"
-#include "intel_frontbuffer.h"
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 
@@ -108,22 +109,26 @@
 static int
 i915_get_ggtt_vma_pages(struct i915_vma *vma);
 
-static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
+static void gen6_ggtt_invalidate(struct drm_i915_private *i915)
 {
+	struct intel_uncore *uncore = &i915->uncore;
+
 	/*
 	 * Note that as an uncached mmio write, this will flush the
 	 * WCB of the writes into the GGTT before it triggers the invalidate.
 	 */
-	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 }
 
-static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
+static void guc_ggtt_invalidate(struct drm_i915_private *i915)
 {
-	gen6_ggtt_invalidate(dev_priv);
-	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+	struct intel_uncore *uncore = &i915->uncore;
+
+	gen6_ggtt_invalidate(i915);
+	intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
 }
 
-static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
+static void gmch_ggtt_invalidate(struct drm_i915_private *i915)
 {
 	intel_gtt_chipset_flush();
 }
@@ -341,11 +346,11 @@ static struct page *stash_pop_page(struct pagestash *stash)
 
 static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
 {
-	int nr;
+	unsigned int nr;
 
 	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
 
-	nr = min_t(int, pvec->nr, pagevec_space(&stash->pvec));
+	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
 	memcpy(stash->pvec.pages + stash->pvec.nr,
 	       pvec->pages + pvec->nr - nr,
 	       sizeof(pvec->pages[0]) * nr);
@@ -399,7 +404,8 @@ static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
 		page = stack.pages[--stack.nr];
 
 		/* Merge spare WC pages to the global stash */
-		stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+		if (stack.nr)
+			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
 
 		/* Push any surplus WC pages onto the local VM stash */
 		if (stack.nr)
@@ -469,13 +475,17 @@ static void vm_free_page(struct i915_address_space *vm, struct page *page)
 	 */
 	might_sleep();
 	spin_lock(&vm->free_pages.lock);
-	if (!pagevec_add(&vm->free_pages.pvec, page))
+	while (!pagevec_space(&vm->free_pages.pvec))
 		vm_free_pages_release(vm, false);
+	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+	pagevec_add(&vm->free_pages.pvec, page);
 	spin_unlock(&vm->free_pages.lock);
 }
 
 static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 {
+	kref_init(&vm->ref);
+
 	/*
 	 * The vm->mutex must be reclaim safe (for use in the shrinker).
 	 * Do a dummy acquire now under fs_reclaim so that any allocation
@@ -652,7 +662,8 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	pt->used_ptes = 0;
+	atomic_set(&pt->used, 0);
+
 	return pt;
 }
 
@@ -674,117 +685,71 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
 	fill32_px(vm, pt, vm->scratch_pte);
 }
 
-static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
+static struct i915_page_directory *__alloc_pd(void)
 {
 	struct i915_page_directory *pd;
 
-	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+	pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
+
 	if (unlikely(!pd))
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
-	if (unlikely(setup_px(vm, pd))) {
-		kfree(pd);
-		return ERR_PTR(-ENOMEM);
-	}
+	memset(&pd->base, 0, sizeof(pd->base));
+	atomic_set(&pd->used, 0);
+	spin_lock_init(&pd->lock);
 
-	pd->used_pdes = 0;
-	return pd;
-}
+	/* for safety */
+	pd->entry[0] = NULL;
 
-static void free_pd(struct i915_address_space *vm,
-		    struct i915_page_directory *pd)
-{
-	cleanup_px(vm, pd);
-	kfree(pd);
-}
-
-static void gen8_initialize_pd(struct i915_address_space *vm,
-			       struct i915_page_directory *pd)
-{
-	fill_px(vm, pd,
-		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
-	memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES);
+	return pd;
 }
 
-static int __pdp_init(struct i915_address_space *vm,
-		      struct i915_page_directory_pointer *pdp)
+static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 {
-	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
+	struct i915_page_directory *pd;
 
-	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
-					    I915_GFP_ALLOW_FAIL);
-	if (unlikely(!pdp->page_directory))
-		return -ENOMEM;
+	pd = __alloc_pd();
+	if (unlikely(!pd))
+		return ERR_PTR(-ENOMEM);
 
-	memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
+	if (unlikely(setup_px(vm, pd))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	return 0;
+	return pd;
 }
 
-static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
 {
-	kfree(pdp->page_directory);
-	pdp->page_directory = NULL;
+	return pd->base.page;
 }
 
-static struct i915_page_directory_pointer *
-alloc_pdp(struct i915_address_space *vm)
+static void free_pd(struct i915_address_space *vm,
+		    struct i915_page_directory *pd)
 {
-	struct i915_page_directory_pointer *pdp;
-	int ret = -ENOMEM;
-
-	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
-
-	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
-	if (!pdp)
-		return ERR_PTR(-ENOMEM);
-
-	ret = __pdp_init(vm, pdp);
-	if (ret)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pdp);
-	if (ret)
-		goto fail_page_m;
-
-	return pdp;
+	if (likely(pd_has_phys_page(pd)))
+		cleanup_px(vm, pd);
 
-fail_page_m:
-	__pdp_fini(pdp);
-fail_bitmap:
-	kfree(pdp);
-
-	return ERR_PTR(ret);
+	kfree(pd);
 }
 
-static void free_pdp(struct i915_address_space *vm,
-		     struct i915_page_directory_pointer *pdp)
+static void init_pd_with_page(struct i915_address_space *vm,
+			      struct i915_page_directory * const pd,
+			      struct i915_page_table *pt)
 {
-	__pdp_fini(pdp);
-
-	if (!i915_vm_is_4lvl(vm))
-		return;
-
-	cleanup_px(vm, pdp);
-	kfree(pdp);
+	fill_px(vm, pd, gen8_pde_encode(px_dma(pt), I915_CACHE_LLC));
+	memset_p(pd->entry, pt, 512);
 }
 
-static void gen8_initialize_pdp(struct i915_address_space *vm,
-				struct i915_page_directory_pointer *pdp)
+static void init_pd(struct i915_address_space *vm,
+		    struct i915_page_directory * const pd,
+		    struct i915_page_directory * const to)
 {
-	gen8_ppgtt_pdpe_t scratch_pdpe;
-
-	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
-
-	fill_px(vm, pdp, scratch_pdpe);
-}
+	GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));
 
-static void gen8_initialize_pml4(struct i915_address_space *vm,
-				 struct i915_pml4 *pml4)
-{
-	fill_px(vm, pml4,
-		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
-	memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
+	fill_px(vm, pd, gen8_pdpe_encode(px_dma(to), I915_CACHE_LLC));
+	memset_p(pd->entry, to, 512);
 }
 
 /*
@@ -793,7 +758,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
  * context switching/execlist queuing code takes extra steps
  * to ensure that tlbs are flushed.
  */
-static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
+static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
 {
 	ppgtt->pd_dirty_engines = ALL_ENGINES;
 }
@@ -808,17 +773,12 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
 	unsigned int num_entries = gen8_pte_count(start, length);
 	gen8_pte_t *vaddr;
 
-	GEM_BUG_ON(num_entries > pt->used_ptes);
-
-	pt->used_ptes -= num_entries;
-	if (!pt->used_ptes)
-		return true;
-
 	vaddr = kmap_atomic_px(pt);
 	memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
 	kunmap_atomic(vaddr);
 
-	return false;
+	GEM_BUG_ON(num_entries > atomic_read(&pt->used));
+	return !atomic_sub_return(num_entries, &pt->used);
 }
 
 static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
@@ -828,8 +788,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
 {
 	gen8_pde_t *vaddr;
 
-	pd->page_table[pde] = pt;
-
 	vaddr = kmap_atomic_px(pd);
 	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
 	kunmap_atomic(vaddr);
@@ -843,30 +801,37 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
+		bool free = false;
+
 		GEM_BUG_ON(pt == vm->scratch_pt);
 
 		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
 			continue;
 
-		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
-		GEM_BUG_ON(!pd->used_pdes);
-		pd->used_pdes--;
+		spin_lock(&pd->lock);
+		if (!atomic_read(&pt->used)) {
+			gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+			pd->entry[pde] = vm->scratch_pt;
 
-		free_pt(vm, pt);
+			GEM_BUG_ON(!atomic_read(&pd->used));
+			atomic_dec(&pd->used);
+			free = true;
+		}
+		spin_unlock(&pd->lock);
+		if (free)
+			free_pt(vm, pt);
 	}
 
-	return !pd->used_pdes;
+	return !atomic_read(&pd->used);
 }
 
-static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
-				struct i915_page_directory_pointer *pdp,
+static void gen8_ppgtt_set_pdpe(struct i915_page_directory *pdp,
 				struct i915_page_directory *pd,
 				unsigned int pdpe)
 {
 	gen8_ppgtt_pdpe_t *vaddr;
 
-	pdp->page_directory[pdpe] = pd;
-	if (!i915_vm_is_4lvl(vm))
+	if (!pd_has_phys_page(pdp))
 		return;
 
 	vaddr = kmap_atomic_px(pdp);
@@ -878,42 +843,49 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
  * Caller can use the return value to update higher-level entries
  */
 static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-				 struct i915_page_directory_pointer *pdp,
+				 struct i915_page_directory * const pdp,
 				 u64 start, u64 length)
 {
 	struct i915_page_directory *pd;
 	unsigned int pdpe;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+		bool free = false;
+
 		GEM_BUG_ON(pd == vm->scratch_pd);
 
 		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
 			continue;
 
-		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		GEM_BUG_ON(!pdp->used_pdpes);
-		pdp->used_pdpes--;
+		spin_lock(&pdp->lock);
+		if (!atomic_read(&pd->used)) {
+			gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
+			pdp->entry[pdpe] = vm->scratch_pd;
 
-		free_pd(vm, pd);
+			GEM_BUG_ON(!atomic_read(&pdp->used));
+			atomic_dec(&pdp->used);
+			free = true;
+		}
+		spin_unlock(&pdp->lock);
+		if (free)
+			free_pd(vm, pd);
 	}
 
-	return !pdp->used_pdpes;
+	return !atomic_read(&pdp->used);
 }
 
 static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
-	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
+	gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
 }
 
-static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
-				 struct i915_page_directory_pointer *pdp,
+static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4,
+				 struct i915_page_directory *pdp,
 				 unsigned int pml4e)
 {
 	gen8_ppgtt_pml4e_t *vaddr;
 
-	pml4->pdps[pml4e] = pdp;
-
 	vaddr = kmap_atomic_px(pml4);
 	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
 	kunmap_atomic(vaddr);
@@ -926,22 +898,29 @@ static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
 static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_pml4 *pml4 = &ppgtt->pml4;
-	struct i915_page_directory_pointer *pdp;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pml4 = ppgtt->pd;
+	struct i915_page_directory *pdp;
 	unsigned int pml4e;
 
 	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+		bool free = false;
 		GEM_BUG_ON(pdp == vm->scratch_pdp);
 
 		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
 			continue;
 
-		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-
-		free_pdp(vm, pdp);
+		spin_lock(&pml4->lock);
+		if (!atomic_read(&pdp->used)) {
+			gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+			pml4->entry[pml4e] = vm->scratch_pdp;
+			free = true;
+		}
+		spin_unlock(&pml4->lock);
+		if (free)
+			free_pd(vm, pdp);
 	}
 }
 
@@ -972,8 +951,8 @@ static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
 }
 
 static __always_inline bool
-gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
-			      struct i915_page_directory_pointer *pdp,
+gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
+			      struct i915_page_directory *pdp,
 			      struct sgt_dma *iter,
 			      struct gen8_insert_pte *idx,
 			      enum i915_cache_level cache_level,
@@ -985,8 +964,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 	bool ret;
 
 	GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-	pd = pdp->page_directory[idx->pdpe];
-	vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
+	pd = i915_pd_entry(pdp, idx->pdpe);
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
 	do {
 		vaddr[idx->pte] = pte_encode | iter->dma;
 
@@ -1016,11 +995,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 				}
 
 				GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
-				pd = pdp->page_directory[idx->pdpe];
+				pd = pdp->entry[idx->pdpe];
 			}
 
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(pd->page_table[idx->pde]);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
 		}
 	} while (1);
 	kunmap_atomic(vaddr);
@@ -1033,18 +1012,18 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
 	struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
+	gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx,
 				      cache_level, flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 }
 
 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
-					   struct i915_page_directory_pointer **pdps,
+					   struct i915_page_directory *pml4,
 					   struct sgt_dma *iter,
 					   enum i915_cache_level cache_level,
 					   u32 flags)
@@ -1055,8 +1034,9 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 
 	do {
 		struct gen8_insert_pte idx = gen8_insert_pte(start);
-		struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
-		struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
+		struct i915_page_directory *pdp =
+			i915_pdp_entry(pml4, idx.pml4e);
+		struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
 		unsigned int page_size;
 		bool maybe_64K = false;
 		gen8_pte_t encode = pte_encode;
@@ -1074,7 +1054,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 
 			vaddr = kmap_atomic_px(pd);
 		} else {
-			struct i915_page_table *pt = pd->page_table[idx.pde];
+			struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
 
 			index = idx.pte;
 			max = GEN8_PTES;
@@ -1149,7 +1129,8 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 				u16 i;
 
 				encode = vma->vm->scratch_pte;
-				vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
+				vaddr = kmap_atomic_px(i915_pt_entry(pd,
+								     idx.pde));
 
 				for (i = 1; i < index; i += 16)
 					memset64(vaddr + i, encode, 15);
@@ -1167,17 +1148,18 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct sgt_dma iter = sgt_dma(vma);
-	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
+	struct i915_page_directory * const pml4 = ppgtt->pd;
 
 	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
+		gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
 					       flags);
 	} else {
 		struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-		while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
+		while (gen8_ppgtt_insert_pte_entries(ppgtt,
+						     i915_pdp_entry(pml4, idx.pml4e++),
 						     &iter, &idx, cache_level,
 						     flags))
 			GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
@@ -1192,8 +1174,8 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
 	int i;
 
 	for (i = 0; i < I915_PDES; i++) {
-		if (pd->page_table[i] != vm->scratch_pt)
-			free_pt(vm, pd->page_table[i]);
+		if (pd->entry[i] != vm->scratch_pt)
+			free_pt(vm, pd->entry[i]);
 	}
 }
 
@@ -1207,9 +1189,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	 */
 	if (vm->has_read_only &&
 	    vm->i915->kernel_context &&
-	    vm->i915->kernel_context->ppgtt) {
-		struct i915_address_space *clone =
-			&vm->i915->kernel_context->ppgtt->vm;
+	    vm->i915->kernel_context->vm) {
+		struct i915_address_space *clone = vm->i915->kernel_context->vm;
 
 		GEM_BUG_ON(!clone->has_read_only);
 
@@ -1243,7 +1224,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	}
 
 	if (i915_vm_is_4lvl(vm)) {
-		vm->scratch_pdp = alloc_pdp(vm);
+		vm->scratch_pdp = alloc_pd(vm);
 		if (IS_ERR(vm->scratch_pdp)) {
 			ret = PTR_ERR(vm->scratch_pdp);
 			goto free_pd;
@@ -1251,9 +1232,9 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	}
 
 	gen8_initialize_pt(vm, vm->scratch_pt);
-	gen8_initialize_pd(vm, vm->scratch_pd);
+	init_pd_with_page(vm, vm->scratch_pd, vm->scratch_pt);
 	if (i915_vm_is_4lvl(vm))
-		gen8_initialize_pdp(vm, vm->scratch_pdp);
+		init_pd(vm, vm->scratch_pdp, vm->scratch_pd);
 
 	return 0;
 
@@ -1267,7 +1248,7 @@ free_scratch_page:
 	return ret;
 }
 
-static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
+static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
 	struct drm_i915_private *dev_priv = vm->i915;
@@ -1275,7 +1256,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
 	int i;
 
 	if (i915_vm_is_4lvl(vm)) {
-		const u64 daddr = px_dma(&ppgtt->pml4);
+		const u64 daddr = px_dma(ppgtt->pd);
 
 		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
 		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
@@ -1305,55 +1286,58 @@ static void gen8_free_scratch(struct i915_address_space *vm)
 		return;
 
 	if (i915_vm_is_4lvl(vm))
-		free_pdp(vm, vm->scratch_pdp);
+		free_pd(vm, vm->scratch_pdp);
 	free_pd(vm, vm->scratch_pd);
 	free_pt(vm, vm->scratch_pt);
 	cleanup_scratch_page(vm);
 }
 
 static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
-				    struct i915_page_directory_pointer *pdp)
+				    struct i915_page_directory *pdp)
 {
 	const unsigned int pdpes = i915_pdpes_per_pdp(vm);
 	int i;
 
 	for (i = 0; i < pdpes; i++) {
-		if (pdp->page_directory[i] == vm->scratch_pd)
+		if (pdp->entry[i] == vm->scratch_pd)
 			continue;
 
-		gen8_free_page_tables(vm, pdp->page_directory[i]);
-		free_pd(vm, pdp->page_directory[i]);
+		gen8_free_page_tables(vm, pdp->entry[i]);
+		free_pd(vm, pdp->entry[i]);
 	}
 
-	free_pdp(vm, pdp);
+	free_pd(vm, pdp);
 }
 
-static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
 {
+	struct i915_page_directory * const pml4 = ppgtt->pd;
 	int i;
 
 	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
-		if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp)
+		struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
+
+		if (pdp == ppgtt->vm.scratch_pdp)
 			continue;
 
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]);
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
 	}
 
-	cleanup_px(&ppgtt->vm, &ppgtt->pml4);
+	free_pd(&ppgtt->vm, pml4);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = vm->i915;
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct drm_i915_private *i915 = vm->i915;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 
-	if (intel_vgpu_active(dev_priv))
+	if (intel_vgpu_active(i915))
 		gen8_ppgtt_notify_vgt(ppgtt, false);
 
 	if (i915_vm_is_4lvl(vm))
 		gen8_ppgtt_cleanup_4lvl(ppgtt);
 	else
-		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp);
+		gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
 
 	gen8_free_scratch(vm);
 }
@@ -1362,129 +1346,190 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
 			       struct i915_page_directory *pd,
 			       u64 start, u64 length)
 {
-	struct i915_page_table *pt;
+	struct i915_page_table *pt, *alloc = NULL;
 	u64 from = start;
 	unsigned int pde;
+	int ret = 0;
 
+	spin_lock(&pd->lock);
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		int count = gen8_pte_count(start, length);
+		const int count = gen8_pte_count(start, length);
 
 		if (pt == vm->scratch_pt) {
-			pd->used_pdes++;
+			spin_unlock(&pd->lock);
 
-			pt = alloc_pt(vm);
+			pt = fetch_and_zero(&alloc);
+			if (!pt)
+				pt = alloc_pt(vm);
 			if (IS_ERR(pt)) {
-				pd->used_pdes--;
+				ret = PTR_ERR(pt);
 				goto unwind;
 			}
 
 			if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
 				gen8_initialize_pt(vm, pt);
 
-			gen8_ppgtt_set_pde(vm, pd, pt, pde);
-			GEM_BUG_ON(pd->used_pdes > I915_PDES);
+			spin_lock(&pd->lock);
+			if (pd->entry[pde] == vm->scratch_pt) {
+				gen8_ppgtt_set_pde(vm, pd, pt, pde);
+				pd->entry[pde] = pt;
+				atomic_inc(&pd->used);
+			} else {
+				alloc = pt;
+				pt = pd->entry[pde];
+			}
 		}
 
-		pt->used_ptes += count;
+		atomic_add(count, &pt->used);
 	}
-	return 0;
+	spin_unlock(&pd->lock);
+	goto out;
 
 unwind:
 	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
-	return -ENOMEM;
+out:
+	if (alloc)
+		free_pt(vm, alloc);
+	return ret;
 }
 
 static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
-				struct i915_page_directory_pointer *pdp,
+				struct i915_page_directory *pdp,
 				u64 start, u64 length)
 {
-	struct i915_page_directory *pd;
+	struct i915_page_directory *pd, *alloc = NULL;
 	u64 from = start;
 	unsigned int pdpe;
-	int ret;
+	int ret = 0;
 
+	spin_lock(&pdp->lock);
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
 		if (pd == vm->scratch_pd) {
-			pdp->used_pdpes++;
+			spin_unlock(&pdp->lock);
 
-			pd = alloc_pd(vm);
+			pd = fetch_and_zero(&alloc);
+			if (!pd)
+				pd = alloc_pd(vm);
 			if (IS_ERR(pd)) {
-				pdp->used_pdpes--;
+				ret = PTR_ERR(pd);
 				goto unwind;
 			}
 
-			gen8_initialize_pd(vm, pd);
-			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
-			GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
+			init_pd_with_page(vm, pd, vm->scratch_pt);
+
+			spin_lock(&pdp->lock);
+			if (pdp->entry[pdpe] == vm->scratch_pd) {
+				gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
+				pdp->entry[pdpe] = pd;
+				atomic_inc(&pdp->used);
+			} else {
+				alloc = pd;
+				pd = pdp->entry[pdpe];
+			}
 		}
+		atomic_inc(&pd->used);
+		spin_unlock(&pdp->lock);
 
 		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
 		if (unlikely(ret))
 			goto unwind_pd;
-	}
 
-	return 0;
+		spin_lock(&pdp->lock);
+		atomic_dec(&pd->used);
+	}
+	spin_unlock(&pdp->lock);
+	goto out;
 
 unwind_pd:
-	if (!pd->used_pdes) {
-		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
-		GEM_BUG_ON(!pdp->used_pdpes);
-		pdp->used_pdpes--;
+	spin_lock(&pdp->lock);
+	if (atomic_dec_and_test(&pd->used)) {
+		gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
+		GEM_BUG_ON(!atomic_read(&pdp->used));
+		atomic_dec(&pdp->used);
 		free_pd(vm, pd);
 	}
+	spin_unlock(&pdp->lock);
 unwind:
 	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
-	return -ENOMEM;
+out:
+	if (alloc)
+		free_pd(vm, alloc);
+	return ret;
 }
 
 static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
 				 u64 start, u64 length)
 {
 	return gen8_ppgtt_alloc_pdp(vm,
-				    &i915_vm_to_ppgtt(vm)->pdp, start, length);
+				    i915_vm_to_ppgtt(vm)->pd, start, length);
 }
 
 static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
 				 u64 start, u64 length)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct i915_pml4 *pml4 = &ppgtt->pml4;
-	struct i915_page_directory_pointer *pdp;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pml4 = ppgtt->pd;
+	struct i915_page_directory *pdp, *alloc = NULL;
 	u64 from = start;
+	int ret = 0;
 	u32 pml4e;
-	int ret;
 
+	spin_lock(&pml4->lock);
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (pml4->pdps[pml4e] == vm->scratch_pdp) {
-			pdp = alloc_pdp(vm);
-			if (IS_ERR(pdp))
+		if (pdp == vm->scratch_pdp) {
+			spin_unlock(&pml4->lock);
+
+			pdp = fetch_and_zero(&alloc);
+			if (!pdp)
+				pdp = alloc_pd(vm);
+			if (IS_ERR(pdp)) {
+				ret = PTR_ERR(pdp);
 				goto unwind;
+			}
+
+			init_pd(vm, pdp, vm->scratch_pd);
 
-			gen8_initialize_pdp(vm, pdp);
-			gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+			spin_lock(&pml4->lock);
+			if (pml4->entry[pml4e] == vm->scratch_pdp) {
+				gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+				pml4->entry[pml4e] = pdp;
+			} else {
+				alloc = pdp;
+				pdp = pml4->entry[pml4e];
+			}
 		}
+		atomic_inc(&pdp->used);
+		spin_unlock(&pml4->lock);
 
 		ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
 		if (unlikely(ret))
 			goto unwind_pdp;
-	}
 
-	return 0;
+		spin_lock(&pml4->lock);
+		atomic_dec(&pdp->used);
+	}
+	spin_unlock(&pml4->lock);
+	goto out;
 
 unwind_pdp:
-	if (!pdp->used_pdpes) {
+	spin_lock(&pml4->lock);
+	if (atomic_dec_and_test(&pdp->used)) {
 		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-		free_pdp(vm, pdp);
+		free_pd(vm, pdp);
 	}
+	spin_unlock(&pml4->lock);
 unwind:
 	gen8_ppgtt_clear_4lvl(vm, from, start - from);
-	return -ENOMEM;
+out:
+	if (alloc)
+		free_pd(vm, alloc);
+	return ret;
 }
 
-static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 {
 	struct i915_address_space *vm = &ppgtt->vm;
-	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
+	struct i915_page_directory *pdp = ppgtt->pd;
 	struct i915_page_directory *pd;
 	u64 start = 0, length = ppgtt->vm.total;
 	u64 from = start;
@@ -1495,29 +1540,29 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
 		if (IS_ERR(pd))
 			goto unwind;
 
-		gen8_initialize_pd(vm, pd);
-		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
-		pdp->used_pdpes++;
+		init_pd_with_page(vm, pd, vm->scratch_pt);
+		gen8_ppgtt_set_pdpe(pdp, pd, pdpe);
+
+		atomic_inc(&pdp->used);
 	}
 
-	pdp->used_pdpes++; /* never remove */
+	atomic_inc(&pdp->used); /* never remove */
+
 	return 0;
 
 unwind:
 	start -= from;
 	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
-		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+		gen8_ppgtt_set_pdpe(pdp, vm->scratch_pd, pdpe);
 		free_pd(vm, pd);
 	}
-	pdp->used_pdpes = 0;
+	atomic_set(&pdp->used, 0);
 	return -ENOMEM;
 }
 
 static void ppgtt_init(struct drm_i915_private *i915,
-		       struct i915_hw_ppgtt *ppgtt)
+		       struct i915_ppgtt *ppgtt)
 {
-	kref_init(&ppgtt->ref);
-
 	ppgtt->vm.i915 = i915;
 	ppgtt->vm.dma = &i915->drm.pdev->dev;
 	ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
@@ -1537,9 +1582,9 @@ static void ppgtt_init(struct drm_i915_private *i915,
  * space.
  *
  */
-static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	int err;
 
 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
@@ -1566,27 +1611,34 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 	if (err)
 		goto err_free;
 
+	ppgtt->pd = __alloc_pd();
+	if (!ppgtt->pd) {
+		err = -ENOMEM;
+		goto err_free_scratch;
+	}
+
 	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		err = setup_px(&ppgtt->vm, &ppgtt->pml4);
+		err = setup_px(&ppgtt->vm, ppgtt->pd);
 		if (err)
-			goto err_scratch;
+			goto err_free_pdp;
 
-		gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4);
+		init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp);
 
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
 		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
-		err = __pdp_init(&ppgtt->vm, &ppgtt->pdp);
-		if (err)
-			goto err_scratch;
+		/*
+		 * We don't need to setup dma for top level pdp, only
+		 * for entries. So point entries to scratch.
+		 */
+		memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd,
+			 GEN8_3LVL_PDPES);
 
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
-			if (err) {
-				__pdp_fini(&ppgtt->pdp);
-				goto err_scratch;
-			}
+			if (err)
+				goto err_free_pdp;
 		}
 
 		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
@@ -1601,7 +1653,9 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 
 	return ppgtt;
 
-err_scratch:
+err_free_pdp:
+	free_pd(&ppgtt->vm, ppgtt->pd);
+err_free_scratch:
 	gen8_free_scratch(&ppgtt->vm);
 err_free:
 	kfree(ppgtt);
@@ -1609,7 +1663,7 @@ err_free:
 }
 
 /* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
 				  const unsigned int pde,
 				  const struct i915_page_table *pt)
 {
@@ -1638,8 +1692,9 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
 
 	for_each_engine(engine, dev_priv, id) {
 		/* GFX_MODE is per-ring on gen7+ */
-		I915_WRITE(RING_MODE_GEN7(engine),
-			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
 	}
 }
 
@@ -1665,15 +1720,16 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
 static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 				   u64 start, u64 length)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+	const gen6_pte_t scratch_pte = vm->scratch_pte;
 	unsigned int pde = first_entry / GEN6_PTES;
 	unsigned int pte = first_entry % GEN6_PTES;
 	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
-	const gen6_pte_t scratch_pte = vm->scratch_pte;
 
 	while (num_entries) {
-		struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++];
+		struct i915_page_table * const pt =
+			i915_pt_entry(ppgtt->base.pd, pde++);
 		const unsigned int count = min(num_entries, GEN6_PTES - pte);
 		gen6_pte_t *vaddr;
 
@@ -1681,9 +1737,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 
 		num_entries -= count;
 
-		GEM_BUG_ON(count > pt->used_ptes);
-		pt->used_ptes -= count;
-		if (!pt->used_ptes)
+		GEM_BUG_ON(count > atomic_read(&pt->used));
+		if (!atomic_sub_return(count, &pt->used))
 			ppgtt->scan_for_unused_pt = true;
 
 		/*
@@ -1706,7 +1761,8 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      enum i915_cache_level cache_level,
 				      u32 flags)
 {
-	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	struct i915_page_directory * const pd = ppgtt->pd;
 	unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
 	unsigned act_pt = first_entry / GEN6_PTES;
 	unsigned act_pte = first_entry % GEN6_PTES;
@@ -1714,9 +1770,9 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sgt_dma iter = sgt_dma(vma);
 	gen6_pte_t *vaddr;
 
-	GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt);
+	GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt);
 
-	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
+	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
 	do {
 		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
@@ -1732,7 +1788,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 
 		if (++act_pte == GEN6_PTES) {
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
+			vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
 			act_pte = 0;
 		}
 	} while (1);
@@ -1744,50 +1800,72 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 static int gen6_alloc_va_range(struct i915_address_space *vm,
 			       u64 start, u64 length)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
-	struct i915_page_table *pt;
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	struct i915_page_directory * const pd = ppgtt->base.pd;
+	struct i915_page_table *pt, *alloc = NULL;
+	intel_wakeref_t wakeref;
 	u64 from = start;
 	unsigned int pde;
 	bool flush = false;
+	int ret = 0;
 
-	gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
+	wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+	spin_lock(&pd->lock);
+	gen6_for_each_pde(pt, pd, start, length, pde) {
 		const unsigned int count = gen6_pte_count(start, length);
 
 		if (pt == vm->scratch_pt) {
-			pt = alloc_pt(vm);
-			if (IS_ERR(pt))
+			spin_unlock(&pd->lock);
+
+			pt = fetch_and_zero(&alloc);
+			if (!pt)
+				pt = alloc_pt(vm);
+			if (IS_ERR(pt)) {
+				ret = PTR_ERR(pt);
 				goto unwind_out;
+			}
 
 			gen6_initialize_pt(vm, pt);
-			ppgtt->base.pd.page_table[pde] = pt;
 
-			if (i915_vma_is_bound(ppgtt->vma,
-					      I915_VMA_GLOBAL_BIND)) {
-				gen6_write_pde(ppgtt, pde, pt);
-				flush = true;
+			spin_lock(&pd->lock);
+			if (pd->entry[pde] == vm->scratch_pt) {
+				pd->entry[pde] = pt;
+				if (i915_vma_is_bound(ppgtt->vma,
+						      I915_VMA_GLOBAL_BIND)) {
+					gen6_write_pde(ppgtt, pde, pt);
+					flush = true;
+				}
+			} else {
+				alloc = pt;
+				pt = pd->entry[pde];
 			}
-
-			GEM_BUG_ON(pt->used_ptes);
 		}
 
-		pt->used_ptes += count;
+		atomic_add(count, &pt->used);
 	}
+	spin_unlock(&pd->lock);
 
 	if (flush) {
 		mark_tlbs_dirty(&ppgtt->base);
-		gen6_ggtt_invalidate(ppgtt->base.vm.i915);
+		gen6_ggtt_invalidate(vm->i915);
 	}
 
-	return 0;
+	goto out;
 
 unwind_out:
 	gen6_ppgtt_clear_range(vm, from, start - from);
-	return -ENOMEM;
+out:
+	if (alloc)
+		free_pt(vm, alloc);
+	intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+	return ret;
 }
 
-static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
 {
 	struct i915_address_space * const vm = &ppgtt->base.vm;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
 	struct i915_page_table *unused;
 	u32 pde;
 	int ret;
@@ -1807,8 +1885,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
 	}
 
 	gen6_initialize_pt(vm, vm->scratch_pt);
-	gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
-		ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
+
+	gen6_for_all_pdes(unused, pd, pde)
+		pd->entry[pde] = vm->scratch_pt;
 
 	return 0;
 }
@@ -1819,24 +1898,77 @@ static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
 	cleanup_scratch_page(vm);
 }
 
-static void gen6_ppgtt_free_pd(struct gen6_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
 {
+	struct i915_page_directory * const pd = ppgtt->base.pd;
 	struct i915_page_table *pt;
 	u32 pde;
 
-	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
+	gen6_for_all_pdes(pt, pd, pde)
 		if (pt != ppgtt->base.vm.scratch_pt)
 			free_pt(&ppgtt->base.vm, pt);
 }
 
+struct gen6_ppgtt_cleanup_work {
+	struct work_struct base;
+	struct i915_vma *vma;
+};
+
+static void gen6_ppgtt_cleanup_work(struct work_struct *wrk)
+{
+	struct gen6_ppgtt_cleanup_work *work =
+		container_of(wrk, typeof(*work), base);
+	/* Side note, vma->vm is the GGTT not the ppgtt we just destroyed! */
+	struct drm_i915_private *i915 = work->vma->vm->i915;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	i915_vma_destroy(work->vma);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	kfree(work);
+}
+
+static int nop_set_pages(struct i915_vma *vma)
+{
+	return -ENODEV;
+}
+
+static void nop_clear_pages(struct i915_vma *vma)
+{
+}
+
+static int nop_bind(struct i915_vma *vma,
+		    enum i915_cache_level cache_level,
+		    u32 unused)
+{
+	return -ENODEV;
+}
+
+static void nop_unbind(struct i915_vma *vma)
+{
+}
+
+static const struct i915_vma_ops nop_vma_ops = {
+	.set_pages = nop_set_pages,
+	.clear_pages = nop_clear_pages,
+	.bind_vma = nop_bind,
+	.unbind_vma = nop_unbind,
+};
+
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+	struct gen6_ppgtt_cleanup_work *work = ppgtt->work;
 
-	i915_vma_destroy(ppgtt->vma);
+	/* FIXME remove the struct_mutex to bring the locking under control */
+	INIT_WORK(&work->base, gen6_ppgtt_cleanup_work);
+	work->vma = ppgtt->vma;
+	work->vma->ops = &nop_vma_ops;
+	schedule_work(&work->base);
 
 	gen6_ppgtt_free_pd(ppgtt);
 	gen6_ppgtt_free_scratch(vm);
+	kfree(ppgtt->base.pd);
 }
 
 static int pd_vma_set_pages(struct i915_vma *vma)
@@ -1857,15 +1989,15 @@ static int pd_vma_bind(struct i915_vma *vma,
 		       u32 unused)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
-	struct gen6_hw_ppgtt *ppgtt = vma->private;
+	struct gen6_ppgtt *ppgtt = vma->private;
 	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
 	struct i915_page_table *pt;
 	unsigned int pde;
 
-	ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+	ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
 
-	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde)
+	gen6_for_all_pdes(pt, ppgtt->base.pd, pde)
 		gen6_write_pde(ppgtt, pde, pt);
 
 	mark_tlbs_dirty(&ppgtt->base);
@@ -1876,7 +2008,8 @@ static int pd_vma_bind(struct i915_vma *vma,
 
 static void pd_vma_unbind(struct i915_vma *vma)
 {
-	struct gen6_hw_ppgtt *ppgtt = vma->private;
+	struct gen6_ppgtt *ppgtt = vma->private;
+	struct i915_page_directory * const pd = ppgtt->base.pd;
 	struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt;
 	struct i915_page_table *pt;
 	unsigned int pde;
@@ -1885,12 +2018,12 @@ static void pd_vma_unbind(struct i915_vma *vma)
 		return;
 
 	/* Free all no longer used page tables */
-	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
-		if (pt->used_ptes || pt == scratch_pt)
+	gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+		if (atomic_read(&pt->used) || pt == scratch_pt)
 			continue;
 
 		free_pt(&ppgtt->base.vm, pt);
-		ppgtt->base.pd.page_table[pde] = scratch_pt;
+		pd->entry[pde] = scratch_pt;
 	}
 
 	ppgtt->scan_for_unused_pt = false;
@@ -1903,7 +2036,7 @@ static const struct i915_vma_ops pd_vma_ops = {
 	.unbind_vma = pd_vma_unbind,
 };
 
-static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
 {
 	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
 	struct i915_ggtt *ggtt = &i915->ggtt;
@@ -1929,6 +2062,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
 	INIT_LIST_HEAD(&vma->obj_link);
+	INIT_LIST_HEAD(&vma->closed_link);
 
 	mutex_lock(&vma->vm->mutex);
 	list_add(&vma->vm_link, &vma->vm->unbound_list);
@@ -1937,9 +2071,9 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 	return vma;
 }
 
-int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err;
 
 	GEM_BUG_ON(ppgtt->base.vm.closed);
@@ -1971,9 +2105,9 @@ unpin:
 	return err;
 }
 
-void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 
 	GEM_BUG_ON(!ppgtt->pin_count);
 	if (--ppgtt->pin_count)
@@ -1982,9 +2116,9 @@ void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
 	i915_vma_unpin(ppgtt->vma);
 }
 
-void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
 {
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
 
 	if (!ppgtt->pin_count)
 		return;
@@ -1993,10 +2127,10 @@ void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
 	i915_vma_unpin(ppgtt->vma);
 }
 
-static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 {
 	struct i915_ggtt * const ggtt = &i915->ggtt;
-	struct gen6_hw_ppgtt *ppgtt;
+	struct gen6_ppgtt *ppgtt;
 	int err;
 
 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
@@ -2012,9 +2146,21 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
+	ppgtt->work = kmalloc(sizeof(*ppgtt->work), GFP_KERNEL);
+	if (!ppgtt->work) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ppgtt->base.pd = __alloc_pd();
+	if (!ppgtt->base.pd) {
+		err = -ENOMEM;
+		goto err_work;
+	}
+
 	err = gen6_ppgtt_init_scratch(ppgtt);
 	if (err)
-		goto err_free;
+		goto err_pd;
 
 	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
 	if (IS_ERR(ppgtt->vma)) {
@@ -2026,6 +2172,10 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 err_scratch:
 	gen6_ppgtt_free_scratch(&ppgtt->base.vm);
+err_pd:
+	kfree(ppgtt->base.pd);
+err_work:
+	kfree(ppgtt->work);
 err_free:
 	kfree(ppgtt);
 	return ERR_PTR(err);
@@ -2077,8 +2227,8 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-static struct i915_hw_ppgtt *
-__hw_ppgtt_create(struct drm_i915_private *i915)
+static struct i915_ppgtt *
+__ppgtt_create(struct drm_i915_private *i915)
 {
 	if (INTEL_GEN(i915) < 8)
 		return gen6_ppgtt_create(i915);
@@ -2086,12 +2236,12 @@ __hw_ppgtt_create(struct drm_i915_private *i915)
 		return gen8_ppgtt_create(i915);
 }
 
-struct i915_hw_ppgtt *
+struct i915_ppgtt *
 i915_ppgtt_create(struct drm_i915_private *i915)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 
-	ppgtt = __hw_ppgtt_create(i915);
+	ppgtt = __ppgtt_create(i915);
 	if (IS_ERR(ppgtt))
 		return ppgtt;
 
@@ -2117,21 +2267,23 @@ static void ppgtt_destroy_vma(struct i915_address_space *vm)
 	}
 }
 
-void i915_ppgtt_release(struct kref *kref)
+void i915_vm_release(struct kref *kref)
 {
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(kref, struct i915_hw_ppgtt, ref);
+	struct i915_address_space *vm =
+		container_of(kref, struct i915_address_space, ref);
 
-	trace_i915_ppgtt_release(&ppgtt->vm);
+	GEM_BUG_ON(i915_is_ggtt(vm));
+	trace_i915_ppgtt_release(vm);
 
-	ppgtt_destroy_vma(&ppgtt->vm);
+	ppgtt_destroy_vma(vm);
 
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list));
-	GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list));
+	GEM_BUG_ON(!list_empty(&vm->bound_list));
+	GEM_BUG_ON(!list_empty(&vm->unbound_list));
 
-	ppgtt->vm.cleanup(&ppgtt->vm);
-	i915_address_space_fini(&ppgtt->vm);
-	kfree(ppgtt);
+	vm->cleanup(vm);
+	i915_address_space_fini(vm);
+
+	kfree(vm);
 }
 
 /* Certain Gen5 chipsets require require idling the GPU before
@@ -2145,69 +2297,6 @@ static bool needs_idle_maps(struct drm_i915_private *dev_priv)
 	return IS_GEN(dev_priv, 5) && IS_MOBILE(dev_priv) && intel_vtd_active();
 }
 
-static void gen6_check_faults(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u32 fault;
-
-	for_each_engine(engine, dev_priv, id) {
-		fault = I915_READ(RING_FAULT_REG(engine));
-		if (fault & RING_FAULT_VALID) {
-			DRM_DEBUG_DRIVER("Unexpected fault\n"
-					 "\tAddr: 0x%08lx\n"
-					 "\tAddress space: %s\n"
-					 "\tSource ID: %d\n"
-					 "\tType: %d\n",
-					 fault & PAGE_MASK,
-					 fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
-					 RING_FAULT_SRCID(fault),
-					 RING_FAULT_FAULT_TYPE(fault));
-		}
-	}
-}
-
-static void gen8_check_faults(struct drm_i915_private *dev_priv)
-{
-	u32 fault = I915_READ(GEN8_RING_FAULT_REG);
-
-	if (fault & RING_FAULT_VALID) {
-		u32 fault_data0, fault_data1;
-		u64 fault_addr;
-
-		fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
-		fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
-		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
-			     ((u64)fault_data0 << 12);
-
-		DRM_DEBUG_DRIVER("Unexpected fault\n"
-				 "\tAddr: 0x%08x_%08x\n"
-				 "\tAddress space: %s\n"
-				 "\tEngine ID: %d\n"
-				 "\tSource ID: %d\n"
-				 "\tType: %d\n",
-				 upper_32_bits(fault_addr),
-				 lower_32_bits(fault_addr),
-				 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
-				 GEN8_RING_FAULT_ENGINE_ID(fault),
-				 RING_FAULT_SRCID(fault),
-				 RING_FAULT_FAULT_TYPE(fault));
-	}
-}
-
-void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
-{
-	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
-	if (INTEL_GEN(dev_priv) >= 8)
-		gen8_check_faults(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 6)
-		gen6_check_faults(dev_priv);
-	else
-		return;
-
-	i915_clear_error_registers(dev_priv);
-}
-
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
 {
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
@@ -2526,7 +2615,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (i915_gem_object_is_readonly(obj))
 		pte_flags |= PTE_READ_ONLY;
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
 
 	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -2546,7 +2635,7 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
 	struct drm_i915_private *i915 = vma->vm->i915;
 	intel_wakeref_t wakeref;
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
 }
 
@@ -2564,7 +2653,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 		pte_flags |= PTE_READ_ONLY;
 
 	if (flags & I915_VMA_LOCAL_BIND) {
-		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
+		struct i915_ppgtt *appgtt = i915->mm.aliasing_ppgtt;
 
 		if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
 			ret = appgtt->vm.allocate_va_range(&appgtt->vm,
@@ -2581,7 +2670,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_GLOBAL_BIND) {
 		intel_wakeref_t wakeref;
 
-		with_intel_runtime_pm(i915, wakeref) {
+		with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 			vma->vm->insert_entries(vma->vm, vma,
 						cache_level, pte_flags);
 		}
@@ -2598,7 +2687,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
 		struct i915_address_space *vm = vma->vm;
 		intel_wakeref_t wakeref;
 
-		with_intel_runtime_pm(i915, wakeref)
+		with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 			vm->clear_range(vm, vma->node.start, vma->size);
 	}
 
@@ -2660,10 +2749,10 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node,
 		*end -= I915_GTT_PAGE_SIZE;
 }
 
-int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
+static int init_aliasing_ppgtt(struct drm_i915_private *i915)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	int err;
 
 	ppgtt = i915_ppgtt_create(i915);
@@ -2696,25 +2785,51 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
 	return 0;
 
 err_ppgtt:
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 	return err;
 }
 
-void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
+static void fini_aliasing_ppgtt(struct drm_i915_private *i915)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 
 	ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
 	if (!ppgtt)
 		return;
 
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 
 	ggtt->vm.vma_ops.bind_vma   = ggtt_bind_vma;
 	ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
 }
 
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+	u64 size;
+	int ret;
+
+	if (!USES_GUC(ggtt->vm.i915))
+		return 0;
+
+	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+	size = ggtt->vm.total - GUC_GGTT_TOP;
+
+	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+				   PIN_NOEVICT);
+	if (ret)
+		DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+	return ret;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{
+	if (drm_mm_node_allocated(&ggtt->uc_fw))
+		drm_mm_remove_node(&ggtt->uc_fw);
+}
+
 int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 {
 	/* Let GEM Manage all of the aperture.
@@ -2738,7 +2853,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	 * why.
 	 */
 	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
-			       intel_guc_reserved_gtt_size(&dev_priv->guc));
+			       intel_wopcm_guc_size(&dev_priv->wopcm));
 
 	ret = intel_vgt_balloon(dev_priv);
 	if (ret)
@@ -2752,6 +2867,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
+	/*
+	 * The upper portion of the GuC address space has a sizeable hole
+	 * (several MB) that is inaccessible by GuC. Reserve this range within
+	 * GGTT as it can comfortably hold GuC/HuC firmware images.
+	 */
+	ret = ggtt_reserve_guc_top(ggtt);
+	if (ret)
+		goto err_reserve;
+
 	/* Clear any non-preallocated blocks */
 	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
@@ -2764,14 +2888,16 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
 
 	if (INTEL_PPGTT(dev_priv) == INTEL_PPGTT_ALIASING) {
-		ret = i915_gem_init_aliasing_ppgtt(dev_priv);
+		ret = init_aliasing_ppgtt(dev_priv);
 		if (ret)
-			goto err;
+			goto err_appgtt;
 	}
 
 	return 0;
 
-err:
+err_appgtt:
+	ggtt_release_guc_top(ggtt);
+err_reserve:
 	drm_mm_remove_node(&ggtt->error_capture);
 	return ret;
 }
@@ -2789,7 +2915,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	ggtt->vm.closed = true;
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_fini_aliasing_ppgtt(dev_priv);
+	fini_aliasing_ppgtt(dev_priv);
 
 	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
 		WARN_ON(i915_vma_unbind(vma));
@@ -2797,6 +2923,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
 
+	ggtt_release_guc_top(ggtt);
+
 	if (drm_mm_initialized(&ggtt->vm.mm)) {
 		intel_vgt_deballoon(dev_priv);
 		i915_address_space_fini(&ggtt->vm);
@@ -3280,7 +3408,9 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	size = gen6_get_total_gtt_size(snb_gmch_ctl);
 	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
 
-	ggtt->vm.clear_range = gen6_ggtt_clear_range;
+	ggtt->vm.clear_range = nop_clear_range;
+	if (!HAS_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
+		ggtt->vm.clear_range = gen6_ggtt_clear_range;
 	ggtt->vm.insert_page = gen6_ggtt_insert_page;
 	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
 	ggtt->vm.cleanup = gen6_gmch_remove;
@@ -3369,17 +3499,6 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
-	/* Trim the GGTT to fit the GuC mappable upper range (when enabled).
-	 * This is easier than doing range restriction on the fly, as we
-	 * currently don't have any bits spare to pass in this upper
-	 * restriction!
-	 */
-	if (USES_GUC(dev_priv)) {
-		ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP);
-		ggtt->mappable_end =
-			min_t(u64, ggtt->mappable_end, ggtt->vm.total);
-	}
-
 	if ((ggtt->vm.total - 1) >> 32) {
 		DRM_ERROR("We never expected a Global GTT with more than 32bits"
 			  " of address space! Found %lldM!\n",
@@ -3444,6 +3563,8 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 
 	ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end);
 
+	i915_ggtt_init_fences(ggtt);
+
 	/*
 	 * Initialise stolen early so that we may reserve preallocated
 	 * objects for the BIOS to KMS transition.
@@ -3518,8 +3639,11 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		WARN_ON(i915_vma_bind(vma,
 				      obj ? obj->cache_level : 0,
 				      PIN_UPDATE));
-		if (obj)
+		if (obj) {
+			i915_gem_object_lock(obj);
 			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
+			i915_gem_object_unlock(obj);
+		}
 
 lock:
 		mutex_lock(&ggtt->vm.mutex);
@@ -3608,6 +3732,89 @@ err_st_alloc:
 	return ERR_PTR(ret);
 }
 
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+	    unsigned int width, unsigned int height,
+	    unsigned int stride,
+	    struct sg_table *st, struct scatterlist *sg)
+{
+	unsigned int row;
+
+	for (row = 0; row < height; row++) {
+		unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+		while (left) {
+			dma_addr_t addr;
+			unsigned int length;
+
+			/* We don't need the pages, but need to initialize
+			 * the entries so the sg list can be happily traversed.
+			 * The only thing we need are DMA addresses.
+			 */
+
+			addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+			length = min(left, length);
+
+			st->nents++;
+
+			sg_set_page(sg, NULL, length, 0);
+			sg_dma_address(sg) = addr;
+			sg_dma_len(sg) = length;
+			sg = sg_next(sg);
+
+			offset += length / I915_GTT_PAGE_SIZE;
+			left -= length;
+		}
+
+		offset += stride - width;
+	}
+
+	return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+		  struct drm_i915_gem_object *obj)
+{
+	unsigned int size = intel_remapped_info_size(rem_info);
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int ret = -ENOMEM;
+	int i;
+
+	/* Allocate target SG list. */
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (!st)
+		goto err_st_alloc;
+
+	ret = sg_alloc_table(st, size, GFP_KERNEL);
+	if (ret)
+		goto err_sg_alloc;
+
+	st->nents = 0;
+	sg = st->sgl;
+
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+		sg = remap_pages(obj, rem_info->plane[i].offset,
+				 rem_info->plane[i].width, rem_info->plane[i].height,
+				 rem_info->plane[i].stride, st, sg);
+	}
+
+	i915_sg_trim(st);
+
+	return st;
+
+err_sg_alloc:
+	kfree(st);
+err_st_alloc:
+
+	DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+			 obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+	return ERR_PTR(ret);
+}
+
 static noinline struct sg_table *
 intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
@@ -3686,6 +3893,11 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
 		break;
 
+	case I915_GGTT_VIEW_REMAPPED:
+		vma->pages =
+			intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+		break;
+
 	case I915_GGTT_VIEW_PARTIAL:
 		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index f597f35b109b..812717ccc69b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,8 +38,10 @@
 #include <linux/mm.h>
 #include <linux/pagevec.h>
 
+#include "gt/intel_reset.h"
+#include "i915_gem_fence_reg.h"
 #include "i915_request.h"
-#include "i915_reset.h"
+#include "i915_scatterlist.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
@@ -60,7 +62,7 @@
 #define I915_MAX_NUM_FENCE_BITS 6
 
 struct drm_i915_file_private;
-struct drm_i915_fence_reg;
+struct drm_i915_gem_object;
 struct i915_vma;
 
 typedef u32 gen6_pte_t;
@@ -161,13 +163,21 @@ typedef u64 gen8_ppgtt_pml4e_t;
 #define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
-struct sg_table;
+#define for_each_sgt_dma(__dmap, __iter, __sgt) \
+	__for_each_sgt_dma(__dmap, __iter, __sgt, I915_GTT_PAGE_SIZE)
+
+struct intel_remapped_plane_info {
+	/* in gtt pages */
+	unsigned int width, height, stride, offset;
+} __packed;
+
+struct intel_remapped_info {
+	struct intel_remapped_plane_info plane[2];
+	unsigned int unused_mbz;
+} __packed;
 
 struct intel_rotation_info {
-	struct intel_rotation_plane_info {
-		/* tiles */
-		unsigned int width, height, stride, offset;
-	} plane[2];
+	struct intel_remapped_plane_info plane[2];
 } __packed;
 
 struct intel_partial_info {
@@ -179,12 +189,20 @@ enum i915_ggtt_view_type {
 	I915_GGTT_VIEW_NORMAL = 0,
 	I915_GGTT_VIEW_ROTATED = sizeof(struct intel_rotation_info),
 	I915_GGTT_VIEW_PARTIAL = sizeof(struct intel_partial_info),
+	I915_GGTT_VIEW_REMAPPED = sizeof(struct intel_remapped_info),
 };
 
 static inline void assert_i915_gem_gtt_types(void)
 {
 	BUILD_BUG_ON(sizeof(struct intel_rotation_info) != 8*sizeof(unsigned int));
 	BUILD_BUG_ON(sizeof(struct intel_partial_info) != sizeof(u64) + sizeof(unsigned int));
+	BUILD_BUG_ON(sizeof(struct intel_remapped_info) != 9*sizeof(unsigned int));
+
+	/* Check that rotation/remapped shares offsets for simplicity */
+	BUILD_BUG_ON(offsetof(struct intel_remapped_info, plane[0]) !=
+		     offsetof(struct intel_rotation_info, plane[0]));
+	BUILD_BUG_ON(offsetofend(struct intel_remapped_info, plane[1]) !=
+		     offsetofend(struct intel_rotation_info, plane[1]));
 
 	/* As we encode the size of each branch inside the union into its type,
 	 * we have to be careful that each branch has a unique size.
@@ -193,6 +211,7 @@ static inline void assert_i915_gem_gtt_types(void)
 	case I915_GGTT_VIEW_NORMAL:
 	case I915_GGTT_VIEW_PARTIAL:
 	case I915_GGTT_VIEW_ROTATED:
+	case I915_GGTT_VIEW_REMAPPED:
 		/* gcc complains if these are identical cases */
 		break;
 	}
@@ -204,6 +223,7 @@ struct i915_ggtt_view {
 		/* Members need to contain no holes/padding */
 		struct intel_partial_info partial;
 		struct intel_rotation_info rotated;
+		struct intel_remapped_info remapped;
 	};
 };
 
@@ -228,25 +248,14 @@ struct i915_page_dma {
 
 struct i915_page_table {
 	struct i915_page_dma base;
-	unsigned int used_ptes;
+	atomic_t used;
 };
 
 struct i915_page_directory {
 	struct i915_page_dma base;
-
-	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
-	unsigned int used_pdes;
-};
-
-struct i915_page_directory_pointer {
-	struct i915_page_dma base;
-	struct i915_page_directory **page_directory;
-	unsigned int used_pdpes;
-};
-
-struct i915_pml4 {
-	struct i915_page_dma base;
-	struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
+	atomic_t used;
+	spinlock_t lock;
+	void *entry[512];
 };
 
 struct i915_vma_ops {
@@ -270,6 +279,8 @@ struct pagestash {
 };
 
 struct i915_address_space {
+	struct kref ref;
+
 	struct drm_mm mm;
 	struct drm_i915_private *i915;
 	struct device *dma;
@@ -296,7 +307,7 @@ struct i915_address_space {
 	struct i915_page_dma scratch_page;
 	struct i915_page_table *scratch_pt;
 	struct i915_page_directory *scratch_pd;
-	struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */
+	struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */
 
 	/**
 	 * List of vma currently bound.
@@ -383,38 +394,46 @@ struct i915_ggtt {
 
 	u32 pin_bias;
 
+	unsigned int num_fences;
+	struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+	struct list_head fence_list;
+
+	/** List of all objects in gtt_space, currently mmaped by userspace.
+	 * All objects within this list must also be on bound_list.
+	 */
+	struct list_head userfault_list;
+
+	/* Manual runtime pm autosuspend delay for user GGTT mmaps */
+	struct intel_wakeref_auto userfault_wakeref;
+
 	struct drm_mm_node error_capture;
+	struct drm_mm_node uc_fw;
 };
 
-struct i915_hw_ppgtt {
+struct i915_ppgtt {
 	struct i915_address_space vm;
-	struct kref ref;
 
 	intel_engine_mask_t pd_dirty_engines;
-	union {
-		struct i915_pml4 pml4;		/* GEN8+ & 48b PPGTT */
-		struct i915_page_directory_pointer pdp;	/* GEN8+ */
-		struct i915_page_directory pd;		/* GEN6-7 */
-	};
-
-	u32 user_handle;
+	struct i915_page_directory *pd;
 };
 
-struct gen6_hw_ppgtt {
-	struct i915_hw_ppgtt base;
+struct gen6_ppgtt {
+	struct i915_ppgtt base;
 
 	struct i915_vma *vma;
 	gen6_pte_t __iomem *pd_addr;
 
 	unsigned int pin_count;
 	bool scan_for_unused_pt;
+
+	struct gen6_ppgtt_cleanup_work *work;
 };
 
-#define __to_gen6_ppgtt(base) container_of(base, struct gen6_hw_ppgtt, base)
+#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
 
-static inline struct gen6_hw_ppgtt *to_gen6_ppgtt(struct i915_hw_ppgtt *base)
+static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
 {
-	BUILD_BUG_ON(offsetof(struct gen6_hw_ppgtt, base));
+	BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
 	return __to_gen6_ppgtt(base);
 }
 
@@ -429,7 +448,7 @@ static inline struct gen6_hw_ppgtt *to_gen6_ppgtt(struct i915_hw_ppgtt *base)
 #define gen6_for_each_pde(pt, pd, start, length, iter)			\
 	for (iter = gen6_pde_index(start);				\
 	     length > 0 && iter < I915_PDES &&				\
-		(pt = (pd)->page_table[iter], true);			\
+		     (pt = i915_pt_entry(pd, iter), true);		\
 	     ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT);		\
 		    temp = min(temp - start, length);			\
 		    start += temp, length -= temp; }), ++iter)
@@ -437,7 +456,7 @@ static inline struct gen6_hw_ppgtt *to_gen6_ppgtt(struct i915_hw_ppgtt *base)
 #define gen6_for_all_pdes(pt, pd, iter)					\
 	for (iter = 0;							\
 	     iter < I915_PDES &&					\
-		(pt = (pd)->page_table[iter], true);			\
+		     (pt = i915_pt_entry(pd, iter), true);		\
 	     ++iter)
 
 static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
@@ -496,6 +515,27 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm)
 	return GEN8_3LVL_PDPES;
 }
 
+static inline struct i915_page_table *
+i915_pt_entry(const struct i915_page_directory * const pd,
+	      const unsigned short n)
+{
+	return pd->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pd_entry(const struct i915_page_directory * const pdp,
+	      const unsigned short n)
+{
+	return pdp->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pdp_entry(const struct i915_page_directory * const pml4,
+	       const unsigned short n)
+{
+	return pml4->entry[n];
+}
+
 /* Equivalent to the gen6 version, For each pde iterates over every pde
  * between from start until start + length. On gen8+ it simply iterates
  * over every page directory entry in a page directory.
@@ -503,7 +543,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm)
 #define gen8_for_each_pde(pt, pd, start, length, iter)			\
 	for (iter = gen8_pde_index(start);				\
 	     length > 0 && iter < I915_PDES &&				\
-		(pt = (pd)->page_table[iter], true);			\
+		     (pt = i915_pt_entry(pd, iter), true);		\
 	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT);		\
 		    temp = min(temp - start, length);			\
 		    start += temp, length -= temp; }), ++iter)
@@ -511,7 +551,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm)
 #define gen8_for_each_pdpe(pd, pdp, start, length, iter)		\
 	for (iter = gen8_pdpe_index(start);				\
 	     length > 0 && iter < i915_pdpes_per_pdp(vm) &&		\
-		(pd = (pdp)->page_directory[iter], true);		\
+		     (pd = i915_pd_entry(pdp, iter), true);		\
 	     ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT);	\
 		    temp = min(temp - start, length);			\
 		    start += temp, length -= temp; }), ++iter)
@@ -519,7 +559,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm)
 #define gen8_for_each_pml4e(pdp, pml4, start, length, iter)		\
 	for (iter = gen8_pml4e_index(start);				\
 	     length > 0 && iter < GEN8_PML4ES_PER_PML4 &&		\
-		(pdp = (pml4)->pdps[iter], true);			\
+		     (pdp = i915_pdp_entry(pml4, iter), true);		\
 	     ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);	\
 		    temp = min(temp - start, length);			\
 		    start += temp, length -= temp; }), ++iter)
@@ -550,18 +590,30 @@ static inline u64 gen8_pte_count(u64 address, u64 length)
 }
 
 static inline dma_addr_t
-i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
+i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
 {
-	return px_dma(ppgtt->pdp.page_directory[n]);
+	struct i915_page_directory *pd;
+
+	pd = i915_pdp_entry(ppgtt->pd, n);
+	return px_dma(pd);
 }
 
 static inline struct i915_ggtt *
 i915_vm_to_ggtt(struct i915_address_space *vm)
 {
+	BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
 	GEM_BUG_ON(!i915_is_ggtt(vm));
 	return container_of(vm, struct i915_ggtt, vm);
 }
 
+static inline struct i915_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+	BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
+	GEM_BUG_ON(i915_is_ggtt(vm));
+	return container_of(vm, struct i915_ppgtt, vm);
+}
+
 #define INTEL_MAX_PPAT_ENTRIES 8
 #define INTEL_PPAT_PERFECT_MATCH (~0U)
 
@@ -593,9 +645,6 @@ const struct intel_ppat_entry *
 intel_ppat_get(struct drm_i915_private *i915, u8 value);
 void intel_ppat_put(const struct intel_ppat_entry *entry);
 
-int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915);
-void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915);
-
 int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv);
 int i915_ggtt_init_hw(struct drm_i915_private *dev_priv);
 int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv);
@@ -606,26 +655,26 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 
 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 
-struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv);
-void i915_ppgtt_release(struct kref *kref);
+struct i915_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv);
 
-static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+static inline struct i915_address_space *
+i915_vm_get(struct i915_address_space *vm)
 {
-	kref_get(&ppgtt->ref);
-	return ppgtt;
+	kref_get(&vm->ref);
+	return vm;
 }
 
-static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
+void i915_vm_release(struct kref *kref);
+
+static inline void i915_vm_put(struct i915_address_space *vm)
 {
-	if (ppgtt)
-		kref_put(&ppgtt->ref, i915_ppgtt_release);
+	kref_put(&vm->ref, i915_vm_release);
 }
 
-int gen6_ppgtt_pin(struct i915_hw_ppgtt *base);
-void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base);
-void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base);
+int gen6_ppgtt_pin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
 
-void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
 void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_object.c b/drivers/gpu/drm/i915/i915_gem_object.c
deleted file mode 100644
index ac6a5ab84586..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_object.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "i915_gem_object.h"
-#include "i915_globals.h"
-
-static struct i915_global_object {
-	struct i915_global base;
-	struct kmem_cache *slab_objects;
-} global;
-
-struct drm_i915_gem_object *i915_gem_object_alloc(void)
-{
-	return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
-}
-
-void i915_gem_object_free(struct drm_i915_gem_object *obj)
-{
-	return kmem_cache_free(global.slab_objects, obj);
-}
-
-/**
- * Mark up the object's coherency levels for a given cache_level
- * @obj: #drm_i915_gem_object
- * @cache_level: cache level
- */
-void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
-					 unsigned int cache_level)
-{
-	obj->cache_level = cache_level;
-
-	if (cache_level != I915_CACHE_NONE)
-		obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
-				       I915_BO_CACHE_COHERENT_FOR_WRITE);
-	else if (HAS_LLC(to_i915(obj->base.dev)))
-		obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
-	else
-		obj->cache_coherent = 0;
-
-	obj->cache_dirty =
-		!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
-}
-
-static void i915_global_objects_shrink(void)
-{
-	kmem_cache_shrink(global.slab_objects);
-}
-
-static void i915_global_objects_exit(void)
-{
-	kmem_cache_destroy(global.slab_objects);
-}
-
-static struct i915_global_object global = { {
-	.shrink = i915_global_objects_shrink,
-	.exit = i915_global_objects_exit,
-} };
-
-int __init i915_global_objects_init(void)
-{
-	global.slab_objects =
-		KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
-	if (!global.slab_objects)
-		return -ENOMEM;
-
-	i915_global_register(&global.base);
-	return 0;
-}
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
deleted file mode 100644
index ca93a40c0c87..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __I915_GEM_OBJECT_H__
-#define __I915_GEM_OBJECT_H__
-
-#include <linux/reservation.h>
-
-#include <drm/drm_vma_manager.h>
-#include <drm/drm_gem.h>
-#include <drm/drm_file.h>
-#include <drm/drm_device.h>
-
-#include <drm/i915_drm.h>
-
-#include "i915_request.h"
-#include "i915_selftest.h"
-
-struct drm_i915_gem_object;
-
-/*
- * struct i915_lut_handle tracks the fast lookups from handle to vma used
- * for execbuf. Although we use a radixtree for that mapping, in order to
- * remove them as the object or context is closed, we need a secondary list
- * and a translation entry (i915_lut_handle).
- */
-struct i915_lut_handle {
-	struct list_head obj_link;
-	struct list_head ctx_link;
-	struct i915_gem_context *ctx;
-	u32 handle;
-};
-
-struct drm_i915_gem_object_ops {
-	unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE	BIT(0)
-#define I915_GEM_OBJECT_IS_SHRINKABLE	BIT(1)
-#define I915_GEM_OBJECT_IS_PROXY	BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL	BIT(3)
-
-	/* Interface between the GEM object and its backing storage.
-	 * get_pages() is called once prior to the use of the associated set
-	 * of pages before to binding them into the GTT, and put_pages() is
-	 * called after we no longer need them. As we expect there to be
-	 * associated cost with migrating pages between the backing storage
-	 * and making them available for the GPU (e.g. clflush), we may hold
-	 * onto the pages after they are no longer referenced by the GPU
-	 * in case they may be used again shortly (for example migrating the
-	 * pages to a different memory domain within the GTT). put_pages()
-	 * will therefore most likely be called when the object itself is
-	 * being released or under memory pressure (where we attempt to
-	 * reap pages for the shrinker).
-	 */
-	int (*get_pages)(struct drm_i915_gem_object *);
-	void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *);
-
-	int (*pwrite)(struct drm_i915_gem_object *,
-		      const struct drm_i915_gem_pwrite *);
-
-	int (*dmabuf_export)(struct drm_i915_gem_object *);
-	void (*release)(struct drm_i915_gem_object *);
-};
-
-struct drm_i915_gem_object {
-	struct drm_gem_object base;
-
-	const struct drm_i915_gem_object_ops *ops;
-
-	struct {
-		/**
-		 * @vma.lock: protect the list/tree of vmas
-		 */
-		spinlock_t lock;
-
-		/**
-		 * @vma.list: List of VMAs backed by this object
-		 *
-		 * The VMA on this list are ordered by type, all GGTT vma are
-		 * placed at the head and all ppGTT vma are placed at the tail.
-		 * The different types of GGTT vma are unordered between
-		 * themselves, use the @vma.tree (which has a defined order
-		 * between all VMA) to quickly find an exact match.
-		 */
-		struct list_head list;
-
-		/**
-		 * @vma.tree: Ordered tree of VMAs backed by this object
-		 *
-		 * All VMA created for this object are placed in the @vma.tree
-		 * for fast retrieval via a binary search in
-		 * i915_vma_instance(). They are also added to @vma.list for
-		 * easy iteration.
-		 */
-		struct rb_root tree;
-	} vma;
-
-	/**
-	 * @lut_list: List of vma lookup entries in use for this object.
-	 *
-	 * If this object is closed, we need to remove all of its VMA from
-	 * the fast lookup index in associated contexts; @lut_list provides
-	 * this translation from object to context->handles_vma.
-	 */
-	struct list_head lut_list;
-
-	/** Stolen memory for this object, instead of being backed by shmem. */
-	struct drm_mm_node *stolen;
-	union {
-		struct rcu_head rcu;
-		struct llist_node freed;
-	};
-
-	/**
-	 * Whether the object is currently in the GGTT mmap.
-	 */
-	unsigned int userfault_count;
-	struct list_head userfault_link;
-
-	struct list_head batch_pool_link;
-	I915_SELFTEST_DECLARE(struct list_head st_link);
-
-	unsigned long flags;
-
-	/**
-	 * Have we taken a reference for the object for incomplete GPU
-	 * activity?
-	 */
-#define I915_BO_ACTIVE_REF 0
-
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
-	 */
-	unsigned int cache_level:3;
-	unsigned int cache_coherent:2;
-#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
-#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
-	unsigned int cache_dirty:1;
-
-	/**
-	 * @read_domains: Read memory domains.
-	 *
-	 * These monitor which caches contain read/write data related to the
-	 * object. When transitioning from one set of domains to another,
-	 * the driver is called to ensure that caches are suitably flushed and
-	 * invalidated.
-	 */
-	u16 read_domains;
-
-	/**
-	 * @write_domain: Corresponding unique write memory domain.
-	 */
-	u16 write_domain;
-
-	atomic_t frontbuffer_bits;
-	unsigned int frontbuffer_ggtt_origin; /* write once */
-	struct i915_active_request frontbuffer_write;
-
-	/** Current tiling stride for the object, if it's tiled. */
-	unsigned int tiling_and_stride;
-#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
-#define TILING_MASK (FENCE_MINIMUM_STRIDE-1)
-#define STRIDE_MASK (~TILING_MASK)
-
-	/** Count of VMA actually bound by this object */
-	unsigned int bind_count;
-	unsigned int active_count;
-	/** Count of how many global VMA are currently pinned for use by HW */
-	unsigned int pin_global;
-
-	struct {
-		struct mutex lock; /* protects the pages and their use */
-		atomic_t pages_pin_count;
-
-		struct sg_table *pages;
-		void *mapping;
-
-		/* TODO: whack some of this into the error state */
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
-
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
-
-		I915_SELFTEST_DECLARE(unsigned int page_mask);
-
-		struct i915_gem_object_page_iter {
-			struct scatterlist *sg_pos;
-			unsigned int sg_idx; /* in pages, but 32bit eek! */
-
-			struct radix_tree_root radix;
-			struct mutex lock; /* protects this cache */
-		} get_page;
-
-		/**
-		 * Element within i915->mm.unbound_list or i915->mm.bound_list,
-		 * locked by i915->mm.obj_lock.
-		 */
-		struct list_head link;
-
-		/**
-		 * Advice: are the backing pages purgeable?
-		 */
-		unsigned int madv:2;
-
-		/**
-		 * This is set if the object has been written to since the
-		 * pages were last acquired.
-		 */
-		bool dirty:1;
-
-		/**
-		 * This is set if the object has been pinned due to unknown
-		 * swizzling.
-		 */
-		bool quirked:1;
-	} mm;
-
-	/** Breadcrumb of last rendering to the buffer.
-	 * There can only be one writer, but we allow for multiple readers.
-	 * If there is a writer that necessarily implies that all other
-	 * read requests are complete - but we may only be lazily clearing
-	 * the read requests. A read request is naturally the most recent
-	 * request on a ring, so we may have two different write and read
-	 * requests on one ring where the write request is older than the
-	 * read request. This allows for the CPU to read from an active
-	 * buffer by only waiting for the write to complete.
-	 */
-	struct reservation_object *resv;
-
-	/** References from framebuffers, locks out tiling changes. */
-	unsigned int framebuffer_references;
-
-	/** Record of address bit 17 of each page at last unbind. */
-	unsigned long *bit_17;
-
-	union {
-		struct i915_gem_userptr {
-			uintptr_t ptr;
-
-			struct i915_mm_struct *mm;
-			struct i915_mmu_object *mmu_object;
-			struct work_struct *work;
-		} userptr;
-
-		unsigned long scratch;
-
-		void *gvt_info;
-	};
-
-	/** for phys allocated objects */
-	struct drm_dma_handle *phys_handle;
-
-	struct reservation_object __builtin_resv;
-};
-
-static inline struct drm_i915_gem_object *
-to_intel_bo(struct drm_gem_object *gem)
-{
-	/* Assert that to_intel_bo(NULL) == NULL */
-	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
-
-	return container_of(gem, struct drm_i915_gem_object, base);
-}
-
-struct drm_i915_gem_object *i915_gem_object_alloc(void);
-void i915_gem_object_free(struct drm_i915_gem_object *obj);
-
-/**
- * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
- * @filp: DRM file private date
- * @handle: userspace handle
- *
- * Returns:
- *
- * A pointer to the object named by the handle if such exists on @filp, NULL
- * otherwise. This object is only valid whilst under the RCU read lock, and
- * note carefully the object may be in the process of being destroyed.
- */
-static inline struct drm_i915_gem_object *
-i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
-{
-#ifdef CONFIG_LOCKDEP
-	WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
-#endif
-	return idr_find(&file->object_idr, handle);
-}
-
-static inline struct drm_i915_gem_object *
-i915_gem_object_lookup(struct drm_file *file, u32 handle)
-{
-	struct drm_i915_gem_object *obj;
-
-	rcu_read_lock();
-	obj = i915_gem_object_lookup_rcu(file, handle);
-	if (obj && !kref_get_unless_zero(&obj->base.refcount))
-		obj = NULL;
-	rcu_read_unlock();
-
-	return obj;
-}
-
-__deprecated
-extern struct drm_gem_object *
-drm_gem_object_lookup(struct drm_file *file, u32 handle);
-
-__attribute__((nonnull))
-static inline struct drm_i915_gem_object *
-i915_gem_object_get(struct drm_i915_gem_object *obj)
-{
-	drm_gem_object_get(&obj->base);
-	return obj;
-}
-
-__attribute__((nonnull))
-static inline void
-i915_gem_object_put(struct drm_i915_gem_object *obj)
-{
-	__drm_gem_object_put(&obj->base);
-}
-
-static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj)
-{
-	reservation_object_lock(obj->resv, NULL);
-}
-
-static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
-{
-	reservation_object_unlock(obj->resv);
-}
-
-static inline void
-i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
-{
-	obj->base.vma_node.readonly = true;
-}
-
-static inline bool
-i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
-{
-	return obj->base.vma_node.readonly;
-}
-
-static inline bool
-i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
-{
-	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
-}
-
-static inline bool
-i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
-{
-	return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
-}
-
-static inline bool
-i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
-{
-	return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
-}
-
-static inline bool
-i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
-{
-	return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
-}
-
-static inline bool
-i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
-{
-	return obj->active_count;
-}
-
-static inline bool
-i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj)
-{
-	return test_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__set_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-static inline void
-i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj)
-{
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-	__clear_bit(I915_BO_ACTIVE_REF, &obj->flags);
-}
-
-void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj);
-
-static inline bool
-i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
-{
-	return READ_ONCE(obj->framebuffer_references);
-}
-
-static inline unsigned int
-i915_gem_object_get_tiling(const struct drm_i915_gem_object *obj)
-{
-	return obj->tiling_and_stride & TILING_MASK;
-}
-
-static inline bool
-i915_gem_object_is_tiled(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
-}
-
-static inline unsigned int
-i915_gem_object_get_stride(const struct drm_i915_gem_object *obj)
-{
-	return obj->tiling_and_stride & STRIDE_MASK;
-}
-
-static inline unsigned int
-i915_gem_tile_height(unsigned int tiling)
-{
-	GEM_BUG_ON(!tiling);
-	return tiling == I915_TILING_Y ? 32 : 8;
-}
-
-static inline unsigned int
-i915_gem_object_get_tile_height(const struct drm_i915_gem_object *obj)
-{
-	return i915_gem_tile_height(i915_gem_object_get_tiling(obj));
-}
-
-static inline unsigned int
-i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
-{
-	return (i915_gem_object_get_stride(obj) *
-		i915_gem_object_get_tile_height(obj));
-}
-
-int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
-			       unsigned int tiling, unsigned int stride);
-
-static inline struct intel_engine_cs *
-i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *engine = NULL;
-	struct dma_fence *fence;
-
-	rcu_read_lock();
-	fence = reservation_object_get_excl_rcu(obj->resv);
-	rcu_read_unlock();
-
-	if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
-		engine = to_request(fence)->engine;
-	dma_fence_put(fence);
-
-	return engine;
-}
-
-void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
-					 unsigned int cache_level);
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
-
-void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
-				     struct sg_table *pages,
-				     bool needs_clflush);
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 9440024c763f..4ee032072d4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -84,7 +84,7 @@ static int render_state_setup(struct intel_render_state *so,
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_obj_prepare_shmem_write(so->obj, &needs_clflush);
+	ret = i915_gem_object_prepare_write(so->obj, &needs_clflush);
 	if (ret)
 		return ret;
 
@@ -166,7 +166,7 @@ static int render_state_setup(struct intel_render_state *so,
 
 	ret = 0;
 out:
-	i915_gem_obj_finish_shmem_access(so->obj);
+	i915_gem_object_finish_access(so->obj);
 	return ret;
 
 err:
@@ -222,12 +222,14 @@ int i915_gem_render_state_emit(struct i915_request *rq)
 			goto err_unpin;
 	}
 
+	i915_vma_lock(so.vma);
 	err = i915_vma_move_to_active(so.vma, rq, 0);
+	i915_vma_unlock(so.vma);
 err_unpin:
 	i915_vma_unpin(so.vma);
 err_vma:
 	i915_vma_close(so.vma);
 err_obj:
-	__i915_gem_object_release_unless_active(so.obj);
+	i915_gem_object_put(so.obj);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 81e5c2ce336b..2d5fcba98841 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -8,8 +8,8 @@
 #include <linux/workqueue.h>
 
 #include "i915_active.h"
-#include "i915_gem_context.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_object.h"
 #include "i915_globals.h"
 #include "i915_request.h"
 #include "i915_scheduler.h"
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f51ff683dd2e..b7e9fddef270 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -36,8 +36,15 @@
 
 #include <drm/drm_print.h>
 
-#include "i915_gpu_error.h"
+#include "display/intel_atomic.h"
+#include "display/intel_overlay.h"
+
+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
+#include "i915_gpu_error.h"
+#include "i915_scatterlist.h"
+#include "intel_csr.h"
 
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
@@ -1117,17 +1124,23 @@ static u32 i915_error_generate_code(struct i915_gpu_state *error,
 static void gem_record_fences(struct i915_gpu_state *error)
 {
 	struct drm_i915_private *dev_priv = error->i915;
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	int i;
 
 	if (INTEL_GEN(dev_priv) >= 6) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
-			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
+		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
+			error->fence[i] =
+				intel_uncore_read64(uncore,
+						    FENCE_REG_GEN6_LO(i));
 	} else if (INTEL_GEN(dev_priv) >= 4) {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
-			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
+		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
+			error->fence[i] =
+				intel_uncore_read64(uncore,
+						    FENCE_REG_965_LO(i));
 	} else {
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
-			error->fence[i] = I915_READ(FENCE_REG(i));
+		for (i = 0; i < dev_priv->ggtt.num_fences; i++)
+			error->fence[i] =
+				intel_uncore_read(uncore, FENCE_REG(i));
 	}
 	error->nfence = i;
 }
@@ -1143,7 +1156,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 		if (INTEL_GEN(dev_priv) >= 8)
 			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
 		else
-			ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
+			ee->fault_reg = GEN6_RING_FAULT_REG_READ(engine);
 	}
 
 	if (INTEL_GEN(dev_priv) >= 4) {
@@ -1213,7 +1226,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
 	if (HAS_PPGTT(dev_priv)) {
 		int i;
 
-		ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));
+		ee->vm_info.gfx_mode = ENGINE_READ(engine, RING_MODE_GEN7);
 
 		if (IS_GEN(dev_priv, 6)) {
 			ee->vm_info.pp_dir_base =
@@ -1263,7 +1276,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link)
+	list_for_each_entry_from(request, &engine->active.requests, sched.link)
 		count++;
 	if (!count)
 		return;
@@ -1276,7 +1289,8 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline.requests, link) {
+	list_for_each_entry_from(request,
+				 &engine->active.requests, sched.link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
@@ -1419,7 +1433,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			struct i915_gem_context *ctx = request->gem_context;
 			struct intel_ring *ring;
 
-			ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm;
+			ee->vm = ctx->vm ?: &ggtt->vm;
 
 			record_context(&ee->context, ctx);
 
@@ -1564,7 +1578,8 @@ static void capture_uc_state(struct i915_gpu_state *error)
 /* Capture all registers which don't fit into another category. */
 static void capture_reg_state(struct i915_gpu_state *error)
 {
-	struct drm_i915_private *dev_priv = error->i915;
+	struct drm_i915_private *i915 = error->i915;
+	struct intel_uncore *uncore = &i915->uncore;
 	int i;
 
 	/* General organization
@@ -1576,71 +1591,84 @@ static void capture_reg_state(struct i915_gpu_state *error)
 	 */
 
 	/* 1: Registers specific to a single generation */
-	if (IS_VALLEYVIEW(dev_priv)) {
-		error->gtier[0] = I915_READ(GTIER);
-		error->ier = I915_READ(VLV_IER);
-		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
+	if (IS_VALLEYVIEW(i915)) {
+		error->gtier[0] = intel_uncore_read(uncore, GTIER);
+		error->ier = intel_uncore_read(uncore, VLV_IER);
+		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_VLV);
 	}
 
-	if (IS_GEN(dev_priv, 7))
-		error->err_int = I915_READ(GEN7_ERR_INT);
+	if (IS_GEN(i915, 7))
+		error->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
 
-	if (INTEL_GEN(dev_priv) >= 8) {
-		error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
-		error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
+	if (INTEL_GEN(i915) >= 8) {
+		error->fault_data0 = intel_uncore_read(uncore,
+						       GEN8_FAULT_TLB_DATA0);
+		error->fault_data1 = intel_uncore_read(uncore,
+						       GEN8_FAULT_TLB_DATA1);
 	}
 
-	if (IS_GEN(dev_priv, 6)) {
-		error->forcewake = I915_READ_FW(FORCEWAKE);
-		error->gab_ctl = I915_READ(GAB_CTL);
-		error->gfx_mode = I915_READ(GFX_MODE);
+	if (IS_GEN(i915, 6)) {
+		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE);
+		error->gab_ctl = intel_uncore_read(uncore, GAB_CTL);
+		error->gfx_mode = intel_uncore_read(uncore, GFX_MODE);
 	}
 
 	/* 2: Registers which belong to multiple generations */
-	if (INTEL_GEN(dev_priv) >= 7)
-		error->forcewake = I915_READ_FW(FORCEWAKE_MT);
+	if (INTEL_GEN(i915) >= 7)
+		error->forcewake = intel_uncore_read_fw(uncore, FORCEWAKE_MT);
 
-	if (INTEL_GEN(dev_priv) >= 6) {
-		error->derrmr = I915_READ(DERRMR);
-		error->error = I915_READ(ERROR_GEN6);
-		error->done_reg = I915_READ(DONE_REG);
+	if (INTEL_GEN(i915) >= 6) {
+		error->derrmr = intel_uncore_read(uncore, DERRMR);
+		error->error = intel_uncore_read(uncore, ERROR_GEN6);
+		error->done_reg = intel_uncore_read(uncore, DONE_REG);
 	}
 
-	if (INTEL_GEN(dev_priv) >= 5)
-		error->ccid = I915_READ(CCID(RENDER_RING_BASE));
+	if (INTEL_GEN(i915) >= 5)
+		error->ccid = intel_uncore_read(uncore, CCID(RENDER_RING_BASE));
 
 	/* 3: Feature specific registers */
-	if (IS_GEN_RANGE(dev_priv, 6, 7)) {
-		error->gam_ecochk = I915_READ(GAM_ECOCHK);
-		error->gac_eco = I915_READ(GAC_ECO_BITS);
+	if (IS_GEN_RANGE(i915, 6, 7)) {
+		error->gam_ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+		error->gac_eco = intel_uncore_read(uncore, GAC_ECO_BITS);
 	}
 
 	/* 4: Everything else */
-	if (INTEL_GEN(dev_priv) >= 11) {
-		error->ier = I915_READ(GEN8_DE_MISC_IER);
-		error->gtier[0] = I915_READ(GEN11_RENDER_COPY_INTR_ENABLE);
-		error->gtier[1] = I915_READ(GEN11_VCS_VECS_INTR_ENABLE);
-		error->gtier[2] = I915_READ(GEN11_GUC_SG_INTR_ENABLE);
-		error->gtier[3] = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE);
-		error->gtier[4] = I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE);
-		error->gtier[5] = I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE);
+	if (INTEL_GEN(i915) >= 11) {
+		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
+		error->gtier[0] =
+			intel_uncore_read(uncore,
+					  GEN11_RENDER_COPY_INTR_ENABLE);
+		error->gtier[1] =
+			intel_uncore_read(uncore, GEN11_VCS_VECS_INTR_ENABLE);
+		error->gtier[2] =
+			intel_uncore_read(uncore, GEN11_GUC_SG_INTR_ENABLE);
+		error->gtier[3] =
+			intel_uncore_read(uncore,
+					  GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+		error->gtier[4] =
+			intel_uncore_read(uncore,
+					  GEN11_CRYPTO_RSVD_INTR_ENABLE);
+		error->gtier[5] =
+			intel_uncore_read(uncore,
+					  GEN11_GUNIT_CSME_INTR_ENABLE);
 		error->ngtier = 6;
-	} else if (INTEL_GEN(dev_priv) >= 8) {
-		error->ier = I915_READ(GEN8_DE_MISC_IER);
+	} else if (INTEL_GEN(i915) >= 8) {
+		error->ier = intel_uncore_read(uncore, GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)
-			error->gtier[i] = I915_READ(GEN8_GT_IER(i));
+			error->gtier[i] = intel_uncore_read(uncore,
+							    GEN8_GT_IER(i));
 		error->ngtier = 4;
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		error->ier = I915_READ(DEIER);
-		error->gtier[0] = I915_READ(GTIER);
+	} else if (HAS_PCH_SPLIT(i915)) {
+		error->ier = intel_uncore_read(uncore, DEIER);
+		error->gtier[0] = intel_uncore_read(uncore, GTIER);
 		error->ngtier = 1;
-	} else if (IS_GEN(dev_priv, 2)) {
-		error->ier = I915_READ16(GEN2_IER);
-	} else if (!IS_VALLEYVIEW(dev_priv)) {
-		error->ier = I915_READ(GEN2_IER);
+	} else if (IS_GEN(i915, 2)) {
+		error->ier = intel_uncore_read16(uncore, GEN2_IER);
+	} else if (!IS_VALLEYVIEW(i915)) {
+		error->ier = intel_uncore_read(uncore, GEN2_IER);
 	}
-	error->eir = I915_READ(EIR);
-	error->pgtbl_er = I915_READ(PGTBL_ER);
+	error->eir = intel_uncore_read(uncore, EIR);
+	error->pgtbl_er = intel_uncore_read(uncore, PGTBL_ER);
 }
 
 static const char *
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5dc761e85d9d..2ecd0c6a1c94 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -13,8 +13,9 @@
 
 #include <drm/drm_mm.h>
 
+#include "gt/intel_engine.h"
+
 #include "intel_device_info.h"
-#include "intel_ringbuffer.h"
 #include "intel_uc_fw.h"
 
 #include "i915_gem.h"
@@ -178,8 +179,6 @@ struct i915_gpu_state {
 	struct scatterlist *sgl, *fit;
 };
 
-struct i915_gpu_restart;
-
 struct i915_gpu_error {
 	/* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@ -240,8 +239,6 @@ struct i915_gpu_error {
 	wait_queue_head_t reset_queue;
 
 	struct srcu_struct reset_backoff_srcu;
-
-	struct i915_gpu_restart *restart;
 };
 
 struct drm_i915_error_state_buf {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b92cfd69134b..b2e27b5b0df9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -37,10 +37,16 @@
 #include <drm/drm_irq.h>
 #include <drm/i915_drm.h>
 
+#include "display/intel_fifo_underrun.h"
+#include "display/intel_hotplug.h"
+#include "display/intel_lpe_audio.h"
+#include "display/intel_psr.h"
+
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
-#include "intel_psr.h"
+#include "intel_pm.h"
 
 /**
  * DOC: interrupt handling
@@ -136,6 +142,12 @@ static const u32 hpd_icp[HPD_NUM_PINS] = {
 	[HPD_PORT_F] = SDE_TC4_HOTPLUG_ICP
 };
 
+static const u32 hpd_mcc[HPD_NUM_PINS] = {
+	[HPD_PORT_A] = SDE_DDIA_HOTPLUG_ICP,
+	[HPD_PORT_B] = SDE_DDIB_HOTPLUG_ICP,
+	[HPD_PORT_C] = SDE_TC1_HOTPLUG_ICP
+};
+
 static void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
 			   i915_reg_t iir, i915_reg_t ier)
 {
@@ -382,7 +394,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv,
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask)
 {
 	ilk_update_gt_irq(dev_priv, mask, mask);
-	POSTING_READ_FW(GTIMR);
+	intel_uncore_posting_read_fw(&dev_priv->uncore, GTIMR);
 }
 
 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask)
@@ -584,7 +596,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 
 void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
 {
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events);
@@ -593,13 +605,13 @@ void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
 
 void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
 {
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	if (!dev_priv->guc.interrupts_enabled) {
+	if (!dev_priv->guc.interrupts.enabled) {
 		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
 				       dev_priv->pm_guc_events);
-		dev_priv->guc.interrupts_enabled = true;
+		dev_priv->guc.interrupts.enabled = true;
 		gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events);
 	}
 	spin_unlock_irq(&dev_priv->irq_lock);
@@ -607,10 +619,10 @@ void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv)
 
 void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
 {
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
 
 	spin_lock_irq(&dev_priv->irq_lock);
-	dev_priv->guc.interrupts_enabled = false;
+	dev_priv->guc.interrupts.enabled = false;
 
 	gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events);
 
@@ -620,6 +632,42 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
 	gen9_reset_guc_interrupts(dev_priv);
 }
 
+void gen11_reset_guc_interrupts(struct drm_i915_private *i915)
+{
+	spin_lock_irq(&i915->irq_lock);
+	gen11_reset_one_iir(i915, 0, GEN11_GUC);
+	spin_unlock_irq(&i915->irq_lock);
+}
+
+void gen11_enable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+	if (!dev_priv->guc.interrupts.enabled) {
+		u32 events = REG_FIELD_PREP(ENGINE1_MASK,
+					    GEN11_GUC_INTR_GUC2HOST);
+
+		WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GUC));
+		I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, events);
+		I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~events);
+		dev_priv->guc.interrupts.enabled = true;
+	}
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen11_disable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+	dev_priv->guc.interrupts.enabled = false;
+
+	I915_WRITE(GEN11_GUC_SG_INTR_MASK, ~0);
+	I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	gen11_reset_guc_interrupts(dev_priv);
+}
+
 /**
  * bdw_update_port_irq - update DE port interrupt
  * @dev_priv: driver private
@@ -1191,20 +1239,23 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 
 static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
 {
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	u32 busy_up, busy_down, max_avg, min_avg;
 	u8 new_delay;
 
 	spin_lock(&mchdev_lock);
 
-	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
+	intel_uncore_write16(uncore,
+			     MEMINTRSTS,
+			     intel_uncore_read(uncore, MEMINTRSTS));
 
 	new_delay = dev_priv->ips.cur_delay;
 
-	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
-	busy_up = I915_READ(RCPREVBSYTUPAVG);
-	busy_down = I915_READ(RCPREVBSYTDNAVG);
-	max_avg = I915_READ(RCBMAXAVG);
-	min_avg = I915_READ(RCBMINAVG);
+	intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+	busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
+	busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
+	max_avg = intel_uncore_read(uncore, RCBMAXAVG);
+	min_avg = intel_uncore_read(uncore, RCBMINAVG);
 
 	/* Handle RCS change request from hw */
 	if (busy_up > max_avg) {
@@ -1301,7 +1352,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
 		goto out;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 
 	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
 
@@ -1367,7 +1418,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 		rps->last_adj = 0;
 	}
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 
 out:
 	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
@@ -1889,6 +1940,12 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 		intel_guc_to_host_event_handler(&dev_priv->guc);
 }
 
+static void gen11_guc_irq_handler(struct drm_i915_private *i915, u16 iir)
+{
+	if (iir & GEN11_GUC_INTR_GUC2HOST)
+		intel_guc_to_host_event_handler(&i915->guc);
+}
+
 static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv)
 {
 	enum pipe pipe;
@@ -2136,7 +2193,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	do {
 		u32 iir, gt_iir, pm_iir;
@@ -2207,7 +2264,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
 	} while (0);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -2222,7 +2279,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	do {
 		u32 master_ctl, iir;
@@ -2288,7 +2345,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		valleyview_pipestat_irq_handler(dev_priv, pipe_stats);
 	} while (0);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -2447,7 +2504,8 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 		cpt_serr_int_handler(dev_priv);
 }
 
-static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
+static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir,
+			    const u32 *pins)
 {
 	u32 ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP;
 	u32 tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP;
@@ -2461,7 +2519,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 
 		intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
 				   ddi_hotplug_trigger,
-				   dig_hotplug_reg, hpd_icp,
+				   dig_hotplug_reg, pins,
 				   icp_ddi_port_hotplug_long_detect);
 	}
 
@@ -2473,7 +2531,7 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir)
 
 		intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask,
 				   tc_hotplug_trigger,
-				   dig_hotplug_reg, hpd_icp,
+				   dig_hotplug_reg, pins,
 				   icp_tc_port_hotplug_long_detect);
 	}
 
@@ -2642,7 +2700,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	/* disable master interrupt before clearing iir  */
 	de_ier = I915_READ(DEIER);
@@ -2694,7 +2752,7 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg)
 		I915_WRITE(SDEIER, sde_ier);
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -2904,8 +2962,10 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 			I915_WRITE(SDEIIR, iir);
 			ret = IRQ_HANDLED;
 
-			if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
-				icp_irq_handler(dev_priv, iir);
+			if (INTEL_PCH_TYPE(dev_priv) >= PCH_MCC)
+				icp_irq_handler(dev_priv, iir, hpd_mcc);
+			else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
+				icp_irq_handler(dev_priv, iir, hpd_icp);
 			else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
 				spt_irq_handler(dev_priv, iir);
 			else
@@ -2961,9 +3021,9 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
 	if (master_ctl & ~GEN8_GT_IRQS) {
-		disable_rpm_wakeref_asserts(dev_priv);
+		disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 		gen8_de_irq_handler(dev_priv, master_ctl);
-		enable_rpm_wakeref_asserts(dev_priv);
+		enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 	}
 
 	gen8_master_intr_enable(regs);
@@ -3011,6 +3071,9 @@ static void
 gen11_other_irq_handler(struct drm_i915_private * const i915,
 			const u8 instance, const u16 iir)
 {
+	if (instance == OTHER_GUC_INSTANCE)
+		return gen11_guc_irq_handler(i915, iir);
+
 	if (instance == OTHER_GTPM_INSTANCE)
 		return gen11_rps_irq_handler(i915, iir);
 
@@ -3159,13 +3222,13 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg)
 	if (master_ctl & GEN11_DISPLAY_IRQ) {
 		const u32 disp_ctl = raw_reg_read(regs, GEN11_DISPLAY_INT_CTL);
 
-		disable_rpm_wakeref_asserts(i915);
+		disable_rpm_wakeref_asserts(&i915->runtime_pm);
 		/*
 		 * GEN11_DISPLAY_INT_CTL has same format as GEN8_MASTER_IRQ
 		 * for the display related bits.
 		 */
 		gen8_de_irq_handler(i915, disp_ctl);
-		enable_rpm_wakeref_asserts(i915);
+		enable_rpm_wakeref_asserts(&i915->runtime_pm);
 	}
 
 	gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl);
@@ -3541,6 +3604,8 @@ static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
+	I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0);
+	I915_WRITE(GEN11_GUC_SG_INTR_MASK,  ~0);
 }
 
 static void gen11_irq_reset(struct drm_device *dev)
@@ -4196,6 +4261,10 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	dev_priv->pm_imr = ~dev_priv->pm_ier;
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
 	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
+
+	/* Same thing for GuC interrupts */
+	I915_WRITE(GEN11_GUC_SG_INTR_ENABLE, 0);
+	I915_WRITE(GEN11_GUC_SG_INTR_MASK,  ~0);
 }
 
 static void icp_irq_postinstall(struct drm_device *dev)
@@ -4268,8 +4337,10 @@ static int i8xx_irq_postinstall(struct drm_device *dev)
 	struct intel_uncore *uncore = &dev_priv->uncore;
 	u16 enable_mask;
 
-	I915_WRITE16(EMR, ~(I915_ERROR_PAGE_TABLE |
-			    I915_ERROR_MEMORY_REFRESH));
+	intel_uncore_write16(uncore,
+			     EMR,
+			     ~(I915_ERROR_PAGE_TABLE |
+			       I915_ERROR_MEMORY_REFRESH));
 
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask =
@@ -4295,17 +4366,18 @@ static int i8xx_irq_postinstall(struct drm_device *dev)
 	return 0;
 }
 
-static void i8xx_error_irq_ack(struct drm_i915_private *dev_priv,
+static void i8xx_error_irq_ack(struct drm_i915_private *i915,
 			       u16 *eir, u16 *eir_stuck)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	u16 emr;
 
-	*eir = I915_READ16(EIR);
+	*eir = intel_uncore_read16(uncore, EIR);
 
 	if (*eir)
-		I915_WRITE16(EIR, *eir);
+		intel_uncore_write16(uncore, EIR, *eir);
 
-	*eir_stuck = I915_READ16(EIR);
+	*eir_stuck = intel_uncore_read16(uncore, EIR);
 	if (*eir_stuck == 0)
 		return;
 
@@ -4319,9 +4391,9 @@ static void i8xx_error_irq_ack(struct drm_i915_private *dev_priv,
 	 * (or by a GPU reset) so we mask any bit that
 	 * remains set.
 	 */
-	emr = I915_READ16(EMR);
-	I915_WRITE16(EMR, 0xffff);
-	I915_WRITE16(EMR, emr | *eir_stuck);
+	emr = intel_uncore_read16(uncore, EMR);
+	intel_uncore_write16(uncore, EMR, 0xffff);
+	intel_uncore_write16(uncore, EMR, emr | *eir_stuck);
 }
 
 static void i8xx_error_irq_handler(struct drm_i915_private *dev_priv,
@@ -4380,14 +4452,14 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	do {
 		u32 pipe_stats[I915_MAX_PIPES] = {};
 		u16 eir = 0, eir_stuck = 0;
 		u16 iir;
 
-		iir = I915_READ16(GEN2_IIR);
+		iir = intel_uncore_read16(&dev_priv->uncore, GEN2_IIR);
 		if (iir == 0)
 			break;
 
@@ -4400,7 +4472,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		if (iir & I915_MASTER_ERROR_INTERRUPT)
 			i8xx_error_irq_ack(dev_priv, &eir, &eir_stuck);
 
-		I915_WRITE16(GEN2_IIR, iir);
+		intel_uncore_write16(&dev_priv->uncore, GEN2_IIR, iir);
 
 		if (iir & I915_USER_INTERRUPT)
 			intel_engine_breadcrumbs_irq(dev_priv->engine[RCS0]);
@@ -4411,7 +4483,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
 		i8xx_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -4485,7 +4557,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	do {
 		u32 pipe_stats[I915_MAX_PIPES] = {};
@@ -4524,7 +4596,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
 		i915_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -4633,7 +4705,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		return IRQ_NONE;
 
 	/* IRQs are synced during runtime_suspend, we don't require a wakeref */
-	disable_rpm_wakeref_asserts(dev_priv);
+	disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	do {
 		u32 pipe_stats[I915_MAX_PIPES] = {};
@@ -4674,7 +4746,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 		i965_pipestat_irq_handler(dev_priv, iir, pipe_stats);
 	} while (0);
 
-	enable_rpm_wakeref_asserts(dev_priv);
+	enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
 
 	return ret;
 }
@@ -4703,7 +4775,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
 
-	if (HAS_GUC_SCHED(dev_priv))
+	if (HAS_GUC_SCHED(dev_priv) && INTEL_GEN(dev_priv) < 11)
 		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
 
 	/* Let's track the enabled rps events */
diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h
new file mode 100644
index 000000000000..cb25dd213308
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_irq.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_IRQ_H__
+#define __I915_IRQ_H__
+
+#include <linux/types.h>
+
+#include "i915_drv.h"
+
+struct drm_i915_private;
+struct intel_crtc;
+
+extern void intel_irq_init(struct drm_i915_private *dev_priv);
+extern void intel_irq_fini(struct drm_i915_private *dev_priv);
+int intel_irq_install(struct drm_i915_private *dev_priv);
+void intel_irq_uninstall(struct drm_i915_private *dev_priv);
+
+u32 i915_pipestat_enable_mask(struct drm_i915_private *dev_priv,
+			      enum pipe pipe);
+void
+i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
+		     u32 status_mask);
+
+void
+i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
+		      u32 status_mask);
+
+void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv);
+void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv);
+
+void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
+				   u32 mask,
+				   u32 bits);
+void ilk_update_display_irq(struct drm_i915_private *dev_priv,
+			    u32 interrupt_mask,
+			    u32 enabled_irq_mask);
+static inline void
+ilk_enable_display_irq(struct drm_i915_private *dev_priv, u32 bits)
+{
+	ilk_update_display_irq(dev_priv, bits, bits);
+}
+static inline void
+ilk_disable_display_irq(struct drm_i915_private *dev_priv, u32 bits)
+{
+	ilk_update_display_irq(dev_priv, bits, 0);
+}
+void bdw_update_pipe_irq(struct drm_i915_private *dev_priv,
+			 enum pipe pipe,
+			 u32 interrupt_mask,
+			 u32 enabled_irq_mask);
+static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv,
+				       enum pipe pipe, u32 bits)
+{
+	bdw_update_pipe_irq(dev_priv, pipe, bits, bits);
+}
+static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv,
+					enum pipe pipe, u32 bits)
+{
+	bdw_update_pipe_irq(dev_priv, pipe, bits, 0);
+}
+void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
+				  u32 interrupt_mask,
+				  u32 enabled_irq_mask);
+static inline void
+ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
+{
+	ibx_display_interrupt_update(dev_priv, bits, bits);
+}
+static inline void
+ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, u32 bits)
+{
+	ibx_display_interrupt_update(dev_priv, bits, 0);
+}
+
+void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv);
+void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
+void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
+void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
+void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
+
+static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
+					    u32 mask)
+{
+	return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz;
+}
+
+void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
+void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
+static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * We only use drm_irq_uninstall() at unload and VT switch, so
+	 * this is the only thing we need to check.
+	 */
+	return dev_priv->runtime_pm.irqs_enabled;
+}
+
+int intel_get_crtc_scanline(struct intel_crtc *crtc);
+void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
+				     u8 pipe_mask);
+void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
+				     u8 pipe_mask);
+void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv);
+void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv);
+void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv);
+void gen11_reset_guc_interrupts(struct drm_i915_private *i915);
+void gen11_enable_guc_interrupts(struct drm_i915_private *i915);
+void gen11_disable_guc_interrupts(struct drm_i915_private *i915);
+
+#endif /* __I915_IRQ_H__ */
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index b5be0abbba35..5b07766a1c26 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -87,9 +87,12 @@ i915_param_named_unsafe(enable_psr, int, 0600,
 	"(0=disabled, 1=enabled) "
 	"Default: -1 (use per-chip default)");
 
+i915_param_named_unsafe(force_probe, charp, 0400,
+	"Force probe the driver for specified devices. "
+	"See CONFIG_DRM_I915_FORCE_PROBE for details.");
+
 i915_param_named_unsafe(alpha_support, bool, 0400,
-	"Enable alpha quality driver support for latest hardware. "
-	"See also CONFIG_DRM_I915_ALPHA_SUPPORT.");
+	"Deprecated. See i915.force_probe.");
 
 i915_param_named_unsafe(disable_power_well, int, 0400,
 	"Disable display power wells when possible "
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 3f14e9881a0d..a4770ce46bd2 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -59,11 +59,12 @@ struct drm_printer;
 	param(char *, guc_firmware_path, NULL) \
 	param(char *, huc_firmware_path, NULL) \
 	param(char *, dmc_firmware_path, NULL) \
-	param(int, mmio_debug, 0) \
+	param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO)) \
 	param(int, edp_vswing, 0) \
 	param(int, reset, 2) \
 	param(unsigned int, inject_load_failure, 0) \
 	param(int, fastboot, -1) \
+	param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE) \
 	/* leave bools at the end to not create holes */ \
 	param(bool, alpha_support, IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT)) \
 	param(bool, enable_hangcheck, true) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index f893c2cbce15..6c9f46fc3e12 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -28,10 +28,11 @@
 
 #include <drm/drm_drv.h>
 
+#include "display/intel_fbdev.h"
+
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_selftest.h"
-#include "intel_fbdev.h"
 
 #define PLATFORM(x) .platform = (x)
 #define GEN(x) .gen = (x), .gen_mask = BIT((x) - 1)
@@ -370,6 +371,7 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.ppgtt_type = INTEL_PPGTT_ALIASING, \
 	.ppgtt_size = 31, \
 	I9XX_PIPE_OFFSETS, \
@@ -417,6 +419,7 @@ static const struct intel_device_info intel_sandybridge_m_gt2_info = {
 	.has_llc = 1, \
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
+	.has_rps = true, \
 	.ppgtt_type = INTEL_PPGTT_FULL, \
 	.ppgtt_size = 31, \
 	IVB_PIPE_OFFSETS, \
@@ -470,6 +473,7 @@ static const struct intel_device_info intel_valleyview_info = {
 	.num_pipes = 2,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.display.has_gmch = 1,
 	.display.has_hotplug = 1,
 	.ppgtt_type = INTEL_PPGTT_FULL,
@@ -565,6 +569,7 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_64bit_reloc = 1,
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
+	.has_rps = true,
 	.has_logical_ring_contexts = 1,
 	.display.has_gmch = 1,
 	.ppgtt_type = INTEL_PPGTT_FULL,
@@ -596,8 +601,6 @@ static const struct intel_device_info intel_cherryview_info = {
 
 #define SKL_PLATFORM \
 	GEN9_FEATURES, \
-	/* Display WA #0477 WaDisableIPC: skl */ \
-	.display.has_ipc = 0, \
 	PLATFORM(INTEL_SKYLAKE)
 
 static const struct intel_device_info intel_skylake_gt1_info = {
@@ -640,6 +643,7 @@ static const struct intel_device_info intel_skylake_gt4_info = {
 	.has_runtime_pm = 1, \
 	.display.has_csr = 1, \
 	.has_rc6 = 1, \
+	.has_rps = true, \
 	.display.has_dp_mst = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_logical_ring_preemption = 1, \
@@ -744,7 +748,7 @@ static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_logical_ring_elsq = 1, \
-	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262145 }
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
@@ -756,7 +760,7 @@ static const struct intel_device_info intel_icelake_11_info = {
 static const struct intel_device_info intel_elkhartlake_info = {
 	GEN11_FEATURES,
 	PLATFORM(INTEL_ELKHARTLAKE),
-	.is_alpha_support = 1,
+	.require_force_probe = 1,
 	.engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0),
 	.ppgtt_size = 36,
 };
@@ -850,16 +854,57 @@ static void i915_pci_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
+/* is device_id present in comma separated list of ids */
+static bool force_probe(u16 device_id, const char *devices)
+{
+	char *s, *p, *tok;
+	bool ret;
+
+	/* FIXME: transitional */
+	if (i915_modparams.alpha_support) {
+		DRM_INFO("i915.alpha_support is deprecated, use i915.force_probe=%04x instead\n",
+			 device_id);
+		return true;
+	}
+
+	if (!devices || !*devices)
+		return false;
+
+	/* match everything */
+	if (strcmp(devices, "*") == 0)
+		return true;
+
+	s = kstrdup(devices, GFP_KERNEL);
+	if (!s)
+		return false;
+
+	for (p = s, ret = false; (tok = strsep(&p, ",")) != NULL; ) {
+		u16 val;
+
+		if (kstrtou16(tok, 16, &val) == 0 && val == device_id) {
+			ret = true;
+			break;
+		}
+	}
+
+	kfree(s);
+
+	return ret;
+}
+
 static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct intel_device_info *intel_info =
 		(struct intel_device_info *) ent->driver_data;
 	int err;
 
-	if (IS_ALPHA_SUPPORT(intel_info) && !i915_modparams.alpha_support) {
-		DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n"
-			 "See CONFIG_DRM_I915_ALPHA_SUPPORT or i915.alpha_support module parameter\n"
-			 "to enable support in this kernel version, or check for kernel updates.\n");
+	if (intel_info->require_force_probe &&
+	    !force_probe(pdev->device, i915_modparams.force_probe)) {
+		DRM_INFO("Your graphics device %04x is not properly supported by the driver in this\n"
+			 "kernel version. To force driver probe anyway, use i915.force_probe=%04x\n"
+			 "module parameter or CONFIG_DRM_I915_FORCE_PROBE=%04x configuration option,\n"
+			 "or (recommended) check for kernel updates.\n",
+			 pdev->device, pdev->device, pdev->device);
 		return -ENODEV;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 39a4804091d7..3d8162d28730 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,10 @@
 #include <linux/sizes.h>
 #include <linux/uuid.h>
 
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_lrc_reg.h"
+
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
 #include "i915_oa_bdw.h"
@@ -210,7 +214,6 @@
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
 #include "i915_oa_icl.h"
-#include "intel_lrc_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -1202,28 +1205,35 @@ static int i915_oa_read(struct i915_perf_stream *stream,
 static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 					    struct i915_gem_context *ctx)
 {
-	struct intel_engine_cs *engine = i915->engine[RCS0];
+	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
-	int ret;
+	int err;
 
-	ret = i915_mutex_lock_interruptible(&i915->drm);
-	if (ret)
-		return ERR_PTR(ret);
+	err = i915_mutex_lock_interruptible(&i915->drm);
+	if (err)
+		return ERR_PTR(err);
 
-	/*
-	 * As the ID is the gtt offset of the context's vma we
-	 * pin the vma to ensure the ID remains fixed.
-	 *
-	 * NB: implied RCS engine...
-	 */
-	ce = intel_context_pin(ctx, engine);
-	mutex_unlock(&i915->drm.struct_mutex);
-	if (IS_ERR(ce))
-		return ce;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (ce->engine->class != RENDER_CLASS)
+			continue;
+
+		/*
+		 * As the ID is the gtt offset of the context's vma we
+		 * pin the vma to ensure the ID remains fixed.
+		 */
+		err = intel_context_pin(ce);
+		if (err == 0) {
+			i915->perf.oa.pinned_ctx = ce;
+			break;
+		}
+	}
+	i915_gem_context_unlock_engines(ctx);
 
-	i915->perf.oa.pinned_ctx = ce;
+	mutex_unlock(&i915->drm.struct_mutex);
+	if (err)
+		return ERR_PTR(err);
 
-	return ce;
+	return i915->perf.oa.pinned_ctx;
 }
 
 /**
@@ -1365,7 +1375,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	free_oa_buffer(dev_priv);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv, stream->wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
 	if (stream->ctx)
 		oa_put_render_ctx_id(stream);
@@ -1502,7 +1512,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
 
-	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
+	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
 	if (IS_ERR(bo)) {
 		DRM_ERROR("Failed to allocate OA buffer\n");
 		ret = PTR_ERR(bo);
@@ -1679,7 +1689,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 
 	CTX_REG(reg_state,
 		CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-		gen8_make_rpcs(i915, &ce->sseu));
+		intel_sseu_make_rpcs(i915, &ce->sseu));
 }
 
 /*
@@ -1709,7 +1719,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
-	struct intel_engine_cs *engine = dev_priv->engine[RCS0];
 	unsigned int map_type = i915_coherent_map_type(dev_priv);
 	struct i915_gem_context *ctx;
 	struct i915_request *rq;
@@ -1738,30 +1747,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 *regs;
-
-		/* OA settings will be set upon first use */
-		if (!ce || !ce->state)
-			continue;
-
-		regs = i915_gem_object_pin_map(ce->state->obj, map_type);
-		if (IS_ERR(regs))
-			return PTR_ERR(regs);
+		struct i915_gem_engines_iter it;
+		struct intel_context *ce;
+
+		for_each_gem_engine(ce,
+				    i915_gem_context_lock_engines(ctx),
+				    it) {
+			u32 *regs;
+
+			if (ce->engine->class != RENDER_CLASS)
+				continue;
+
+			/* OA settings will be set upon first use */
+			if (!ce->state)
+				continue;
+
+			regs = i915_gem_object_pin_map(ce->state->obj,
+						       map_type);
+			if (IS_ERR(regs)) {
+				i915_gem_context_unlock_engines(ctx);
+				return PTR_ERR(regs);
+			}
 
-		ce->state->obj->mm.dirty = true;
-		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+			ce->state->obj->mm.dirty = true;
+			regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
 
-		gen8_update_reg_state_unlocked(ce, regs, oa_config);
+			gen8_update_reg_state_unlocked(ce, regs, oa_config);
 
-		i915_gem_object_unpin_map(ce->state->obj);
+			i915_gem_object_unpin_map(ce->state->obj);
+		}
+		i915_gem_context_unlock_engines(ctx);
 	}
 
 	/*
 	 * Apply the configuration by doing one context restore of the edited
 	 * context image.
 	 */
-	rq = i915_request_alloc(engine, dev_priv->kernel_context);
+	rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
@@ -2090,7 +2112,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 *   In our case we are expecting that taking pm + FORCEWAKE
 	 *   references will effectively disable RC6.
 	 */
-	stream->wakeref = intel_runtime_pm_get(dev_priv);
+	stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
 
 	ret = alloc_oa_buffer(dev_priv);
@@ -2126,7 +2148,7 @@ err_oa_buf_alloc:
 	put_oa_config(dev_priv, stream->oa_config);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv, stream->wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
 err_config:
 	if (stream->ctx)
@@ -3005,6 +3027,7 @@ static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
 {
 	return gen8_is_valid_mux_addr(dev_priv, addr) ||
+		addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
 		(addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
 		 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
 }
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 46a52da3db29..8fe46ee920a0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -6,9 +6,12 @@
 
 #include <linux/irq.h>
 #include <linux/pm_runtime.h>
-#include "i915_pmu.h"
-#include "intel_ringbuffer.h"
+
+#include "gt/intel_engine.h"
+
 #include "i915_drv.h"
+#include "i915_pmu.h"
+#include "intel_pm.h"
 
 /* Frequency for the sampling timer for events which need it. */
 #define FREQUENCY 200
@@ -168,7 +171,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 
 	wakeref = 0;
 	if (READ_ONCE(dev_priv->gt.awake))
-		wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+		wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
 	if (!wakeref)
 		return;
 
@@ -204,7 +207,7 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 	}
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 }
 
 static void
@@ -224,9 +227,12 @@ frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 		if (dev_priv->gt.awake) {
 			intel_wakeref_t wakeref;
 
-			with_intel_runtime_pm_if_in_use(dev_priv, wakeref)
-				val = intel_get_cagf(dev_priv,
-						     I915_READ_NOTRACE(GEN6_RPSTAT1));
+			with_intel_runtime_pm_if_in_use(&dev_priv->runtime_pm,
+							wakeref) {
+				val = intel_uncore_read_notrace(&dev_priv->uncore,
+								GEN6_RPSTAT1);
+				val = intel_get_cagf(dev_priv, val);
+			}
 		}
 
 		add_sample_mult(&dev_priv->pmu.sample[__I915_SAMPLE_FREQ_ACT],
@@ -438,14 +444,15 @@ static u64 __get_rc6(struct drm_i915_private *i915)
 static u64 get_rc6(struct drm_i915_private *i915)
 {
 #if IS_ENABLED(CONFIG_PM)
+	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	intel_wakeref_t wakeref;
 	unsigned long flags;
 	u64 val;
 
-	wakeref = intel_runtime_pm_get_if_in_use(i915);
+	wakeref = intel_runtime_pm_get_if_in_use(rpm);
 	if (wakeref) {
 		val = __get_rc6(i915);
-		intel_runtime_pm_put(i915, wakeref);
+		intel_runtime_pm_put(rpm, wakeref);
 
 		/*
 		 * If we are coming back from being runtime suspended we must
@@ -464,8 +471,7 @@ static u64 get_rc6(struct drm_i915_private *i915)
 
 		spin_unlock_irqrestore(&i915->pmu.lock, flags);
 	} else {
-		struct pci_dev *pdev = i915->drm.pdev;
-		struct device *kdev = &pdev->dev;
+		struct device *kdev = rpm->kdev;
 
 		/*
 		 * We are runtime suspended.
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 782183b78f49..7b7016171057 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -37,6 +37,8 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
 	struct drm_i915_query_topology_info topo;
 	u32 slice_length, subslice_length, eu_length, total_length;
+	u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+	u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
 	int ret;
 
 	if (query_item->flags != 0)
@@ -48,12 +50,10 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
 	slice_length = sizeof(sseu->slice_mask);
-	subslice_length = sseu->max_slices *
-		DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
-	eu_length = sseu->max_slices * sseu->max_subslices *
-		DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
-
-	total_length = sizeof(topo) + slice_length + subslice_length + eu_length;
+	subslice_length = sseu->max_slices * subslice_stride;
+	eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
+	total_length = sizeof(topo) + slice_length + subslice_length +
+		       eu_length;
 
 	ret = copy_query_item(&topo, sizeof(topo), total_length,
 			      query_item);
@@ -69,10 +69,9 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
 
 	topo.subslice_offset = slice_length;
-	topo.subslice_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
+	topo.subslice_stride = subslice_stride;
 	topo.eu_offset = slice_length + subslice_length;
-	topo.eu_stride =
-		DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
+	topo.eu_stride = eu_stride;
 
 	if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
 			   &topo, sizeof(topo)))
@@ -96,9 +95,58 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	return total_length;
 }
 
+static int
+query_engine_info(struct drm_i915_private *i915,
+		  struct drm_i915_query_item *query_item)
+{
+	struct drm_i915_query_engine_info __user *query_ptr =
+				u64_to_user_ptr(query_item->data_ptr);
+	struct drm_i915_engine_info __user *info_ptr;
+	struct drm_i915_query_engine_info query;
+	struct drm_i915_engine_info info = { };
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int len, ret;
+
+	if (query_item->flags)
+		return -EINVAL;
+
+	len = sizeof(struct drm_i915_query_engine_info) +
+	      RUNTIME_INFO(i915)->num_engines *
+	      sizeof(struct drm_i915_engine_info);
+
+	ret = copy_query_item(&query, sizeof(query), len, query_item);
+	if (ret != 0)
+		return ret;
+
+	if (query.num_engines || query.rsvd[0] || query.rsvd[1] ||
+	    query.rsvd[2])
+		return -EINVAL;
+
+	info_ptr = &query_ptr->engines[0];
+
+	for_each_engine(engine, i915, id) {
+		info.engine.engine_class = engine->uabi_class;
+		info.engine.engine_instance = engine->instance;
+		info.capabilities = engine->uabi_capabilities;
+
+		if (__copy_to_user(info_ptr, &info, sizeof(info)))
+			return -EFAULT;
+
+		query.num_engines++;
+		info_ptr++;
+	}
+
+	if (__copy_to_user(query_ptr, &query, sizeof(query)))
+		return -EFAULT;
+
+	return len;
+}
+
 static int (* const i915_query_funcs[])(struct drm_i915_private *dev_priv,
 					struct drm_i915_query_item *query_item) = {
 	query_topology_info,
+	query_engine_info,
 };
 
 int i915_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2aa69d347ec4..d6483b5dc8e5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -126,7 +126,7 @@
  */
 #define REG_BIT(__n)							\
 	((u32)(BIT(__n) +						\
-	       BUILD_BUG_ON_ZERO(__builtin_constant_p(__n) &&		\
+	       BUILD_BUG_ON_ZERO(__is_constexpr(__n) &&		\
 				 ((__n) < 0 || (__n) > 31))))
 
 /**
@@ -140,8 +140,8 @@
  */
 #define REG_GENMASK(__high, __low)					\
 	((u32)(GENMASK(__high, __low) +					\
-	       BUILD_BUG_ON_ZERO(__builtin_constant_p(__high) &&	\
-				 __builtin_constant_p(__low) &&		\
+	       BUILD_BUG_ON_ZERO(__is_constexpr(__high) &&	\
+				 __is_constexpr(__low) &&		\
 				 ((__low) < 0 || (__high) > 31 || (__low) > (__high)))))
 
 /*
@@ -153,7 +153,7 @@
  * REG_FIELD_PREP() - Prepare a u32 bitfield value
  * @__mask: shifted mask defining the field's length and position
  * @__val: value to put in the field
-
+ *
  * Local copy of FIELD_PREP() to generate an integer constant expression, force
  * u32 and for consistency with REG_FIELD_GET(), REG_BIT() and REG_GENMASK().
  *
@@ -290,6 +290,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OTHER_CLASS		4
 #define MAX_ENGINE_CLASS	4
 
+#define OTHER_GUC_INSTANCE	0
 #define OTHER_GTPM_INSTANCE	1
 #define MAX_ENGINE_INSTANCE    3
 
@@ -1062,6 +1063,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define NOA_DATA	    _MMIO(0x986C)
 #define NOA_WRITE	    _MMIO(0x9888)
+#define GEN10_NOA_WRITE_HIGH _MMIO(0x9884)
 
 #define _GEN7_PIPEA_DE_LOAD_SL	0x70068
 #define _GEN7_PIPEB_DE_LOAD_SL	0x71068
@@ -1813,7 +1815,6 @@ enum i915_power_well_id {
 #define  PWR_DOWN_LN_3			(0x8 << 4)
 #define  PWR_DOWN_LN_2_1_0		(0x7 << 4)
 #define  PWR_DOWN_LN_1_0		(0x3 << 4)
-#define  PWR_DOWN_LN_1			(0x2 << 4)
 #define  PWR_DOWN_LN_3_1		(0xa << 4)
 #define  PWR_DOWN_LN_3_1_0		(0xb << 4)
 #define  PWR_DOWN_LN_MASK		(0xf << 4)
@@ -1847,6 +1848,9 @@ enum i915_power_well_id {
 #define   VOLTAGE_INFO_MASK		(3 << 24)
 #define   VOLTAGE_INFO_SHIFT		24
 
+#define ICL_PORT_COMP_DW8(port)		_MMIO(_ICL_PORT_COMP_DW(8, port))
+#define   IREFGEN			(1 << 24)
+
 #define CNL_PORT_COMP_DW9		_MMIO(0x162124)
 #define ICL_PORT_COMP_DW9(port)		_MMIO(_ICL_PORT_COMP_DW(9, port))
 
@@ -2509,6 +2513,13 @@ enum i915_power_well_id {
 #define   RING_WAIT_SEMAPHORE	(1 << 10) /* gen6+ */
 
 #define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
+#define   RING_FORCE_TO_NONPRIV_RW		(0 << 28)    /* CFL+ & Gen11+ */
+#define   RING_FORCE_TO_NONPRIV_RD		(1 << 28)
+#define   RING_FORCE_TO_NONPRIV_WR		(2 << 28)
+#define   RING_FORCE_TO_NONPRIV_RANGE_1		(0 << 0)     /* CFL+ & Gen11+ */
+#define   RING_FORCE_TO_NONPRIV_RANGE_4		(1 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_16	(2 << 0)
+#define   RING_FORCE_TO_NONPRIV_RANGE_64	(3 << 0)
 #define   RING_MAX_NONPRIV_SLOTS  12
 
 #define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
@@ -2695,7 +2706,7 @@ enum i915_power_well_id {
 
 #define GFX_MODE	_MMIO(0x2520)
 #define GFX_MODE_GEN7	_MMIO(0x229c)
-#define RING_MODE_GEN7(engine)	_MMIO((engine)->mmio_base + 0x29c)
+#define RING_MODE_GEN7(base)	_MMIO((base) + 0x29c)
 #define   GFX_RUN_LIST_ENABLE		(1 << 15)
 #define   GFX_INTERRUPT_STEERING	(1 << 14)
 #define   GFX_TLB_INVALIDATE_EXPLICIT	(1 << 13)
@@ -2870,6 +2881,7 @@ enum i915_power_well_id {
 #define GFX_FLSH_CNTL_GEN6	_MMIO(0x101008)
 #define   GFX_FLSH_CNTL_EN	(1 << 0)
 #define ECOSKPD		_MMIO(0x21d0)
+#define   ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4)
 #define   ECO_GATING_CX_ONLY	(1 << 3)
 #define   ECO_FLIP_DONE		(1 << 0)
 
@@ -3158,6 +3170,7 @@ enum i915_power_well_id {
 #define ILK_DPFC_FENCE_YOFF	_MMIO(0x43218)
 #define ILK_DPFC_CHICKEN	_MMIO(0x43224)
 #define   ILK_DPFC_DISABLE_DUMMY0 (1 << 8)
+#define   ILK_DPFC_CHICKEN_COMP_DUMMY_PIXEL	(1 << 14)
 #define   ILK_DPFC_NUKE_ON_ANY_MODIFICATION	(1 << 23)
 #define ILK_FBC_RT_BASE		_MMIO(0x2128)
 #define   ILK_FBC_RT_VALID	(1 << 0)
@@ -4553,7 +4566,7 @@ enum {
 #define   HDMI_MODE_SELECT_HDMI			(1 << 9) /* HDMI only */
 #define   HDMI_MODE_SELECT_DVI			(0 << 9) /* HDMI only */
 #define   HDMI_COLOR_RANGE_16_235		(1 << 8) /* HDMI only */
-#define   SDVO_AUDIO_ENABLE			(1 << 6)
+#define   HDMI_AUDIO_ENABLE			(1 << 6) /* HDMI only */
 /* VSYNC/HSYNC bits new with 965, default is to be set */
 #define   SDVO_VSYNC_ACTIVE_HIGH		(1 << 4)
 #define   SDVO_HSYNC_ACTIVE_HIGH		(1 << 3)
@@ -4693,7 +4706,7 @@ enum {
 #define   VIDEO_DIP_FREQ_2VSYNC		(2 << 16)
 #define   VIDEO_DIP_FREQ_MASK		(3 << 16)
 /* HSW and later: */
-#define   DRM_DIP_ENABLE		(1 << 28)
+#define   VIDEO_DIP_ENABLE_DRM_GLK	(1 << 28)
 #define   PSR_VSC_BIT_7_SET		(1 << 27)
 #define   VSC_SELECT_MASK		(0x3 << 25)
 #define   VSC_SELECT_SHIFT		25
@@ -5769,6 +5782,7 @@ enum {
 #define _PIPE_MISC_B			0x71030
 #define   PIPEMISC_YUV420_ENABLE	(1 << 27)
 #define   PIPEMISC_YUV420_MODE_FULL_BLEND (1 << 26)
+#define   PIPEMISC_HDR_MODE_PRECISION	(1 << 23) /* icl+ */
 #define   PIPEMISC_OUTPUT_COLORSPACE_YUV  (1 << 11)
 #define   PIPEMISC_DITHER_BPC_MASK	(7 << 5)
 #define   PIPEMISC_DITHER_8_BPC		(0 << 5)
@@ -7197,7 +7211,8 @@ enum {
 #define  GAMMA_MODE_MODE_8BIT	(0 << 0)
 #define  GAMMA_MODE_MODE_10BIT	(1 << 0)
 #define  GAMMA_MODE_MODE_12BIT	(2 << 0)
-#define  GAMMA_MODE_MODE_SPLIT	(3 << 0)
+#define  GAMMA_MODE_MODE_SPLIT	(3 << 0) /* ivb-bdw */
+#define  GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED	(3 << 0) /* icl + */
 
 /* DMC/CSR */
 #define CSR_PROGRAM(i)		_MMIO(0x80000 + (i) * 4)
@@ -7489,6 +7504,9 @@ enum {
 #define GEN11_CRYPTO_RSVD_INTR_MASK	_MMIO(0x1900f0)
 #define GEN11_GUNIT_CSME_INTR_MASK	_MMIO(0x1900f4)
 
+#define   ENGINE1_MASK			REG_GENMASK(31, 16)
+#define   ENGINE0_MASK			REG_GENMASK(15, 0)
+
 #define ILK_DISPLAY_CHICKEN2	_MMIO(0x42004)
 /* Required on all Ironlake and Sandybridge according to the B-Spec. */
 #define  ILK_ELPIN_409_SELECT	(1 << 25)
@@ -7503,6 +7521,10 @@ enum {
 #define  ILK_eDP_A_DISABLE		(1 << 24)
 #define  HSW_CDCLK_LIMIT		(1 << 24)
 #define  ILK_DESKTOP			(1 << 23)
+#define  HSW_CPU_SSC_ENABLE		(1 << 21)
+
+#define FUSE_STRAP3			_MMIO(0x42020)
+#define  HSW_REF_CLK_SELECT		(1 << 1)
 
 #define ILK_DSPCLK_GATE_D			_MMIO(0x42020)
 #define   ILK_VRHUNIT_CLOCK_GATE_DISABLE	(1 << 28)
@@ -8148,6 +8170,7 @@ enum {
 #define _HSW_VIDEO_DIP_SPD_DATA_A	0x602A0
 #define _HSW_VIDEO_DIP_GMP_DATA_A	0x602E0
 #define _HSW_VIDEO_DIP_VSC_DATA_A	0x60320
+#define _GLK_VIDEO_DIP_DRM_DATA_A	0x60440
 #define _HSW_VIDEO_DIP_AVI_ECC_A	0x60240
 #define _HSW_VIDEO_DIP_VS_ECC_A		0x60280
 #define _HSW_VIDEO_DIP_SPD_ECC_A	0x602C0
@@ -8161,6 +8184,7 @@ enum {
 #define _HSW_VIDEO_DIP_SPD_DATA_B	0x612A0
 #define _HSW_VIDEO_DIP_GMP_DATA_B	0x612E0
 #define _HSW_VIDEO_DIP_VSC_DATA_B	0x61320
+#define _GLK_VIDEO_DIP_DRM_DATA_B	0x61440
 #define _HSW_VIDEO_DIP_BVI_ECC_B	0x61240
 #define _HSW_VIDEO_DIP_VS_ECC_B		0x61280
 #define _HSW_VIDEO_DIP_SPD_ECC_B	0x612C0
@@ -8186,6 +8210,7 @@ enum {
 #define HSW_TVIDEO_DIP_SPD_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_SPD_DATA_A + (i) * 4)
 #define HSW_TVIDEO_DIP_GMP_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_GMP_DATA_A + (i) * 4)
 #define HSW_TVIDEO_DIP_VSC_DATA(trans, i)	_MMIO_TRANS2(trans, _HSW_VIDEO_DIP_VSC_DATA_A + (i) * 4)
+#define GLK_TVIDEO_DIP_DRM_DATA(trans, i)	_MMIO_TRANS2(trans, _GLK_VIDEO_DIP_DRM_DATA_A + (i) * 4)
 #define ICL_VIDEO_DIP_PPS_DATA(trans, i)	_MMIO_TRANS2(trans, _ICL_VIDEO_DIP_PPS_DATA_A + (i) * 4)
 #define ICL_VIDEO_DIP_PPS_ECC(trans, i)		_MMIO_TRANS2(trans, _ICL_VIDEO_DIP_PPS_ECC_A + (i) * 4)
 
@@ -8776,6 +8801,9 @@ enum {
 #define   GEN6_PCODE_WRITE_MIN_FREQ_TABLE	0x8
 #define   GEN6_PCODE_READ_MIN_FREQ_TABLE	0x9
 #define   GEN6_READ_OC_PARAMS			0xc
+#define   ICL_PCODE_MEM_SUBSYSYSTEM_INFO	0xd
+#define     ICL_PCODE_MEM_SS_READ_GLOBAL_INFO	(0x0 << 8)
+#define     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point)	(((point) << 16) | (0x1 << 8))
 #define   GEN6_PCODE_READ_D_COMP		0x10
 #define   GEN6_PCODE_WRITE_D_COMP		0x11
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
@@ -8865,6 +8893,7 @@ enum {
 #define   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC	(1 << 7)
 
 #define GEN10_SAMPLER_MODE		_MMIO(0xE18C)
+#define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
 
 /* IVYBRIDGE DPF */
 #define GEN7_L3CDERRST1(slice)		_MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */
@@ -9012,32 +9041,32 @@ enum {
 /* HSW Audio */
 #define _HSW_AUD_CONFIG_A		0x65000
 #define _HSW_AUD_CONFIG_B		0x65100
-#define HSW_AUD_CFG(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B)
+#define HSW_AUD_CFG(trans)		_MMIO_TRANS(trans, _HSW_AUD_CONFIG_A, _HSW_AUD_CONFIG_B)
 
 #define _HSW_AUD_MISC_CTRL_A		0x65010
 #define _HSW_AUD_MISC_CTRL_B		0x65110
-#define HSW_AUD_MISC_CTRL(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B)
+#define HSW_AUD_MISC_CTRL(trans)	_MMIO_TRANS(trans, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B)
 
 #define _HSW_AUD_M_CTS_ENABLE_A		0x65028
 #define _HSW_AUD_M_CTS_ENABLE_B		0x65128
-#define HSW_AUD_M_CTS_ENABLE(pipe)	_MMIO_PIPE(pipe, _HSW_AUD_M_CTS_ENABLE_A, _HSW_AUD_M_CTS_ENABLE_B)
+#define HSW_AUD_M_CTS_ENABLE(trans)	_MMIO_TRANS(trans, _HSW_AUD_M_CTS_ENABLE_A, _HSW_AUD_M_CTS_ENABLE_B)
 #define   AUD_M_CTS_M_VALUE_INDEX	(1 << 21)
 #define   AUD_M_CTS_M_PROG_ENABLE	(1 << 20)
 #define   AUD_CONFIG_M_MASK		0xfffff
 
 #define _HSW_AUD_DIP_ELD_CTRL_ST_A	0x650b4
 #define _HSW_AUD_DIP_ELD_CTRL_ST_B	0x651b4
-#define HSW_AUD_DIP_ELD_CTRL(pipe)	_MMIO_PIPE(pipe, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B)
+#define HSW_AUD_DIP_ELD_CTRL(trans)	_MMIO_TRANS(trans, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B)
 
 /* Audio Digital Converter */
 #define _HSW_AUD_DIG_CNVT_1		0x65080
 #define _HSW_AUD_DIG_CNVT_2		0x65180
-#define AUD_DIG_CNVT(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2)
+#define AUD_DIG_CNVT(trans)		_MMIO_TRANS(trans, _HSW_AUD_DIG_CNVT_1, _HSW_AUD_DIG_CNVT_2)
 #define DIP_PORT_SEL_MASK		0x3
 
 #define _HSW_AUD_EDID_DATA_A		0x65050
 #define _HSW_AUD_EDID_DATA_B		0x65150
-#define HSW_AUD_EDID_DATA(pipe)		_MMIO_PIPE(pipe, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B)
+#define HSW_AUD_EDID_DATA(trans)	_MMIO_TRANS(trans, _HSW_AUD_EDID_DATA_A, _HSW_AUD_EDID_DATA_B)
 
 #define HSW_AUD_PIPE_CONV_CFG		_MMIO(0x6507c)
 #define HSW_AUD_PIN_ELD_CP_VLD		_MMIO(0x650c0)
@@ -9449,24 +9478,28 @@ enum skl_power_gate {
 /* SPLL */
 #define SPLL_CTL			_MMIO(0x46020)
 #define  SPLL_PLL_ENABLE		(1 << 31)
-#define  SPLL_PLL_SSC			(1 << 28)
-#define  SPLL_PLL_NON_SSC		(2 << 28)
-#define  SPLL_PLL_LCPLL			(3 << 28)
-#define  SPLL_PLL_REF_MASK		(3 << 28)
-#define  SPLL_PLL_FREQ_810MHz		(0 << 26)
-#define  SPLL_PLL_FREQ_1350MHz		(1 << 26)
-#define  SPLL_PLL_FREQ_2700MHz		(2 << 26)
-#define  SPLL_PLL_FREQ_MASK		(3 << 26)
+#define  SPLL_REF_BCLK			(0 << 28)
+#define  SPLL_REF_MUXED_SSC		(1 << 28) /* CPU SSC if fused enabled, PCH SSC otherwise */
+#define  SPLL_REF_NON_SSC_HSW		(2 << 28)
+#define  SPLL_REF_PCH_SSC_BDW		(2 << 28)
+#define  SPLL_REF_LCPLL			(3 << 28)
+#define  SPLL_REF_MASK			(3 << 28)
+#define  SPLL_FREQ_810MHz		(0 << 26)
+#define  SPLL_FREQ_1350MHz		(1 << 26)
+#define  SPLL_FREQ_2700MHz		(2 << 26)
+#define  SPLL_FREQ_MASK			(3 << 26)
 
 /* WRPLL */
 #define _WRPLL_CTL1			0x46040
 #define _WRPLL_CTL2			0x46060
 #define WRPLL_CTL(pll)			_MMIO_PIPE(pll, _WRPLL_CTL1, _WRPLL_CTL2)
 #define  WRPLL_PLL_ENABLE		(1 << 31)
-#define  WRPLL_PLL_SSC			(1 << 28)
-#define  WRPLL_PLL_NON_SSC		(2 << 28)
-#define  WRPLL_PLL_LCPLL		(3 << 28)
-#define  WRPLL_PLL_REF_MASK		(3 << 28)
+#define  WRPLL_REF_BCLK			(0 << 28)
+#define  WRPLL_REF_PCH_SSC		(1 << 28)
+#define  WRPLL_REF_MUXED_SSC_BDW	(2 << 28) /* CPU SSC if fused enabled, PCH SSC otherwise */
+#define  WRPLL_REF_SPECIAL_HSW		(2 << 28) /* muxed SSC (ULT), non-SSC (non-ULT) */
+#define  WRPLL_REF_LCPLL		(3 << 28)
+#define  WRPLL_REF_MASK			(3 << 28)
 /* WRPLL divider programming */
 #define  WRPLL_DIVIDER_REFERENCE(x)	((x) << 0)
 #define  WRPLL_DIVIDER_REF_MASK		(0xff)
@@ -9526,11 +9559,16 @@ enum skl_power_gate {
 #define  TRANS_MSA_12_BPC		(3 << 5)
 #define  TRANS_MSA_16_BPC		(4 << 5)
 #define  TRANS_MSA_CEA_RANGE		(1 << 3)
+#define  TRANS_MSA_USE_VSC_SDP		(1 << 14)
 
 /* LCPLL Control */
 #define LCPLL_CTL			_MMIO(0x130040)
 #define  LCPLL_PLL_DISABLE		(1 << 31)
 #define  LCPLL_PLL_LOCK			(1 << 30)
+#define  LCPLL_REF_NON_SSC		(0 << 28)
+#define  LCPLL_REF_BCLK			(2 << 28)
+#define  LCPLL_REF_PCH_SSC		(3 << 28)
+#define  LCPLL_REF_MASK			(3 << 28)
 #define  LCPLL_CLK_FREQ_MASK		(3 << 26)
 #define  LCPLL_CLK_FREQ_450		(0 << 26)
 #define  LCPLL_CLK_FREQ_54O_BDW		(1 << 26)
@@ -10147,6 +10185,22 @@ enum skl_power_gate {
 #define PRE_CSC_GAMC_INDEX(pipe)	_MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B)
 #define PRE_CSC_GAMC_DATA(pipe)		_MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B)
 
+/* ICL Multi segmented gamma */
+#define _PAL_PREC_MULTI_SEG_INDEX_A	0x4A408
+#define _PAL_PREC_MULTI_SEG_INDEX_B	0x4AC08
+#define  PAL_PREC_MULTI_SEGMENT_AUTO_INCREMENT		REG_BIT(15)
+#define  PAL_PREC_MULTI_SEGMENT_INDEX_VALUE_MASK	REG_GENMASK(4, 0)
+
+#define _PAL_PREC_MULTI_SEG_DATA_A	0x4A40C
+#define _PAL_PREC_MULTI_SEG_DATA_B	0x4AC0C
+
+#define PREC_PAL_MULTI_SEG_INDEX(pipe)	_MMIO_PIPE(pipe, \
+					_PAL_PREC_MULTI_SEG_INDEX_A, \
+					_PAL_PREC_MULTI_SEG_INDEX_B)
+#define PREC_PAL_MULTI_SEG_DATA(pipe)	_MMIO_PIPE(pipe, \
+					_PAL_PREC_MULTI_SEG_DATA_A, \
+					_PAL_PREC_MULTI_SEG_DATA_B)
+
 /* pipe CSC & degamma/gamma LUTs on CHV */
 #define _CGM_PIPE_A_CSC_COEFF01	(VLV_DISPLAY_BASE + 0x67900)
 #define _CGM_PIPE_A_CSC_COEFF23	(VLV_DISPLAY_BASE + 0x67904)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index c88e538b2ef4..a195a92d0105 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,16 +29,20 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
 
+#include "gem/i915_gem_context.h"
+#include "gt/intel_context.h"
+
 #include "i915_active.h"
 #include "i915_drv.h"
 #include "i915_globals.h"
-#include "i915_reset.h"
 #include "intel_pm.h"
 
 struct execute_cb {
 	struct list_head link;
 	struct irq_work work;
 	struct i915_sw_fence *fence;
+	void (*hook)(struct i915_request *rq, struct dma_fence *signal);
+	struct i915_request *signal;
 };
 
 static struct i915_global_request {
@@ -132,19 +136,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static void reserve_gt(struct drm_i915_private *i915)
-{
-	if (!i915->gt.active_requests++)
-		i915_gem_unpark(i915);
-}
-
-static void unreserve_gt(struct drm_i915_private *i915)
-{
-	GEM_BUG_ON(!i915->gt.active_requests);
-	if (!--i915->gt.active_requests)
-		i915_gem_park(i915);
-}
-
 static void advance_ring(struct i915_request *request)
 {
 	struct intel_ring *ring = request->ring;
@@ -192,84 +183,23 @@ static void free_capture_list(struct i915_request *request)
 	}
 }
 
-static void __retire_engine_request(struct intel_engine_cs *engine,
-				    struct i915_request *rq)
-{
-	GEM_TRACE("%s(%s) fence %llx:%lld, current %d\n",
-		  __func__, engine->name,
-		  rq->fence.context, rq->fence.seqno,
-		  hwsp_seqno(rq));
-
-	GEM_BUG_ON(!i915_request_completed(rq));
-
-	local_irq_disable();
-
-	spin_lock(&engine->timeline.lock);
-	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
-	list_del_init(&rq->link);
-	spin_unlock(&engine->timeline.lock);
-
-	spin_lock(&rq->lock);
-	i915_request_mark_complete(rq);
-	if (!i915_request_signaled(rq))
-		dma_fence_signal_locked(&rq->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-		i915_request_cancel_breadcrumb(rq);
-	if (rq->waitboost) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
-
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		intel_context_unpin(engine->last_retired_context);
-	engine->last_retired_context = rq->hw_context;
-}
-
-static void __retire_engine_upto(struct intel_engine_cs *engine,
-				 struct i915_request *rq)
-{
-	struct i915_request *tmp;
-
-	if (list_empty(&rq->link))
-		return;
-
-	do {
-		tmp = list_first_entry(&engine->timeline.requests,
-				       typeof(*tmp), link);
-
-		GEM_BUG_ON(tmp->engine != engine);
-		__retire_engine_request(engine, tmp);
-	} while (tmp != rq);
-}
-
-static void i915_request_retire(struct i915_request *request)
+static bool i915_request_retire(struct i915_request *rq)
 {
 	struct i915_active_request *active, *next;
 
-	GEM_TRACE("%s fence %llx:%lld, current %d\n",
-		  request->engine->name,
-		  request->fence.context, request->fence.seqno,
-		  hwsp_seqno(request));
+	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	if (!i915_request_completed(rq))
+		return false;
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
-	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
-	GEM_BUG_ON(!i915_request_completed(request));
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  hwsp_seqno(rq));
 
-	trace_i915_request_retire(request);
+	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+	trace_i915_request_retire(rq);
 
-	advance_ring(request);
-	free_capture_list(request);
+	advance_ring(rq);
 
 	/*
 	 * Walk through the active list, calling retire on each. This allows
@@ -281,7 +211,7 @@ static void i915_request_retire(struct i915_request *request)
 	 * pass along the auxiliary information (to avoid dereferencing
 	 * the node after the callback).
 	 */
-	list_for_each_entry_safe(active, next, &request->active_list, link) {
+	list_for_each_entry_safe(active, next, &rq->active_list, link) {
 		/*
 		 * In microbenchmarks or focusing upon time inside the kernel,
 		 * we may spend an inordinate amount of time simply handling
@@ -297,19 +227,40 @@ static void i915_request_retire(struct i915_request *request)
 		INIT_LIST_HEAD(&active->link);
 		RCU_INIT_POINTER(active->request, NULL);
 
-		active->retire(active, request);
+		active->retire(active, rq);
 	}
 
-	i915_request_remove_from_client(request);
+	local_irq_disable();
 
-	intel_context_unpin(request->hw_context);
+	spin_lock(&rq->engine->active.lock);
+	list_del(&rq->sched.link);
+	spin_unlock(&rq->engine->active.lock);
 
-	__retire_engine_upto(request->engine, request);
+	spin_lock(&rq->lock);
+	i915_request_mark_complete(rq);
+	if (!i915_request_signaled(rq))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		i915_request_cancel_breadcrumb(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+	}
+	spin_unlock(&rq->lock);
 
-	unreserve_gt(request->i915);
+	local_irq_enable();
+
+	intel_context_exit(rq->hw_context);
+	intel_context_unpin(rq->hw_context);
+
+	i915_request_remove_from_client(rq);
+	list_del(&rq->link);
+
+	free_capture_list(rq);
+	i915_sched_node_fini(&rq->sched);
+	i915_request_put(rq);
 
-	i915_sched_node_fini(&request->sched);
-	i915_request_put(request);
+	return true;
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
@@ -331,9 +282,7 @@ void i915_request_retire_upto(struct i915_request *rq)
 	do {
 		tmp = list_first_entry(&ring->request_list,
 				       typeof(*tmp), ring_link);
-
-		i915_request_retire(tmp);
-	} while (tmp != rq);
+	} while (i915_request_retire(tmp) && tmp != rq);
 }
 
 static void irq_execute_cb(struct irq_work *wrk)
@@ -344,6 +293,17 @@ static void irq_execute_cb(struct irq_work *wrk)
 	kmem_cache_free(global.slab_execute_cbs, cb);
 }
 
+static void irq_execute_cb_hook(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	cb->hook(container_of(cb->fence, struct i915_request, submit),
+		 &cb->signal->fence);
+	i915_request_put(cb->signal);
+
+	irq_execute_cb(wrk);
+}
+
 static void __notify_execute_cb(struct i915_request *rq)
 {
 	struct execute_cb *cb;
@@ -370,14 +330,19 @@ static void __notify_execute_cb(struct i915_request *rq)
 }
 
 static int
-i915_request_await_execution(struct i915_request *rq,
-			     struct i915_request *signal,
-			     gfp_t gfp)
+__i915_request_await_execution(struct i915_request *rq,
+			       struct i915_request *signal,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal),
+			       gfp_t gfp)
 {
 	struct execute_cb *cb;
 
-	if (i915_request_is_active(signal))
+	if (i915_request_is_active(signal)) {
+		if (hook)
+			hook(rq, &signal->fence);
 		return 0;
+	}
 
 	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
 	if (!cb)
@@ -387,8 +352,18 @@ i915_request_await_execution(struct i915_request *rq,
 	i915_sw_fence_await(cb->fence);
 	init_irq_work(&cb->work, irq_execute_cb);
 
+	if (hook) {
+		cb->hook = hook;
+		cb->signal = i915_request_get(signal);
+		cb->work.func = irq_execute_cb_hook;
+	}
+
 	spin_lock_irq(&signal->lock);
 	if (i915_request_is_active(signal)) {
+		if (hook) {
+			hook(rq, &signal->fence);
+			i915_request_put(signal);
+		}
 		i915_sw_fence_complete(cb->fence);
 		kmem_cache_free(global.slab_execute_cbs, cb);
 	} else {
@@ -399,28 +374,17 @@ i915_request_await_execution(struct i915_request *rq,
 	return 0;
 }
 
-static void move_to_timeline(struct i915_request *request,
-			     struct i915_timeline *timeline)
-{
-	GEM_BUG_ON(request->timeline == &request->engine->timeline);
-	lockdep_assert_held(&request->engine->timeline.lock);
-
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
-}
-
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
 
-	GEM_TRACE("%s fence %llx:%lld -> current %d\n",
+	GEM_TRACE("%s fence %llx:%lld, current %d\n",
 		  engine->name,
 		  request->fence.context, request->fence.seqno,
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (i915_gem_context_is_banned(request->gem_context))
 		i915_request_skip(request, -EIO);
@@ -443,11 +407,13 @@ void __i915_request_submit(struct i915_request *request)
 	 */
 	if (request->sched.semaphores &&
 	    i915_sw_fence_signaled(&request->semaphore))
-		request->hw_context->saturated |= request->sched.semaphores;
+		engine->saturated |= request->sched.semaphores;
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 
+	list_move_tail(&request->sched.link, &engine->active.requests);
+
 	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
 	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
 
@@ -463,8 +429,7 @@ void __i915_request_submit(struct i915_request *request)
 	engine->emit_fini_breadcrumb(request,
 				     request->ring->vaddr + request->postfix);
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	move_to_timeline(request, &engine->timeline);
+	engine->serial++;
 
 	trace_i915_request_execute(request);
 }
@@ -475,11 +440,11 @@ void i915_request_submit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
@@ -492,7 +457,7 @@ void __i915_request_unsubmit(struct i915_request *request)
 		  hwsp_seqno(request));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
@@ -510,8 +475,11 @@ void __i915_request_unsubmit(struct i915_request *request)
 
 	spin_unlock(&request->lock);
 
-	/* Transfer back from the global per-engine timeline to per-context */
-	move_to_timeline(request, request->timeline);
+	/* We've already spun, don't charge on resubmitting. */
+	if (request->sched.semaphores && i915_request_started(request)) {
+		request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
+		request->sched.semaphores = 0;
+	}
 
 	/*
 	 * We don't need to wake_up any waiters on request->execute, they
@@ -528,11 +496,11 @@ void i915_request_unsubmit(struct i915_request *request)
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -588,16 +556,13 @@ static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *rq, *rn;
 
-	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) {
-		if (!i915_request_completed(rq))
+	list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link)
+		if (!i915_request_retire(rq))
 			break;
-
-		i915_request_retire(rq);
-	}
 }
 
 static noinline struct i915_request *
-i915_request_alloc_slow(struct intel_context *ce)
+request_alloc_slow(struct intel_context *ce, gfp_t gfp)
 {
 	struct intel_ring *ring = ce->ring;
 	struct i915_request *rq;
@@ -605,6 +570,18 @@ i915_request_alloc_slow(struct intel_context *ce)
 	if (list_empty(&ring->request_list))
 		goto out;
 
+	if (!gfpflags_allow_blocking(gfp))
+		goto out;
+
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	i915_request_retire(rq);
+
+	rq = kmem_cache_alloc(global.slab_requests,
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+	if (rq)
+		return rq;
+
 	/* Ratelimit ourselves to prevent oom from malicious clients */
 	rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link);
 	cond_synchronize_rcu(rq->rcustate);
@@ -613,62 +590,21 @@ i915_request_alloc_slow(struct intel_context *ce)
 	ring_retire_requests(ring);
 
 out:
-	return kmem_cache_alloc(global.slab_requests, GFP_KERNEL);
+	return kmem_cache_alloc(global.slab_requests, gfp);
 }
 
-/**
- * i915_request_alloc - allocate a request structure
- *
- * @engine: engine that we wish to issue the request on.
- * @ctx: context that the request will be associated with.
- *
- * Returns a pointer to the allocated request if successful,
- * or an error code if not.
- */
 struct i915_request *
-i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
+__i915_request_create(struct intel_context *ce, gfp_t gfp)
 {
-	struct drm_i915_private *i915 = engine->i915;
-	struct intel_context *ce;
-	struct i915_timeline *tl;
+	struct i915_timeline *tl = ce->ring->timeline;
 	struct i915_request *rq;
 	u32 seqno;
 	int ret;
 
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	/*
-	 * Preempt contexts are reserved for exclusive use to inject a
-	 * preemption context switch. They are never to be used for any trivial
-	 * request!
-	 */
-	GEM_BUG_ON(ctx == i915->preempt_context);
-
-	/*
-	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
-	 * EIO if the GPU is already wedged.
-	 */
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ERR_PTR(ret);
-
-	/*
-	 * Pinning the contexts may generate requests in order to acquire
-	 * GGTT space, so do this first before we reserve a seqno for
-	 * ourselves.
-	 */
-	ce = intel_context_pin(ctx, engine);
-	if (IS_ERR(ce))
-		return ERR_CAST(ce);
-
-	reserve_gt(i915);
-	mutex_lock(&ce->ring->timeline->mutex);
+	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	/* Move our oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
-	if (!list_is_last(&rq->ring_link, &ce->ring->request_list) &&
-	    i915_request_completed(rq))
-		i915_request_retire(rq);
+	/* Check that the caller provided an already pinned context */
+	__intel_context_pin(ce);
 
 	/*
 	 * Beware: Dragons be flying overhead.
@@ -700,30 +636,25 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * Do not use kmem_cache_zalloc() here!
 	 */
 	rq = kmem_cache_alloc(global.slab_requests,
-			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
 	if (unlikely(!rq)) {
-		rq = i915_request_alloc_slow(ce);
+		rq = request_alloc_slow(ce, gfp);
 		if (!rq) {
 			ret = -ENOMEM;
 			goto err_unreserve;
 		}
 	}
 
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->execute_cb);
-
-	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
 	if (ret)
 		goto err_free;
 
-	rq->i915 = i915;
-	rq->engine = engine;
-	rq->gem_context = ctx;
+	rq->i915 = ce->engine->i915;
 	rq->hw_context = ce;
+	rq->gem_context = ce->gem_context;
+	rq->engine = ce->engine;
 	rq->ring = ce->ring;
 	rq->timeline = tl;
-	GEM_BUG_ON(rq->timeline == &engine->timeline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
@@ -743,6 +674,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->waitboost = false;
+	rq->execution_mask = ALL_ENGINES;
+
+	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
@@ -756,7 +691,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 * around inside i915_request_add() there is sufficient space at
 	 * the beginning of the ring as well.
 	 */
-	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
+	rq->reserved_space =
+		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);
 
 	/*
 	 * Record the position of the start of the request so that
@@ -766,20 +702,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	 */
 	rq->head = rq->ring->emit;
 
-	ret = engine->request_alloc(rq);
+	ret = rq->engine->request_alloc(rq);
 	if (ret)
 		goto err_unwind;
 
-	/* Keep a second pin for the dual retirement along engine and ring */
-	__intel_context_pin(ce);
-
 	rq->infix = rq->ring->emit; /* end of header; start of user payload */
 
-	/* Check that we didn't interrupt ourselves with a new request */
-	lockdep_assert_held(&rq->timeline->mutex);
-	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
-	rq->cookie = lockdep_pin_lock(&rq->timeline->mutex);
-
+	intel_context_mark_active(ce);
 	return rq;
 
 err_unwind:
@@ -793,12 +722,41 @@ err_unwind:
 err_free:
 	kmem_cache_free(global.slab_requests, rq);
 err_unreserve:
-	mutex_unlock(&ce->ring->timeline->mutex);
-	unreserve_gt(i915);
 	intel_context_unpin(ce);
 	return ERR_PTR(ret);
 }
 
+struct i915_request *
+i915_request_create(struct intel_context *ce)
+{
+	struct i915_request *rq;
+	int err;
+
+	err = intel_context_timeline_lock(ce);
+	if (err)
+		return ERR_PTR(err);
+
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ce->ring->request_list))
+		i915_request_retire(rq);
+
+	intel_context_enter(ce);
+	rq = __i915_request_create(ce, GFP_KERNEL);
+	intel_context_exit(ce); /* active reference transferred to request */
+	if (IS_ERR(rq))
+		goto err_unlock;
+
+	/* Check that we do not interrupt ourselves with a new request */
+	rq->cookie = lockdep_pin_lock(&ce->ring->timeline->mutex);
+
+	return rq;
+
+err_unlock:
+	intel_context_timeline_unlock(ce);
+	return rq;
+}
+
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
@@ -829,7 +787,7 @@ already_busywaiting(struct i915_request *rq)
 	 *
 	 * See the are-we-too-late? check in __i915_request_submit().
 	 */
-	return rq->sched.semaphores | rq->hw_context->saturated;
+	return rq->sched.semaphores | rq->engine->saturated;
 }
 
 static int
@@ -854,13 +812,13 @@ emit_semaphore_wait(struct i915_request *to,
 	if (err < 0)
 		return err;
 
-	/* We need to pin the signaler's HWSP until we are finished reading. */
-	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
+	/* Only submit our spinner after the signaler is running! */
+	err = __i915_request_await_execution(to, from, NULL, gfp);
 	if (err)
 		return err;
 
-	/* Only submit our spinner after the signaler is running! */
-	err = i915_request_await_execution(to, from, gfp);
+	/* We need to pin the signaler's HWSP until we are finished reading. */
+	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
 		return err;
 
@@ -991,6 +949,52 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 	return 0;
 }
 
+int
+i915_request_await_execution(struct i915_request *rq,
+			     struct dma_fence *fence,
+			     void (*hook)(struct i915_request *rq,
+					  struct dma_fence *signal))
+{
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
+	int ret;
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		/* XXX Error for signal-on-any fence arrays */
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * We don't squash repeated fence dependencies here as we
+		 * want to run our callback in all cases.
+		 */
+
+		if (dma_fence_is_i915(fence))
+			ret = __i915_request_await_execution(rq,
+							     to_request(fence),
+							     hook,
+							     I915_FENCE_GFP);
+		else
+			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
+							    I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	} while (--nchild);
+
+	return 0;
+}
+
 /**
  * i915_request_await_object - set this request to (async) wait upon a bo
  * @to: request we are wishing to use
@@ -1023,7 +1027,7 @@ i915_request_await_object(struct i915_request *to,
 		struct dma_fence **shared;
 		unsigned int count, i;
 
-		ret = reservation_object_get_fences_rcu(obj->resv,
+		ret = reservation_object_get_fences_rcu(obj->base.resv,
 							&excl, &count, &shared);
 		if (ret)
 			return ret;
@@ -1040,7 +1044,7 @@ i915_request_await_object(struct i915_request *to,
 			dma_fence_put(shared[i]);
 		kfree(shared);
 	} else {
-		excl = reservation_object_get_excl_rcu(obj->resv);
+		excl = reservation_object_get_excl_rcu(obj->base.resv);
 	}
 
 	if (excl) {
@@ -1100,8 +1104,7 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
 	 */
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &rq->i915->drm.struct_mutex);
+	prev = rcu_dereference_protected(timeline->last_request.request, 1);
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1118,10 +1121,13 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 							 0);
 	}
 
-	spin_lock_irq(&timeline->lock);
 	list_add_tail(&rq->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
 
+	/*
+	 * Make sure that no request gazumped us - if it was allocated after
+	 * our i915_request_alloc() and called __i915_request_add() before
+	 * us, the timeline will hold its seqno which is later than ours.
+	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
 	__i915_active_request_set(&timeline->last_request, rq);
 
@@ -1133,36 +1139,23 @@ __i915_request_add_to_timeline(struct i915_request *rq)
  * request is not being tracked for completion but the work itself is
  * going to happen on the hardware. This would be a Bad Thing(tm).
  */
-void i915_request_add(struct i915_request *request)
+struct i915_request *__i915_request_commit(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct i915_timeline *timeline = request->timeline;
-	struct intel_ring *ring = request->ring;
+	struct intel_engine_cs *engine = rq->engine;
+	struct intel_ring *ring = rq->ring;
 	struct i915_request *prev;
 	u32 *cs;
 
 	GEM_TRACE("%s fence %llx:%lld\n",
-		  engine->name, request->fence.context, request->fence.seqno);
-
-	lockdep_assert_held(&request->timeline->mutex);
-	lockdep_unpin_lock(&request->timeline->mutex, request->cookie);
-
-	trace_i915_request_add(request);
-
-	/*
-	 * Make sure that no request gazumped us - if it was allocated after
-	 * our i915_request_alloc() and called __i915_request_add() before
-	 * us, the timeline will hold its seqno which is later than ours.
-	 */
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
+		  engine->name, rq->fence.context, rq->fence.seqno);
 
 	/*
 	 * To ensure that this call will not fail, space for its emissions
 	 * should already have been reserved in the ring buffer. Let the ring
 	 * know that it is time to use that space up.
 	 */
-	GEM_BUG_ON(request->reserved_space > request->ring->space);
-	request->reserved_space = 0;
+	GEM_BUG_ON(rq->reserved_space > ring->space);
+	rq->reserved_space = 0;
 
 	/*
 	 * Record the position of the start of the breadcrumb so that
@@ -1170,17 +1163,16 @@ void i915_request_add(struct i915_request *request)
 	 * GPU processing the request, we never over-estimate the
 	 * position of the ring's HEAD.
 	 */
-	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
+	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
 	GEM_BUG_ON(IS_ERR(cs));
-	request->postfix = intel_ring_offset(request, cs);
+	rq->postfix = intel_ring_offset(rq, cs);
 
-	prev = __i915_request_add_to_timeline(request);
+	prev = __i915_request_add_to_timeline(rq);
 
-	list_add_tail(&request->ring_link, &ring->request_list);
-	if (list_is_first(&request->ring_link, &ring->request_list))
-		list_add(&ring->active_link, &request->i915->gt.active_rings);
-	request->i915->gt.active_engines |= request->engine->mask;
-	request->emitted_jiffies = jiffies;
+	list_add_tail(&rq->ring_link, &ring->request_list);
+	if (list_is_first(&rq->ring_link, &ring->request_list))
+		list_add(&ring->active_link, &rq->i915->gt.active_rings);
+	rq->emitted_jiffies = jiffies;
 
 	/*
 	 * Let the backend know a new request has arrived that may need
@@ -1194,10 +1186,10 @@ void i915_request_add(struct i915_request *request)
 	 * run at the earliest possible convenience.
 	 */
 	local_bh_disable();
-	i915_sw_fence_commit(&request->semaphore);
+	i915_sw_fence_commit(&rq->semaphore);
 	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule) {
-		struct i915_sched_attr attr = request->gem_context->sched;
+		struct i915_sched_attr attr = rq->gem_context->sched;
 
 		/*
 		 * Boost actual workloads past semaphores!
@@ -1211,7 +1203,7 @@ void i915_request_add(struct i915_request *request)
 		 * far in the distance past over useful work, we keep a history
 		 * of any semaphore use along our dependency chain.
 		 */
-		if (!(request->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
+		if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
 			attr.priority |= I915_PRIORITY_NOSEMAPHORE;
 
 		/*
@@ -1220,15 +1212,29 @@ void i915_request_add(struct i915_request *request)
 		 * Allow interactive/synchronous clients to jump ahead of
 		 * the bulk clients. (FQ_CODEL)
 		 */
-		if (list_empty(&request->sched.signalers_list))
+		if (list_empty(&rq->sched.signalers_list))
 			attr.priority |= I915_PRIORITY_WAIT;
 
-		engine->schedule(request, &attr);
+		engine->schedule(rq, &attr);
 	}
 	rcu_read_unlock();
-	i915_sw_fence_commit(&request->submit);
+	i915_sw_fence_commit(&rq->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
+	return prev;
+}
+
+void i915_request_add(struct i915_request *rq)
+{
+	struct i915_request *prev;
+
+	lockdep_assert_held(&rq->timeline->mutex);
+	lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie);
+
+	trace_i915_request_add(rq);
+
+	prev = __i915_request_commit(rq);
+
 	/*
 	 * In typical scenarios, we do not expect the previous request on
 	 * the timeline to be still tracked by timeline->last_request if it
@@ -1249,7 +1255,7 @@ void i915_request_add(struct i915_request *request)
 	if (prev && i915_request_completed(prev))
 		i915_request_retire_upto(prev);
 
-	mutex_unlock(&request->timeline->mutex);
+	mutex_unlock(&rq->timeline->mutex);
 }
 
 static unsigned long local_clock_us(unsigned int *cpu)
@@ -1354,10 +1360,6 @@ static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
  * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
  * unbounded wait).
  *
- * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
- * in via the flags, and vice versa if the struct_mutex is not held, the caller
- * must not specify that the wait is locked.
- *
  * Returns the remaining time (in jiffies) if the request completed, which may
  * be zero or -ETIME if the request is unfinished after the timeout expires.
  * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
@@ -1374,7 +1376,7 @@ long i915_request_wait(struct i915_request *rq,
 	might_sleep();
 	GEM_BUG_ON(timeout < 0);
 
-	if (i915_request_completed(rq))
+	if (dma_fence_is_signaled(&rq->fence))
 		return timeout;
 
 	if (!timeout)
@@ -1382,9 +1384,43 @@ long i915_request_wait(struct i915_request *rq,
 
 	trace_i915_request_wait_begin(rq, flags);
 
-	/* Optimistic short spin before touching IRQs */
-	if (__i915_spin_request(rq, state, 5))
+	/*
+	 * We must never wait on the GPU while holding a lock as we
+	 * may need to perform a GPU reset. So while we don't need to
+	 * serialise wait/reset with an explicit lock, we do want
+	 * lockdep to detect potential dependency cycles.
+	 */
+	mutex_acquire(&rq->i915->gpu_error.wedge_mutex.dep_map,
+		      0, 0, _THIS_IP_);
+
+	/*
+	 * Optimistic spin before touching IRQs.
+	 *
+	 * We may use a rather large value here to offset the penalty of
+	 * switching away from the active task. Frequently, the client will
+	 * wait upon an old swapbuffer to throttle itself to remain within a
+	 * frame of the gpu. If the client is running in lockstep with the gpu,
+	 * then it should not be waiting long at all, and a sleep now will incur
+	 * extra scheduler latency in producing the next frame. To try to
+	 * avoid adding the cost of enabling/disabling the interrupt to the
+	 * short wait, we first spin to see if the request would have completed
+	 * in the time taken to setup the interrupt.
+	 *
+	 * We need upto 5us to enable the irq, and upto 20us to hide the
+	 * scheduler latency of a context switch, ignoring the secondary
+	 * impacts from a context switch such as cache eviction.
+	 *
+	 * The scheme used for low-latency IO is called "hybrid interrupt
+	 * polling". The suggestion there is to sleep until just before you
+	 * expect to be woken by the device interrupt and then poll for its
+	 * completion. That requires having a good predictor for the request
+	 * duration, which we currently lack.
+	 */
+	if (CONFIG_DRM_I915_SPIN_REQUEST &&
+	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
+		dma_fence_signal(&rq->fence);
 		goto out;
+	}
 
 	/*
 	 * This client is about to stall waiting for the GPU. In many cases
@@ -1401,9 +1437,7 @@ long i915_request_wait(struct i915_request *rq,
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
 			gen6_rps_boost(rq);
-		local_bh_disable(); /* suspend tasklets for reprioritisation */
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-		local_bh_enable(); /* kick tasklets en masse */
 	}
 
 	wait.tsk = current;
@@ -1433,25 +1467,25 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
+	mutex_release(&rq->i915->gpu_error.wedge_mutex.dep_map, 0, _THIS_IP_);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
 
-void i915_retire_requests(struct drm_i915_private *i915)
+bool i915_retire_requests(struct drm_i915_private *i915)
 {
 	struct intel_ring *ring, *tmp;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	if (!i915->gt.active_requests)
-		return;
-
 	list_for_each_entry_safe(ring, tmp,
 				 &i915->gt.active_rings, active_link) {
 		intel_ring_get(ring); /* last rq holds reference! */
 		ring_retire_requests(ring);
 		intel_ring_put(ring);
 	}
+
+	return !list_empty(&i915->gt.active_rings);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index a982664618c2..edbbdfec24ab 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@
 #include <linux/dma-fence.h>
 #include <linux/lockdep.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
@@ -156,6 +158,7 @@ struct i915_request {
 	 */
 	struct i915_sched_node sched;
 	struct i915_dependency dep;
+	intel_engine_mask_t execution_mask;
 
 	/*
 	 * A convenience pointer to the current breadcrumb value stored in
@@ -214,7 +217,7 @@ struct i915_request {
 
 	bool waitboost;
 
-	/** engine->request_list entry for this request */
+	/** timeline->request entry for this request */
 	struct list_head link;
 
 	/** ring->request_list entry for this request */
@@ -240,8 +243,12 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence)
 }
 
 struct i915_request * __must_check
-i915_request_alloc(struct intel_engine_cs *engine,
-		   struct i915_gem_context *ctx);
+__i915_request_create(struct intel_context *ce, gfp_t gfp);
+struct i915_request * __must_check
+i915_request_create(struct intel_context *ce);
+
+struct i915_request *__i915_request_commit(struct i915_request *request);
+
 void i915_request_retire_upto(struct i915_request *rq);
 
 static inline struct i915_request *
@@ -276,6 +283,10 @@ int i915_request_await_object(struct i915_request *to,
 			      bool write);
 int i915_request_await_dma_fence(struct i915_request *rq,
 				 struct dma_fence *fence);
+int i915_request_await_execution(struct i915_request *rq,
+				 struct dma_fence *fence,
+				 void (*hook)(struct i915_request *rq,
+					      struct dma_fence *signal));
 
 void i915_request_add(struct i915_request *rq);
 
@@ -418,6 +429,6 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
 	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
 }
 
-void i915_retire_requests(struct drm_i915_private *i915);
+bool i915_retire_requests(struct drm_i915_private *i915);
 
 #endif /* I915_REQUEST_H */
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c
new file mode 100644
index 000000000000..cc6b3846a8c7
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.c
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+bool i915_sg_trim(struct sg_table *orig_st)
+{
+	struct sg_table new_st;
+	struct scatterlist *sg, *new_sg;
+	unsigned int i;
+
+	if (orig_st->nents == orig_st->orig_nents)
+		return false;
+
+	if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN))
+		return false;
+
+	new_sg = new_st.sgl;
+	for_each_sg(orig_st->sgl, sg, orig_st->nents, i) {
+		sg_set_page(new_sg, sg_page(sg), sg->length, 0);
+		sg_dma_address(new_sg) = sg_dma_address(sg);
+		sg_dma_len(new_sg) = sg_dma_len(sg);
+
+		new_sg = sg_next(new_sg);
+	}
+	GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
+
+	sg_free_table(orig_st);
+
+	*orig_st = new_st;
+	return true;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/scatterlist.c"
+#endif
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
new file mode 100644
index 000000000000..6617963df9ed
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -0,0 +1,127 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef I915_SCATTERLIST_H
+#define I915_SCATTERLIST_H
+
+#include <linux/pfn.h>
+#include <linux/scatterlist.h>
+#include <linux/swiotlb.h>
+
+#include "i915_gem.h"
+
+/*
+ * Optimised SGL iterator for GEM objects
+ */
+static __always_inline struct sgt_iter {
+	struct scatterlist *sgp;
+	union {
+		unsigned long pfn;
+		dma_addr_t dma;
+	};
+	unsigned int curr;
+	unsigned int max;
+} __sgt_iter(struct scatterlist *sgl, bool dma) {
+	struct sgt_iter s = { .sgp = sgl };
+
+	if (s.sgp) {
+		s.max = s.curr = s.sgp->offset;
+		s.max += s.sgp->length;
+		if (dma)
+			s.dma = sg_dma_address(s.sgp);
+		else
+			s.pfn = page_to_pfn(sg_page(s.sgp));
+	}
+
+	return s;
+}
+
+static inline int __sg_page_count(const struct scatterlist *sg)
+{
+	return sg->length >> PAGE_SHIFT;
+}
+
+static inline struct scatterlist *____sg_next(struct scatterlist *sg)
+{
+	++sg;
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+	return sg;
+}
+
+/**
+ * __sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Description:
+ *   If the entry is the last, return NULL; otherwise, step to the next
+ *   element in the array (@sg@+1). If that's a chain pointer, follow it;
+ *   otherwise just return the pointer to the current element.
+ **/
+static inline struct scatterlist *__sg_next(struct scatterlist *sg)
+{
+	return sg_is_last(sg) ? NULL : ____sg_next(sg);
+}
+
+/**
+ * __for_each_sgt_dma - iterate over the DMA addresses of the given sg_table
+ * @__dmap:	DMA address (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ * @__step:	step size
+ */
+#define __for_each_sgt_dma(__dmap, __iter, __sgt, __step)		\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, true);			\
+	     ((__dmap) = (__iter).dma + (__iter).curr);			\
+	     (((__iter).curr += (__step)) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), true), 0 : 0)
+
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @__pp:	page pointer (output)
+ * @__iter:	'struct sgt_iter' (iterator state, internal)
+ * @__sgt:	sg_table to iterate over (input)
+ */
+#define for_each_sgt_page(__pp, __iter, __sgt)				\
+	for ((__iter) = __sgt_iter((__sgt)->sgl, false);		\
+	     ((__pp) = (__iter).pfn == 0 ? NULL :			\
+	      pfn_to_page((__iter).pfn + ((__iter).curr >> PAGE_SHIFT))); \
+	     (((__iter).curr += PAGE_SIZE) >= (__iter).max) ?		\
+	     (__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0 : 0)
+
+static inline unsigned int i915_sg_page_sizes(struct scatterlist *sg)
+{
+	unsigned int page_sizes;
+
+	page_sizes = 0;
+	while (sg) {
+		GEM_BUG_ON(sg->offset);
+		GEM_BUG_ON(!IS_ALIGNED(sg->length, PAGE_SIZE));
+		page_sizes |= sg->length;
+		sg = __sg_next(sg);
+	}
+
+	return page_sizes;
+}
+
+static inline unsigned int i915_sg_segment_size(void)
+{
+	unsigned int size = swiotlb_max_segment();
+
+	if (size == 0)
+		return SCATTERLIST_MAX_SEGMENT;
+
+	size = rounddown(size, PAGE_SIZE);
+	/* swiotlb_max_segment_size can return 1 byte when it means one page. */
+	if (size < PAGE_SIZE)
+		size = PAGE_SIZE;
+
+	return size;
+}
+
+bool i915_sg_trim(struct sg_table *orig_st);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 108f52e1bf35..2e9b38bdc33c 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -77,7 +77,7 @@ i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
 	bool first = true;
 	int idx, i;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 	assert_priolists(execlists);
 
 	/* buckets sorted from highest [in slot 0] to lowest priority */
@@ -150,29 +150,49 @@ sched_lock_engine(const struct i915_sched_node *node,
 		  struct intel_engine_cs *locked,
 		  struct sched_cache *cache)
 {
-	struct intel_engine_cs *engine = node_to_request(node)->engine;
+	const struct i915_request *rq = node_to_request(node);
+	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(!locked);
 
-	if (engine != locked) {
-		spin_unlock(&locked->timeline.lock);
+	/*
+	 * Virtual engines complicate acquiring the engine timeline lock,
+	 * as their rq->engine pointer is not stable until under that
+	 * engine lock. The simple ploy we use is to take the lock then
+	 * check that the rq still belongs to the newly locked engine.
+	 */
+	while (locked != (engine = READ_ONCE(rq->engine))) {
+		spin_unlock(&locked->active.lock);
 		memset(cache, 0, sizeof(*cache));
-		spin_lock(&engine->timeline.lock);
+		spin_lock(&engine->active.lock);
+		locked = engine;
 	}
 
-	return engine;
+	GEM_BUG_ON(locked != engine);
+	return locked;
 }
 
-static bool inflight(const struct i915_request *rq,
-		     const struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
 {
-	const struct i915_request *active;
+	return rq->sched.attr.priority | __NO_PREEMPTION;
+}
 
-	if (!i915_request_is_active(rq))
-		return false;
+static void kick_submission(struct intel_engine_cs *engine, int prio)
+{
+	const struct i915_request *inflight =
+		port_request(engine->execlists.port);
+
+	/*
+	 * If we are already the currently executing context, don't
+	 * bother evaluating if we should preempt ourselves, or if
+	 * we expect nothing to change as a result of running the
+	 * tasklet, i.e. we have not change the priority queue
+	 * sufficiently to oust the running context.
+	 */
+	if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
+		return;
 
-	active = port_request(engine->execlists.port);
-	return active->hw_context == rq->hw_context;
+	tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void __i915_schedule(struct i915_sched_node *node,
@@ -189,10 +209,10 @@ static void __i915_schedule(struct i915_sched_node *node,
 	lockdep_assert_held(&schedule_lock);
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
-	if (node_signaled(node))
+	if (prio <= READ_ONCE(node->attr.priority))
 		return;
 
-	if (prio <= READ_ONCE(node->attr.priority))
+	if (node_signaled(node))
 		return;
 
 	stack.signaler = node;
@@ -258,28 +278,26 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 	memset(&cache, 0, sizeof(cache));
 	engine = node_to_request(node)->engine;
-	spin_lock(&engine->timeline.lock);
+	spin_lock(&engine->active.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
+	engine = sched_lock_engine(node, engine, &cache);
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		INIT_LIST_HEAD(&dep->dfs_link);
 
 		node = dep->signaler;
 		engine = sched_lock_engine(node, engine, &cache);
-		lockdep_assert_held(&engine->timeline.lock);
+		lockdep_assert_held(&engine->active.lock);
 
 		/* Recheck after acquiring the engine->timeline.lock */
 		if (prio <= node->attr.priority || node_signaled(node))
 			continue;
 
+		GEM_BUG_ON(node_to_request(node)->engine != engine);
+
 		node->attr.priority = prio;
-		if (!list_empty(&node->link)) {
-			if (!cache.priolist)
-				cache.priolist =
-					i915_sched_lookup_priolist(engine,
-								   prio);
-			list_move_tail(&node->link, cache.priolist);
-		} else {
+
+		if (list_empty(&node->link)) {
 			/*
 			 * If the request is not in the priolist queue because
 			 * it is not yet runnable, then it doesn't contribute
@@ -288,8 +306,16 @@ static void __i915_schedule(struct i915_sched_node *node,
 			 * queue; but in that case we may still need to reorder
 			 * the inflight requests.
 			 */
-			if (!i915_sw_fence_done(&node_to_request(node)->submit))
-				continue;
+			continue;
+		}
+
+		if (!intel_engine_is_virtual(engine) &&
+		    !i915_request_is_active(node_to_request(node))) {
+			if (!cache.priolist)
+				cache.priolist =
+					i915_sched_lookup_priolist(engine,
+								   prio);
+			list_move_tail(&node->link, cache.priolist);
 		}
 
 		if (prio <= engine->execlists.queue_priority_hint)
@@ -297,18 +323,11 @@ static void __i915_schedule(struct i915_sched_node *node,
 
 		engine->execlists.queue_priority_hint = prio;
 
-		/*
-		 * If we are already the currently executing context, don't
-		 * bother evaluating if we should preempt ourselves.
-		 */
-		if (inflight(node_to_request(node), engine))
-			continue;
-
 		/* Defer (tasklet) submission until after all of our updates. */
-		tasklet_hi_schedule(&engine->execlists.tasklet);
+		kick_submission(engine, prio);
 	}
 
-	spin_unlock(&engine->timeline.lock);
+	spin_unlock(&engine->active.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
@@ -422,8 +441,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
 {
 	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&node->link));
-
 	spin_lock_irq(&schedule_lock);
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 07d243acf553..7eefccff39bf 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -52,4 +52,22 @@ static inline void i915_priolist_free(struct i915_priolist *p)
 		__i915_priolist_free(p);
 }
 
+static inline bool i915_scheduler_need_preempt(int prio, int active)
+{
+	/*
+	 * Allow preemption of low -> normal -> high, but we do
+	 * not allow low priority tasks to preempt other low priority
+	 * tasks under the impression that latency for low priority
+	 * tasks does not matter (as much as background throughput),
+	 * so kiss.
+	 *
+	 * More naturally we would write
+	 *	prio >= max(0, last);
+	 * except that we wish to prevent triggering preemption at the same
+	 * priority level: the task that is running should remain running
+	 * to preserve FIFO ordering of dependencies.
+	 */
+	return prio > max(I915_PRIORITY_NORMAL - 1, active);
+}
+
 #endif /* _I915_SCHEDULER_H_ */
diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
index 4f2b2eb7c3e5..3e309631bd0b 100644
--- a/drivers/gpu/drm/i915/i915_scheduler_types.h
+++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
@@ -9,8 +9,8 @@
 
 #include <linux/list.h>
 
+#include "gt/intel_engine_types.h"
 #include "i915_priolist_types.h"
-#include "intel_engine_types.h"
 
 struct drm_i915_private;
 struct i915_request;
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 95f3dab1b229..a08d7d16621b 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -26,9 +26,11 @@
 
 #include <drm/i915_drm.h>
 
+#include "display/intel_fbc.h"
+#include "display/intel_gmbus.h"
+
 #include "i915_reg.h"
 #include "intel_drv.h"
-#include "intel_fbc.h"
 
 static void i915_save_display(struct drm_i915_private *dev_priv)
 {
@@ -144,7 +146,7 @@ int i915_restore_state(struct drm_i915_private *dev_priv)
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_i2c_reset(dev_priv);
+	intel_gmbus_reset(dev_priv);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 41313005af42..ecac1c386109 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -29,8 +29,11 @@
 #include <linux/module.h>
 #include <linux/stat.h>
 #include <linux/sysfs.h>
-#include "intel_drv.h"
+
 #include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_pm.h"
+#include "intel_sideband.h"
 
 static inline struct drm_i915_private *kdev_minor_to_i915(struct device *kdev)
 {
@@ -45,7 +48,7 @@ static u32 calc_residency(struct drm_i915_private *dev_priv,
 	intel_wakeref_t wakeref;
 	u64 res = 0;
 
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		res = intel_rc6_residency_us(dev_priv, reg);
 
 	return DIV_ROUND_CLOSEST_ULL(res, 1000);
@@ -259,25 +262,23 @@ static ssize_t gt_act_freq_mhz_show(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	intel_wakeref_t wakeref;
-	int ret;
+	u32 freq;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
-	mutex_lock(&dev_priv->pcu_lock);
 	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		u32 freq;
+		vlv_punit_get(dev_priv);
 		freq = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-		ret = intel_gpu_freq(dev_priv, (freq >> 8) & 0xff);
+		vlv_punit_put(dev_priv);
+
+		freq = (freq >> 8) & 0xff;
 	} else {
-		ret = intel_gpu_freq(dev_priv,
-				     intel_get_cagf(dev_priv,
-						    I915_READ(GEN6_RPSTAT1)));
+		freq = intel_get_cagf(dev_priv, I915_READ(GEN6_RPSTAT1));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
 
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", ret);
+	return snprintf(buf, PAGE_SIZE, "%d\n", intel_gpu_freq(dev_priv, freq));
 }
 
 static ssize_t gt_cur_freq_mhz_show(struct device *kdev,
@@ -318,12 +319,12 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
 	if (val < rps->min_freq || val > rps->max_freq)
 		return -EINVAL;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (val != rps->boost_freq) {
 		rps->boost_freq = val;
 		boost = atomic_read(&rps->num_waiters);
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 	if (boost)
 		schedule_work(&rps->work);
 
@@ -363,18 +364,15 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv, wakeref);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	if (val > rps->rp0_freq)
@@ -392,9 +390,9 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+unlock:
+	mutex_unlock(&rps->lock);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return ret ?: count;
 }
@@ -422,18 +420,15 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	if (ret)
 		return ret;
 
-	wakeref = intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&dev_priv->pcu_lock);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
+	mutex_lock(&rps->lock);
 
 	val = intel_freq_opcode(dev_priv, val);
-
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		intel_runtime_pm_put(dev_priv, wakeref);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto unlock;
 	}
 
 	rps->min_freq_softlimit = val;
@@ -447,9 +442,9 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	 * frequency request may be unchanged. */
 	ret = intel_set_rps(dev_priv, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	intel_runtime_pm_put(dev_priv, wakeref);
+unlock:
+	mutex_unlock(&rps->lock);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index 5fbea0892f33..c311ce9c6f9d 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -61,7 +61,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 
 	BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irq(&gt->hwsp_lock);
 
 	/* hwsp_free_list only contains HWSP that have available cachelines */
 	hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
@@ -69,7 +69,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp) {
 		struct i915_vma *vma;
 
-		spin_unlock(&gt->hwsp_lock);
+		spin_unlock_irq(&gt->hwsp_lock);
 
 		hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
 		if (!hwsp)
@@ -86,7 +86,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 		hwsp->free_bitmap = ~0ull;
 		hwsp->gt = gt;
 
-		spin_lock(&gt->hwsp_lock);
+		spin_lock_irq(&gt->hwsp_lock);
 		list_add(&hwsp->free_link, &gt->hwsp_free_list);
 	}
 
@@ -96,7 +96,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 	if (!hwsp->free_bitmap)
 		list_del(&hwsp->free_link);
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irq(&gt->hwsp_lock);
 
 	GEM_BUG_ON(hwsp->vma->private != hwsp);
 	return hwsp->vma;
@@ -105,8 +105,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
 static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 {
 	struct i915_gt_timelines *gt = hwsp->gt;
+	unsigned long flags;
 
-	spin_lock(&gt->hwsp_lock);
+	spin_lock_irqsave(&gt->hwsp_lock, flags);
 
 	/* As a cacheline becomes available, publish the HWSP on the freelist */
 	if (!hwsp->free_bitmap)
@@ -122,7 +123,7 @@ static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
 		kfree(hwsp);
 	}
 
-	spin_unlock(&gt->hwsp_lock);
+	spin_unlock_irqrestore(&gt->hwsp_lock, flags);
 }
 
 static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
@@ -250,7 +251,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 
 	timeline->fence_context = dma_fence_context_alloc(1);
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
index 27668a1a69a3..36e5e5a65155 100644
--- a/drivers/gpu/drm/i915/i915_timeline.h
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -36,25 +36,6 @@ int i915_timeline_init(struct drm_i915_private *i915,
 		       struct i915_vma *hwsp);
 void i915_timeline_fini(struct i915_timeline *tl);
 
-static inline void
-i915_timeline_set_subclass(struct i915_timeline *timeline,
-			   unsigned int subclass)
-{
-	lockdep_set_subclass(&timeline->lock, subclass);
-
-	/*
-	 * Due to an interesting quirk in lockdep's internal debug tracking,
-	 * after setting a subclass we must ensure the lock is used. Otherwise,
-	 * nr_unused_locks is incremented once too often.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	local_irq_disable();
-	lock_map_acquire(&timeline->lock.dep_map);
-	lock_map_release(&timeline->lock.dep_map);
-	local_irq_enable();
-#endif
-}
-
 struct i915_timeline *
 i915_timeline_create(struct drm_i915_private *i915,
 		     struct i915_vma *global_hwsp);
diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h
index 5256a0b5c5f7..fce5cb4f1090 100644
--- a/drivers/gpu/drm/i915/i915_timeline_types.h
+++ b/drivers/gpu/drm/i915/i915_timeline_types.h
@@ -23,9 +23,6 @@ struct i915_timeline {
 	u64 fence_context;
 	u32 seqno;
 
-	spinlock_t lock;
-#define TIMELINE_CLIENT 0 /* default subclass */
-#define TIMELINE_ENGINE 1
 	struct mutex mutex; /* protects the flow of requests */
 
 	unsigned int pin_count;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 12893304c8f8..f4ce643b3bc3 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -8,9 +8,11 @@
 
 #include <drm/drm_drv.h>
 
+#include "gt/intel_engine.h"
+
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "intel_drv.h"
-#include "intel_ringbuffer.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM i915
@@ -861,10 +863,9 @@ TRACE_EVENT(i915_request_wait_begin,
 			   __entry->flags = flags;
 			   ),
 
-	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, blocking=%u, flags=0x%x",
+	    TP_printk("dev=%u, engine=%u:%u, hw_id=%u, ctx=%llu, seqno=%u, flags=0x%x",
 		      __entry->dev, __entry->class, __entry->instance,
 		      __entry->hw_id, __entry->ctx, __entry->seqno,
-		      !!(__entry->flags & I915_WAIT_LOCKED),
 		      __entry->flags)
 );
 
@@ -975,7 +976,7 @@ DECLARE_EVENT_CLASS(i915_context,
 			__entry->dev = ctx->i915->drm.primary->index;
 			__entry->ctx = ctx;
 			__entry->hw_id = ctx->hw_id;
-			__entry->vm = ctx->ppgtt ? &ctx->ppgtt->vm : NULL;
+			__entry->vm = ctx->vm;
 	),
 
 	TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u",
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 2dbe8933b50a..2987219a6300 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,12 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H
 
+#include <linux/list.h>
+#include <linux/overflow.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
 #if 0
@@ -73,6 +79,39 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
 
+static inline bool
+__check_struct_size(size_t base, size_t arr, size_t count, size_t *size)
+{
+	size_t sz;
+
+	if (check_mul_overflow(count, arr, &sz))
+		return false;
+
+	if (check_add_overflow(sz, base, &sz))
+		return false;
+
+	*size = sz;
+	return true;
+}
+
+/**
+ * check_struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ * @sz: Total size of structure and array
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements, like struct_size() but reports
+ * whether it overflowed, and the resultant size in @sz
+ *
+ * Return: false if the calculation overflowed.
+ */
+#define check_struct_size(p, member, n, sz) \
+	likely(__check_struct_size(sizeof(*(p)), \
+				   sizeof(*(p)->member) + __must_be_array((p)->member), \
+				   n, sz))
+
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
@@ -97,6 +136,8 @@
 #define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
 #define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
 
+#define struct_member(T, member) (((T *)0)->member)
+
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
 
 #define fetch_and_zero(ptr) ({						\
@@ -113,7 +154,7 @@
  */
 #define container_of_user(ptr, type, member) ({				\
 	void __user *__mptr = (void __user *)(ptr);			\
-	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), struct_member(type, member)) && \
 			 !__same_type(*(ptr), void),			\
 			 "pointer type mismatch in container_of()");	\
 	((type __user *)(__mptr - offsetof(type, member))); })
@@ -152,8 +193,6 @@ static inline u64 ptr_to_u64(const void *ptr)
 	__idx;								\
 })
 
-#include <linux/list.h>
-
 static inline void __list_del_many(struct list_head *head,
 				   struct list_head *first)
 {
@@ -174,6 +213,148 @@ static inline void drain_delayed_work(struct delayed_work *dw)
 	} while (delayed_work_pending(dw));
 }
 
+static inline unsigned long msecs_to_jiffies_timeout(const unsigned int m)
+{
+	unsigned long j = msecs_to_jiffies(m);
+
+	return min_t(unsigned long, MAX_JIFFY_OFFSET, j + 1);
+}
+
+/*
+ * If you need to wait X milliseconds between events A and B, but event B
+ * doesn't happen exactly after event A, you record the timestamp (jiffies) of
+ * when event A happened, then just before event B you call this function and
+ * pass the timestamp as the first argument, and X as the second argument.
+ */
+static inline void
+wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
+{
+	unsigned long target_jiffies, tmp_jiffies, remaining_jiffies;
+
+	/*
+	 * Don't re-read the value of "jiffies" every time since it may change
+	 * behind our back and break the math.
+	 */
+	tmp_jiffies = jiffies;
+	target_jiffies = timestamp_jiffies +
+			 msecs_to_jiffies_timeout(to_wait_ms);
+
+	if (time_after(target_jiffies, tmp_jiffies)) {
+		remaining_jiffies = target_jiffies - tmp_jiffies;
+		while (remaining_jiffies)
+			remaining_jiffies =
+			    schedule_timeout_uninterruptible(remaining_jiffies);
+	}
+}
+
+/**
+ * __wait_for - magic wait macro
+ *
+ * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
+ * important that we check the condition again after having timed out, since the
+ * timeout could be due to preemption or similar and we've never had a chance to
+ * check the condition before the timeout.
+ */
+#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
+	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
+	long wait__ = (Wmin); /* recommended min for usleep is 10 us */	\
+	int ret__;							\
+	might_sleep();							\
+	for (;;) {							\
+		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
+		OP;							\
+		/* Guarantee COND check prior to timeout */		\
+		barrier();						\
+		if (COND) {						\
+			ret__ = 0;					\
+			break;						\
+		}							\
+		if (expired__) {					\
+			ret__ = -ETIMEDOUT;				\
+			break;						\
+		}							\
+		usleep_range(wait__, wait__ * 2);			\
+		if (wait__ < (Wmax))					\
+			wait__ <<= 1;					\
+	}								\
+	ret__;								\
+})
+
+#define _wait_for(COND, US, Wmin, Wmax)	__wait_for(, (COND), (US), (Wmin), \
+						   (Wmax))
+#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
+
+/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
+#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
+# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
+#else
+# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
+#endif
+
+#define _wait_for_atomic(COND, US, ATOMIC) \
+({ \
+	int cpu, ret, timeout = (US) * 1000; \
+	u64 base; \
+	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
+	if (!(ATOMIC)) { \
+		preempt_disable(); \
+		cpu = smp_processor_id(); \
+	} \
+	base = local_clock(); \
+	for (;;) { \
+		u64 now = local_clock(); \
+		if (!(ATOMIC)) \
+			preempt_enable(); \
+		/* Guarantee COND check prior to timeout */ \
+		barrier(); \
+		if (COND) { \
+			ret = 0; \
+			break; \
+		} \
+		if (now - base >= timeout) { \
+			ret = -ETIMEDOUT; \
+			break; \
+		} \
+		cpu_relax(); \
+		if (!(ATOMIC)) { \
+			preempt_disable(); \
+			if (unlikely(cpu != smp_processor_id())) { \
+				timeout -= now - base; \
+				cpu = smp_processor_id(); \
+				base = local_clock(); \
+			} \
+		} \
+	} \
+	ret; \
+})
+
+#define wait_for_us(COND, US) \
+({ \
+	int ret__; \
+	BUILD_BUG_ON(!__builtin_constant_p(US)); \
+	if ((US) > 10) \
+		ret__ = _wait_for((COND), (US), 10, 10); \
+	else \
+		ret__ = _wait_for_atomic((COND), (US), 0); \
+	ret__; \
+})
+
+#define wait_for_atomic_us(COND, US) \
+({ \
+	BUILD_BUG_ON(!__builtin_constant_p(US)); \
+	BUILD_BUG_ON((US) > 50000); \
+	_wait_for_atomic((COND), (US), 1); \
+})
+
+#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
+
+#define KHz(x) (1000 * (x))
+#define MHz(x) KHz(1000 * (x))
+
+#define KBps(x) (1000 * (x))
+#define MBps(x) KBps(1000 * (x))
+#define GBps(x) ((u64)1000 * MBps((x)))
+
 static inline const char *yesno(bool v)
 {
 	return v ? "yes" : "no";
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 961268f66c63..a57729be8312 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -22,14 +22,15 @@
  *
  */
 
-#include "i915_vma.h"
+#include <drm/drm_gem.h>
+
+#include "display/intel_frontbuffer.h"
+
+#include "gt/intel_engine.h"
 
 #include "i915_drv.h"
 #include "i915_globals.h"
-#include "intel_ringbuffer.h"
-#include "intel_frontbuffer.h"
-
-#include <drm/drm_gem.h>
+#include "i915_vma.h"
 
 static struct i915_global_vma {
 	struct i915_global base;
@@ -79,11 +80,11 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 static void obj_bump_mru(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	unsigned long flags;
 
-	spin_lock(&i915->mm.obj_lock);
-	if (obj->bind_count)
-		list_move_tail(&obj->mm.link, &i915->mm.bound_list);
-	spin_unlock(&i915->mm.obj_lock);
+	spin_lock_irqsave(&i915->mm.obj_lock, flags);
+	list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
+	spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 
 	obj->mm.dirty = true; /* be paranoid  */
 }
@@ -98,10 +99,10 @@ static void __i915_vma_retire(struct i915_active *ref)
 		return;
 
 	/* Prune the shared fence arrays iff completely idle (inc. external) */
-	if (reservation_object_trylock(obj->resv)) {
-		if (reservation_object_test_signaled_rcu(obj->resv, true))
-			reservation_object_add_excl_fence(obj->resv, NULL);
-		reservation_object_unlock(obj->resv);
+	if (reservation_object_trylock(obj->base.resv)) {
+		if (reservation_object_test_signaled_rcu(obj->base.resv, true))
+			reservation_object_add_excl_fence(obj->base.resv, NULL);
+		reservation_object_unlock(obj->base.resv);
 	}
 
 	/*
@@ -109,12 +110,10 @@ static void __i915_vma_retire(struct i915_active *ref)
 	 * so that we don't steal from recently used but inactive objects
 	 * (unless we are forced to ofc!)
 	 */
-	obj_bump_mru(obj);
+	if (i915_gem_object_is_shrinkable(obj))
+		obj_bump_mru(obj);
 
-	if (i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_clear_active_reference(obj);
-		i915_gem_object_put(obj);
-	}
+	i915_gem_object_put(obj); /* and drop the active reference */
 }
 
 static struct i915_vma *
@@ -132,16 +131,18 @@ vma_create(struct drm_i915_gem_object *obj,
 	if (vma == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
-	INIT_ACTIVE_REQUEST(&vma->last_fence);
-
 	vma->vm = vm;
 	vma->ops = &vm->vma_ops;
 	vma->obj = obj;
-	vma->resv = obj->resv;
+	vma->resv = obj->base.resv;
 	vma->size = obj->base.size;
 	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
 
+	i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
+	INIT_ACTIVE_REQUEST(&vma->last_fence);
+
+	INIT_LIST_HEAD(&vma->closed_link);
+
 	if (view && view->type != I915_GGTT_VIEW_NORMAL) {
 		vma->ggtt_view = *view;
 		if (view->type == I915_GGTT_VIEW_PARTIAL) {
@@ -155,6 +156,9 @@ vma_create(struct drm_i915_gem_object *obj,
 		} else if (view->type == I915_GGTT_VIEW_ROTATED) {
 			vma->size = intel_rotation_info_size(&view->rotated);
 			vma->size <<= PAGE_SHIFT;
+		} else if (view->type == I915_GGTT_VIEW_REMAPPED) {
+			vma->size = intel_remapped_info_size(&view->remapped);
+			vma->size <<= PAGE_SHIFT;
 		}
 	}
 
@@ -360,7 +364,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	int err;
 
 	/* Access through the GTT requires the device to be awake. */
-	assert_rpm_wakelock_held(vma->vm->i915);
+	assert_rpm_wakelock_held(&vma->vm->i915->runtime_pm);
 
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
@@ -439,7 +443,7 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags)
 	if (flags & I915_VMA_RELEASE_MAP)
 		i915_gem_object_unpin_map(obj);
 
-	__i915_gem_object_release_unless_active(obj);
+	i915_gem_object_put(obj);
 }
 
 bool i915_vma_misplaced(const struct i915_vma *vma,
@@ -476,13 +480,6 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 	GEM_BUG_ON(!vma->fence_size);
 
-	/*
-	 * Explicitly disable for rotated VMA since the display does not
-	 * need the fence and the VMA is not accessible to other users.
-	 */
-	if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
-		return;
-
 	fenceable = (vma->node.size >= vma->fence_size &&
 		     IS_ALIGNED(vma->node.start, vma->fence_alignment));
 
@@ -538,7 +535,7 @@ static void assert_bind_count(const struct drm_i915_gem_object *obj)
 	 * assume that no else is pinning the pages, but as a rough assertion
 	 * that we will not run into problems later, this will do!)
 	 */
-	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count);
+	GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < atomic_read(&obj->bind_count));
 }
 
 /**
@@ -680,14 +677,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 	mutex_unlock(&vma->vm->mutex);
 
 	if (vma->obj) {
-		struct drm_i915_gem_object *obj = vma->obj;
-
-		spin_lock(&dev_priv->mm.obj_lock);
-		list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list);
-		obj->bind_count++;
-		spin_unlock(&dev_priv->mm.obj_lock);
-
-		assert_bind_count(obj);
+		atomic_inc(&vma->obj->bind_count);
+		assert_bind_count(vma->obj);
 	}
 
 	return 0;
@@ -703,8 +694,6 @@ err_unpin:
 static void
 i915_vma_remove(struct i915_vma *vma)
 {
-	struct drm_i915_private *i915 = vma->vm->i915;
-
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND));
 
@@ -722,10 +711,7 @@ i915_vma_remove(struct i915_vma *vma)
 	if (vma->obj) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		spin_lock(&i915->mm.obj_lock);
-		if (--obj->bind_count == 0)
-			list_move_tail(&obj->mm.link, &i915->mm.unbound_list);
-		spin_unlock(&i915->mm.obj_lock);
+		atomic_dec(&obj->bind_count);
 
 		/*
 		 * And finally now the object is completely decoupled from this
@@ -784,10 +770,10 @@ err_unpin:
 
 void i915_vma_close(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
+	unsigned long flags;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
-	vma->flags |= I915_VMA_CLOSED;
 
 	/*
 	 * We defer actually closing, unbinding and destroying the VMA until
@@ -801,17 +787,26 @@ void i915_vma_close(struct i915_vma *vma)
 	 * causing us to rebind the VMA once more. This ends up being a lot
 	 * of wasted work for the steady state.
 	 */
-	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
+	spin_lock_irqsave(&i915->gt.closed_lock, flags);
+	list_add(&vma->closed_link, &i915->gt.closed_vma);
+	spin_unlock_irqrestore(&i915->gt.closed_lock, flags);
 }
 
-void i915_vma_reopen(struct i915_vma *vma)
+static void __i915_vma_remove_closed(struct i915_vma *vma)
 {
-	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+	struct drm_i915_private *i915 = vma->vm->i915;
 
-	if (vma->flags & I915_VMA_CLOSED) {
-		vma->flags &= ~I915_VMA_CLOSED;
-		list_del(&vma->closed_link);
-	}
+	if (!i915_vma_is_closed(vma))
+		return;
+
+	spin_lock_irq(&i915->gt.closed_lock);
+	list_del_init(&vma->closed_link);
+	spin_unlock_irq(&i915->gt.closed_lock);
+}
+
+void i915_vma_reopen(struct i915_vma *vma)
+{
+	__i915_vma_remove_closed(vma);
 }
 
 static void __i915_vma_destroy(struct i915_vma *vma)
@@ -843,13 +838,13 @@ void i915_vma_destroy(struct i915_vma *vma)
 {
 	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	GEM_BUG_ON(i915_vma_is_active(vma));
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (i915_vma_is_closed(vma))
-		list_del(&vma->closed_link);
+	__i915_vma_remove_closed(vma);
 
 	WARN_ON(i915_vma_unbind(vma));
+	GEM_BUG_ON(i915_vma_is_active(vma));
+
 	__i915_vma_destroy(vma);
 }
 
@@ -857,12 +852,16 @@ void i915_vma_parked(struct drm_i915_private *i915)
 {
 	struct i915_vma *vma, *next;
 
+	spin_lock_irq(&i915->gt.closed_lock);
 	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
-		GEM_BUG_ON(!i915_vma_is_closed(vma));
+		list_del_init(&vma->closed_link);
+		spin_unlock_irq(&i915->gt.closed_lock);
+
 		i915_vma_destroy(vma);
-	}
 
-	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
+		spin_lock_irq(&i915->gt.closed_lock);
+	}
+	spin_unlock_irq(&i915->gt.closed_lock);
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
@@ -911,12 +910,10 @@ static void export_fence(struct i915_vma *vma,
 	 * handle an error right now. Worst case should be missed
 	 * synchronisation leading to rendering corruption.
 	 */
-	reservation_object_lock(resv, NULL);
 	if (flags & EXEC_OBJECT_WRITE)
 		reservation_object_add_excl_fence(resv, &rq->fence);
 	else if (reservation_object_reserve_shared(resv, 1) == 0)
 		reservation_object_add_shared_fence(resv, &rq->fence);
-	reservation_object_unlock(resv);
 }
 
 int i915_vma_move_to_active(struct i915_vma *vma,
@@ -925,7 +922,8 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	lockdep_assert_held(&rq->i915->drm.struct_mutex);
+	assert_vma_held(vma);
+	assert_object_held(obj);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
 	/*
@@ -936,12 +934,12 @@ int i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	if (!vma->active.count)
-		obj->active_count++;
+	if (!vma->active.count && !obj->active_count++)
+		i915_gem_object_get(obj); /* once more for the active ref */
 
 	if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
-		if (!vma->active.count)
-			obj->active_count--;
+		if (!vma->active.count && !--obj->active_count)
+			i915_gem_object_put(obj);
 		return -ENOMEM;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 6eab70953a57..4b769db649bf 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -32,7 +32,7 @@
 
 #include "i915_gem_gtt.h"
 #include "i915_gem_fence_reg.h"
-#include "i915_gem_object.h"
+#include "gem/i915_gem_object.h"
 
 #include "i915_active.h"
 #include "i915_request.h"
@@ -40,6 +40,8 @@
 enum i915_cache_level;
 
 /**
+ * DOC: Virtual Memory Address
+ *
  * A VMA represents a GEM BO that is bound into an address space. Therefore, a
  * VMA's presence cannot be guaranteed before binding, or after unbinding the
  * object into/from the address space.
@@ -52,7 +54,7 @@ struct i915_vma {
 	struct drm_i915_gem_object *obj;
 	struct i915_address_space *vm;
 	const struct i915_vma_ops *ops;
-	struct drm_i915_fence_reg *fence;
+	struct i915_fence_reg *fence;
 	struct reservation_object *resv; /** Alias of obj->resv */
 	struct sg_table *pages;
 	void __iomem *iomap;
@@ -69,7 +71,7 @@ struct i915_vma {
 	 * handles (but same file) for execbuf, i.e. the number of aliases
 	 * that exist in the ctx->handle_vmas LUT for this vma.
 	 */
-	unsigned int open_count;
+	atomic_t open_count;
 	unsigned long flags;
 	/**
 	 * How many users have pinned this object in GTT space.
@@ -104,10 +106,9 @@ struct i915_vma {
 
 #define I915_VMA_GGTT		BIT(11)
 #define I915_VMA_CAN_FENCE	BIT(12)
-#define I915_VMA_CLOSED		BIT(13)
-#define I915_VMA_USERFAULT_BIT	14
+#define I915_VMA_USERFAULT_BIT	13
 #define I915_VMA_USERFAULT	BIT(I915_VMA_USERFAULT_BIT)
-#define I915_VMA_GGTT_WRITE	BIT(15)
+#define I915_VMA_GGTT_WRITE	BIT(14)
 
 	struct i915_active active;
 	struct i915_active_request last_fence;
@@ -190,11 +191,6 @@ static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma)
 	return vma->flags & I915_VMA_CAN_FENCE;
 }
 
-static inline bool i915_vma_is_closed(const struct i915_vma *vma)
-{
-	return vma->flags & I915_VMA_CLOSED;
-}
-
 static inline bool i915_vma_set_userfault(struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma));
@@ -211,6 +207,11 @@ static inline bool i915_vma_has_userfault(const struct i915_vma *vma)
 	return test_bit(I915_VMA_USERFAULT_BIT, &vma->flags);
 }
 
+static inline bool i915_vma_is_closed(const struct i915_vma *vma)
+{
+	return !list_empty(&vma->closed_link);
+}
+
 static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
 {
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
@@ -277,8 +278,11 @@ i915_vma_compare(struct i915_vma *vma,
 	 */
 	BUILD_BUG_ON(I915_GGTT_VIEW_NORMAL >= I915_GGTT_VIEW_PARTIAL);
 	BUILD_BUG_ON(I915_GGTT_VIEW_PARTIAL >= I915_GGTT_VIEW_ROTATED);
+	BUILD_BUG_ON(I915_GGTT_VIEW_ROTATED >= I915_GGTT_VIEW_REMAPPED);
 	BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
 		     offsetof(typeof(*view), partial));
+	BUILD_BUG_ON(offsetof(typeof(*view), rotated) !=
+		     offsetof(typeof(*view), remapped));
 	return memcmp(&vma->ggtt_view.partial, &view->partial, view->type);
 }
 
@@ -295,6 +299,18 @@ void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
 void i915_vma_destroy(struct i915_vma *vma);
 
+#define assert_vma_held(vma) reservation_object_assert_held((vma)->resv)
+
+static inline void i915_vma_lock(struct i915_vma *vma)
+{
+	reservation_object_lock(vma->resv, NULL);
+}
+
+static inline void i915_vma_unlock(struct i915_vma *vma)
+{
+	reservation_object_unlock(vma->resv);
+}
+
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags);
 static inline int __must_check
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
deleted file mode 100644
index 924cc556223a..000000000000
--- a/drivers/gpu/drm/i915/intel_context.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#include "i915_drv.h"
-#include "i915_gem_context.h"
-#include "i915_globals.h"
-#include "intel_context.h"
-#include "intel_ringbuffer.h"
-
-static struct i915_global_context {
-	struct i915_global base;
-	struct kmem_cache *slab_ce;
-} global;
-
-struct intel_context *intel_context_alloc(void)
-{
-	return kmem_cache_zalloc(global.slab_ce, GFP_KERNEL);
-}
-
-void intel_context_free(struct intel_context *ce)
-{
-	kmem_cache_free(global.slab_ce, ce);
-}
-
-struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
-		     struct intel_engine_cs *engine)
-{
-	struct intel_context *ce = NULL;
-	struct rb_node *p;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	p = ctx->hw_contexts.rb_node;
-	while (p) {
-		struct intel_context *this =
-			rb_entry(p, struct intel_context, node);
-
-		if (this->engine == engine) {
-			GEM_BUG_ON(this->gem_context != ctx);
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = p->rb_right;
-		else
-			p = p->rb_left;
-	}
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce)
-{
-	struct rb_node **p, *parent;
-	int err = 0;
-
-	spin_lock(&ctx->hw_contexts_lock);
-
-	parent = NULL;
-	p = &ctx->hw_contexts.rb_node;
-	while (*p) {
-		struct intel_context *this;
-
-		parent = *p;
-		this = rb_entry(parent, struct intel_context, node);
-
-		if (this->engine == engine) {
-			err = -EEXIST;
-			ce = this;
-			break;
-		}
-
-		if (this->engine < engine)
-			p = &parent->rb_right;
-		else
-			p = &parent->rb_left;
-	}
-	if (!err) {
-		rb_link_node(&ce->node, parent, p);
-		rb_insert_color(&ce->node, &ctx->hw_contexts);
-	}
-
-	spin_unlock(&ctx->hw_contexts_lock);
-
-	return ce;
-}
-
-void __intel_context_remove(struct intel_context *ce)
-{
-	struct i915_gem_context *ctx = ce->gem_context;
-
-	spin_lock(&ctx->hw_contexts_lock);
-	rb_erase(&ce->node, &ctx->hw_contexts);
-	spin_unlock(&ctx->hw_contexts_lock);
-}
-
-static struct intel_context *
-intel_context_instance(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-{
-	struct intel_context *ce, *pos;
-
-	ce = intel_context_lookup(ctx, engine);
-	if (likely(ce))
-		return ce;
-
-	ce = intel_context_alloc();
-	if (!ce)
-		return ERR_PTR(-ENOMEM);
-
-	intel_context_init(ce, ctx, engine);
-
-	pos = __intel_context_insert(ctx, engine, ce);
-	if (unlikely(pos != ce)) /* Beaten! Use their HW context instead */
-		intel_context_free(ce);
-
-	GEM_BUG_ON(intel_context_lookup(ctx, engine) != pos);
-	return pos;
-}
-
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine)
-	__acquires(ce->pin_mutex)
-{
-	struct intel_context *ce;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex))
-		return ERR_PTR(-EINTR);
-
-	return ce;
-}
-
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx,
-		  struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-	int err;
-
-	ce = intel_context_instance(ctx, engine);
-	if (IS_ERR(ce))
-		return ce;
-
-	if (likely(atomic_inc_not_zero(&ce->pin_count)))
-		return ce;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex))
-		return ERR_PTR(-EINTR);
-
-	if (likely(!atomic_read(&ce->pin_count))) {
-		err = ce->ops->pin(ce);
-		if (err)
-			goto err;
-
-		i915_gem_context_get(ctx);
-		GEM_BUG_ON(ce->gem_context != ctx);
-
-		mutex_lock(&ctx->mutex);
-		list_add(&ce->active_link, &ctx->active_engines);
-		mutex_unlock(&ctx->mutex);
-
-		intel_context_get(ce);
-		smp_mb__before_atomic(); /* flush pin before it is visible */
-	}
-
-	atomic_inc(&ce->pin_count);
-	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-
-	mutex_unlock(&ce->pin_mutex);
-	return ce;
-
-err:
-	mutex_unlock(&ce->pin_mutex);
-	return ERR_PTR(err);
-}
-
-void intel_context_unpin(struct intel_context *ce)
-{
-	if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
-		return;
-
-	/* We may be called from inside intel_context_pin() to evict another */
-	intel_context_get(ce);
-	mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
-	if (likely(atomic_dec_and_test(&ce->pin_count))) {
-		ce->ops->unpin(ce);
-
-		mutex_lock(&ce->gem_context->mutex);
-		list_del(&ce->active_link);
-		mutex_unlock(&ce->gem_context->mutex);
-
-		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
-	}
-
-	mutex_unlock(&ce->pin_mutex);
-	intel_context_put(ce);
-}
-
-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
-{
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
-
-	intel_context_unpin(ce);
-}
-
-void
-intel_context_init(struct intel_context *ce,
-		   struct i915_gem_context *ctx,
-		   struct intel_engine_cs *engine)
-{
-	kref_init(&ce->ref);
-
-	ce->gem_context = ctx;
-	ce->engine = engine;
-	ce->ops = engine->cops;
-	ce->saturated = 0;
-
-	INIT_LIST_HEAD(&ce->signal_link);
-	INIT_LIST_HEAD(&ce->signals);
-
-	mutex_init(&ce->pin_mutex);
-
-	/* Use the whole device by default */
-	ce->sseu = intel_device_default_sseu(ctx->i915);
-
-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
-}
-
-static void i915_global_context_shrink(void)
-{
-	kmem_cache_shrink(global.slab_ce);
-}
-
-static void i915_global_context_exit(void)
-{
-	kmem_cache_destroy(global.slab_ce);
-}
-
-static struct i915_global_context global = { {
-	.shrink = i915_global_context_shrink,
-	.exit = i915_global_context_exit,
-} };
-
-int __init i915_global_context_init(void)
-{
-	global.slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);
-	if (!global.slab_ce)
-		return -ENOMEM;
-
-	i915_global_register(&global.base);
-	return 0;
-}
diff --git a/drivers/gpu/drm/i915/intel_context.h b/drivers/gpu/drm/i915/intel_context.h
deleted file mode 100644
index ebc861b1a49e..000000000000
--- a/drivers/gpu/drm/i915/intel_context.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * SPDX-License-Identifier: MIT
- *
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __INTEL_CONTEXT_H__
-#define __INTEL_CONTEXT_H__
-
-#include <linux/lockdep.h>
-
-#include "intel_context_types.h"
-#include "intel_engine_types.h"
-
-struct intel_context *intel_context_alloc(void);
-void intel_context_free(struct intel_context *ce);
-
-void intel_context_init(struct intel_context *ce,
-			struct i915_gem_context *ctx,
-			struct intel_engine_cs *engine);
-
-/**
- * intel_context_lookup - Find the matching HW context for this (ctx, engine)
- * @ctx - the parent GEM context
- * @engine - the target HW engine
- *
- * May return NULL if the HW context hasn't been instantiated (i.e. unused).
- */
-struct intel_context *
-intel_context_lookup(struct i915_gem_context *ctx,
-		     struct intel_engine_cs *engine);
-
-/**
- * intel_context_pin_lock - Stablises the 'pinned' status of the HW context
- * @ctx - the parent GEM context
- * @engine - the target HW engine
- *
- * Acquire a lock on the pinned status of the HW context, such that the context
- * can neither be bound to the GPU or unbound whilst the lock is held, i.e.
- * intel_context_is_pinned() remains stable.
- */
-struct intel_context *
-intel_context_pin_lock(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine);
-
-static inline bool
-intel_context_is_pinned(struct intel_context *ce)
-{
-	return atomic_read(&ce->pin_count);
-}
-
-static inline void intel_context_pin_unlock(struct intel_context *ce)
-__releases(ce->pin_mutex)
-{
-	mutex_unlock(&ce->pin_mutex);
-}
-
-struct intel_context *
-__intel_context_insert(struct i915_gem_context *ctx,
-		       struct intel_engine_cs *engine,
-		       struct intel_context *ce);
-void
-__intel_context_remove(struct intel_context *ce);
-
-struct intel_context *
-intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
-
-static inline void __intel_context_pin(struct intel_context *ce)
-{
-	GEM_BUG_ON(!intel_context_is_pinned(ce));
-	atomic_inc(&ce->pin_count);
-}
-
-void intel_context_unpin(struct intel_context *ce);
-
-static inline struct intel_context *intel_context_get(struct intel_context *ce)
-{
-	kref_get(&ce->ref);
-	return ce;
-}
-
-static inline void intel_context_put(struct intel_context *ce)
-{
-	kref_put(&ce->ref, ce->ops->destroy);
-}
-
-#endif /* __INTEL_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index f43c2a2563a5..6ef74531588a 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -70,6 +70,10 @@ MODULE_FIRMWARE(SKL_CSR_PATH);
 MODULE_FIRMWARE(BXT_CSR_PATH);
 
 #define CSR_DEFAULT_FW_OFFSET		0xFFFFFFFF
+#define PACKAGE_MAX_FW_INFO_ENTRIES	20
+#define PACKAGE_V2_MAX_FW_INFO_ENTRIES	32
+#define DMC_V1_MAX_MMIO_COUNT		8
+#define DMC_V3_MAX_MMIO_COUNT		20
 
 struct intel_css_header {
 	/* 0x09 for DMC */
@@ -116,7 +120,10 @@ struct intel_css_header {
 } __packed;
 
 struct intel_fw_info {
-	u16 reserved1;
+	u8 reserved1;
+
+	/* reserved on package_header version 1, must be 0 on version 2 */
+	u8 dmc_id;
 
 	/* Stepping (A, B, C, ..., *). * is a wildcard */
 	char stepping;
@@ -130,28 +137,26 @@ struct intel_fw_info {
 
 struct intel_package_header {
 	/* DMC container header length in dwords */
-	unsigned char header_len;
+	u8 header_len;
 
-	/* always value would be 0x01 */
-	unsigned char header_ver;
+	/* 0x01, 0x02 */
+	u8 header_ver;
 
-	unsigned char reserved[10];
+	u8 reserved[10];
 
 	/* Number of valid entries in the FWInfo array below */
 	u32 num_entries;
-
-	struct intel_fw_info fw_info[20];
 } __packed;
 
-struct intel_dmc_header {
+struct intel_dmc_header_base {
 	/* always value would be 0x40403E3E */
 	u32 signature;
 
 	/* DMC binary header length */
-	unsigned char header_len;
+	u8 header_len;
 
 	/* 0x01 */
-	unsigned char header_ver;
+	u8 header_ver;
 
 	/* Reserved */
 	u16 dmcc_ver;
@@ -164,22 +169,47 @@ struct intel_dmc_header {
 
 	/* Major Minor version */
 	u32 fw_version;
+} __packed;
+
+struct intel_dmc_header_v1 {
+	struct intel_dmc_header_base base;
 
 	/* Number of valid MMIO cycles present. */
 	u32 mmio_count;
 
 	/* MMIO address */
-	u32 mmioaddr[8];
+	u32 mmioaddr[DMC_V1_MAX_MMIO_COUNT];
 
 	/* MMIO data */
-	u32 mmiodata[8];
+	u32 mmiodata[DMC_V1_MAX_MMIO_COUNT];
 
 	/* FW filename  */
-	unsigned char dfile[32];
+	char dfile[32];
 
 	u32 reserved1[2];
 } __packed;
 
+struct intel_dmc_header_v3 {
+	struct intel_dmc_header_base base;
+
+	/* DMC RAM start MMIO address */
+	u32 start_mmioaddr;
+
+	u32 reserved[9];
+
+	/* FW filename */
+	char dfile[32];
+
+	/* Number of valid MMIO cycles present. */
+	u32 mmio_count;
+
+	/* MMIO address */
+	u32 mmioaddr[DMC_V3_MAX_MMIO_COUNT];
+
+	/* MMIO data */
+	u32 mmiodata[DMC_V3_MAX_MMIO_COUNT];
+} __packed;
+
 struct stepping_info {
 	char stepping;
 	char substepping;
@@ -273,7 +303,7 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
 	}
 
 	fw_size = dev_priv->csr.dmc_fw_size;
-	assert_rpm_wakelock_held(dev_priv);
+	assert_rpm_wakelock_held(&dev_priv->runtime_pm);
 
 	preempt_disable();
 
@@ -292,29 +322,231 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
 	gen9_set_dc_state_debugmask(dev_priv);
 }
 
-static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
-			 const struct firmware *fw)
+/*
+ * Search fw_info table for dmc_offset to find firmware binary: num_entries is
+ * already sanitized.
+ */
+static u32 find_dmc_fw_offset(const struct intel_fw_info *fw_info,
+			      unsigned int num_entries,
+			      const struct stepping_info *si,
+			      u8 package_ver)
 {
-	struct intel_css_header *css_header;
-	struct intel_package_header *package_header;
-	struct intel_dmc_header *dmc_header;
-	struct intel_csr *csr = &dev_priv->csr;
-	const struct stepping_info *si = intel_get_stepping_info(dev_priv);
-	u32 dmc_offset = CSR_DEFAULT_FW_OFFSET, readcount = 0, nbytes;
-	u32 i;
-	u32 *dmc_payload;
+	u32 dmc_offset = CSR_DEFAULT_FW_OFFSET;
+	unsigned int i;
 
-	if (!fw)
-		return NULL;
+	for (i = 0; i < num_entries; i++) {
+		if (package_ver > 1 && fw_info[i].dmc_id != 0)
+			continue;
+
+		if (fw_info[i].substepping == '*' &&
+		    si->stepping == fw_info[i].stepping) {
+			dmc_offset = fw_info[i].offset;
+			break;
+		}
+
+		if (si->stepping == fw_info[i].stepping &&
+		    si->substepping == fw_info[i].substepping) {
+			dmc_offset = fw_info[i].offset;
+			break;
+		}
+
+		if (fw_info[i].stepping == '*' &&
+		    fw_info[i].substepping == '*') {
+			/*
+			 * In theory we should stop the search as generic
+			 * entries should always come after the more specific
+			 * ones, but let's continue to make sure to work even
+			 * with "broken" firmwares. If we don't find a more
+			 * specific one, then we use this entry
+			 */
+			dmc_offset = fw_info[i].offset;
+		}
+	}
+
+	return dmc_offset;
+}
+
+static u32 parse_csr_fw_dmc(struct intel_csr *csr,
+			    const struct intel_dmc_header_base *dmc_header,
+			    size_t rem_size)
+{
+	unsigned int header_len_bytes, dmc_header_size, payload_size, i;
+	const u32 *mmioaddr, *mmiodata;
+	u32 mmio_count, mmio_count_max;
+	u8 *payload;
+
+	BUILD_BUG_ON(ARRAY_SIZE(csr->mmioaddr) < DMC_V3_MAX_MMIO_COUNT ||
+		     ARRAY_SIZE(csr->mmioaddr) < DMC_V1_MAX_MMIO_COUNT);
+
+	/*
+	 * Check if we can access common fields, we will checkc again below
+	 * after we have read the version
+	 */
+	if (rem_size < sizeof(struct intel_dmc_header_base))
+		goto error_truncated;
+
+	/* Cope with small differences between v1 and v3 */
+	if (dmc_header->header_ver == 3) {
+		const struct intel_dmc_header_v3 *v3 =
+			(const struct intel_dmc_header_v3 *)dmc_header;
+
+		if (rem_size < sizeof(struct intel_dmc_header_v3))
+			goto error_truncated;
+
+		mmioaddr = v3->mmioaddr;
+		mmiodata = v3->mmiodata;
+		mmio_count = v3->mmio_count;
+		mmio_count_max = DMC_V3_MAX_MMIO_COUNT;
+		/* header_len is in dwords */
+		header_len_bytes = dmc_header->header_len * 4;
+		dmc_header_size = sizeof(*v3);
+	} else if (dmc_header->header_ver == 1) {
+		const struct intel_dmc_header_v1 *v1 =
+			(const struct intel_dmc_header_v1 *)dmc_header;
+
+		if (rem_size < sizeof(struct intel_dmc_header_v1))
+			goto error_truncated;
+
+		mmioaddr = v1->mmioaddr;
+		mmiodata = v1->mmiodata;
+		mmio_count = v1->mmio_count;
+		mmio_count_max = DMC_V1_MAX_MMIO_COUNT;
+		header_len_bytes = dmc_header->header_len;
+		dmc_header_size = sizeof(*v1);
+	} else {
+		DRM_ERROR("Unknown DMC fw header version: %u\n",
+			  dmc_header->header_ver);
+		return 0;
+	}
+
+	if (header_len_bytes != dmc_header_size) {
+		DRM_ERROR("DMC firmware has wrong dmc header length "
+			  "(%u bytes)\n", header_len_bytes);
+		return 0;
+	}
+
+	/* Cache the dmc header info. */
+	if (mmio_count > mmio_count_max) {
+		DRM_ERROR("DMC firmware has wrong mmio count %u\n", mmio_count);
+		return 0;
+	}
+
+	for (i = 0; i < mmio_count; i++) {
+		if (mmioaddr[i] < CSR_MMIO_START_RANGE ||
+		    mmioaddr[i] > CSR_MMIO_END_RANGE) {
+			DRM_ERROR("DMC firmware has wrong mmio address 0x%x\n",
+				  mmioaddr[i]);
+			return 0;
+		}
+		csr->mmioaddr[i] = _MMIO(mmioaddr[i]);
+		csr->mmiodata[i] = mmiodata[i];
+	}
+	csr->mmio_count = mmio_count;
+
+	rem_size -= header_len_bytes;
+
+	/* fw_size is in dwords, so multiplied by 4 to convert into bytes. */
+	payload_size = dmc_header->fw_size * 4;
+	if (rem_size < payload_size)
+		goto error_truncated;
+
+	if (payload_size > csr->max_fw_size) {
+		DRM_ERROR("DMC FW too big (%u bytes)\n", payload_size);
+		return 0;
+	}
+	csr->dmc_fw_size = dmc_header->fw_size;
+
+	csr->dmc_payload = kmalloc(payload_size, GFP_KERNEL);
+	if (!csr->dmc_payload) {
+		DRM_ERROR("Memory allocation failed for dmc payload\n");
+		return 0;
+	}
+
+	payload = (u8 *)(dmc_header) + header_len_bytes;
+	memcpy(csr->dmc_payload, payload, payload_size);
+
+	return header_len_bytes + payload_size;
+
+error_truncated:
+	DRM_ERROR("Truncated DMC firmware, refusing.\n");
+	return 0;
+}
+
+static u32
+parse_csr_fw_package(struct intel_csr *csr,
+		     const struct intel_package_header *package_header,
+		     const struct stepping_info *si,
+		     size_t rem_size)
+{
+	u32 package_size = sizeof(struct intel_package_header);
+	u32 num_entries, max_entries, dmc_offset;
+	const struct intel_fw_info *fw_info;
+
+	if (rem_size < package_size)
+		goto error_truncated;
+
+	if (package_header->header_ver == 1) {
+		max_entries = PACKAGE_MAX_FW_INFO_ENTRIES;
+	} else if (package_header->header_ver == 2) {
+		max_entries = PACKAGE_V2_MAX_FW_INFO_ENTRIES;
+	} else {
+		DRM_ERROR("DMC firmware has unknown header version %u\n",
+			  package_header->header_ver);
+		return 0;
+	}
+
+	/*
+	 * We should always have space for max_entries,
+	 * even if not all are used
+	 */
+	package_size += max_entries * sizeof(struct intel_fw_info);
+	if (rem_size < package_size)
+		goto error_truncated;
+
+	if (package_header->header_len * 4 != package_size) {
+		DRM_ERROR("DMC firmware has wrong package header length "
+			  "(%u bytes)\n", package_size);
+		return 0;
+	}
+
+	num_entries = package_header->num_entries;
+	if (WARN_ON(package_header->num_entries > max_entries))
+		num_entries = max_entries;
+
+	fw_info = (const struct intel_fw_info *)
+		((u8 *)package_header + sizeof(*package_header));
+	dmc_offset = find_dmc_fw_offset(fw_info, num_entries, si,
+					package_header->header_ver);
+	if (dmc_offset == CSR_DEFAULT_FW_OFFSET) {
+		DRM_ERROR("DMC firmware not supported for %c stepping\n",
+			  si->stepping);
+		return 0;
+	}
+
+	/* dmc_offset is in dwords */
+	return package_size + dmc_offset * 4;
+
+error_truncated:
+	DRM_ERROR("Truncated DMC firmware, refusing.\n");
+	return 0;
+}
+
+/* Return number of bytes parsed or 0 on error */
+static u32 parse_csr_fw_css(struct intel_csr *csr,
+			    struct intel_css_header *css_header,
+			    size_t rem_size)
+{
+	if (rem_size < sizeof(struct intel_css_header)) {
+		DRM_ERROR("Truncated DMC firmware, refusing.\n");
+		return 0;
+	}
 
-	/* Extract CSS Header information*/
-	css_header = (struct intel_css_header *)fw->data;
 	if (sizeof(struct intel_css_header) !=
 	    (css_header->header_len * 4)) {
 		DRM_ERROR("DMC firmware has wrong CSS header length "
 			  "(%u bytes)\n",
 			  (css_header->header_len * 4));
-		return NULL;
+		return 0;
 	}
 
 	if (csr->required_version &&
@@ -325,91 +557,47 @@ static u32 *parse_csr_fw(struct drm_i915_private *dev_priv,
 			 CSR_VERSION_MINOR(css_header->version),
 			 CSR_VERSION_MAJOR(csr->required_version),
 			 CSR_VERSION_MINOR(csr->required_version));
-		return NULL;
+		return 0;
 	}
 
 	csr->version = css_header->version;
 
-	readcount += sizeof(struct intel_css_header);
+	return sizeof(struct intel_css_header);
+}
 
-	/* Extract Package Header information*/
-	package_header = (struct intel_package_header *)
-		&fw->data[readcount];
-	if (sizeof(struct intel_package_header) !=
-	    (package_header->header_len * 4)) {
-		DRM_ERROR("DMC firmware has wrong package header length "
-			  "(%u bytes)\n",
-			  (package_header->header_len * 4));
-		return NULL;
-	}
-	readcount += sizeof(struct intel_package_header);
+static void parse_csr_fw(struct drm_i915_private *dev_priv,
+			 const struct firmware *fw)
+{
+	struct intel_css_header *css_header;
+	struct intel_package_header *package_header;
+	struct intel_dmc_header_base *dmc_header;
+	struct intel_csr *csr = &dev_priv->csr;
+	const struct stepping_info *si = intel_get_stepping_info(dev_priv);
+	u32 readcount = 0;
+	u32 r;
 
-	/* Search for dmc_offset to find firware binary. */
-	for (i = 0; i < package_header->num_entries; i++) {
-		if (package_header->fw_info[i].substepping == '*' &&
-		    si->stepping == package_header->fw_info[i].stepping) {
-			dmc_offset = package_header->fw_info[i].offset;
-			break;
-		} else if (si->stepping == package_header->fw_info[i].stepping &&
-			   si->substepping == package_header->fw_info[i].substepping) {
-			dmc_offset = package_header->fw_info[i].offset;
-			break;
-		} else if (package_header->fw_info[i].stepping == '*' &&
-			   package_header->fw_info[i].substepping == '*')
-			dmc_offset = package_header->fw_info[i].offset;
-	}
-	if (dmc_offset == CSR_DEFAULT_FW_OFFSET) {
-		DRM_ERROR("DMC firmware not supported for %c stepping\n",
-			  si->stepping);
-		return NULL;
-	}
-	/* Convert dmc_offset into number of bytes. By default it is in dwords*/
-	dmc_offset *= 4;
-	readcount += dmc_offset;
+	if (!fw)
+		return;
 
-	/* Extract dmc_header information. */
-	dmc_header = (struct intel_dmc_header *)&fw->data[readcount];
-	if (sizeof(struct intel_dmc_header) != (dmc_header->header_len)) {
-		DRM_ERROR("DMC firmware has wrong dmc header length "
-			  "(%u bytes)\n",
-			  (dmc_header->header_len));
-		return NULL;
-	}
-	readcount += sizeof(struct intel_dmc_header);
+	/* Extract CSS Header information */
+	css_header = (struct intel_css_header *)fw->data;
+	r = parse_csr_fw_css(csr, css_header, fw->size);
+	if (!r)
+		return;
 
-	/* Cache the dmc header info. */
-	if (dmc_header->mmio_count > ARRAY_SIZE(csr->mmioaddr)) {
-		DRM_ERROR("DMC firmware has wrong mmio count %u\n",
-			  dmc_header->mmio_count);
-		return NULL;
-	}
-	csr->mmio_count = dmc_header->mmio_count;
-	for (i = 0; i < dmc_header->mmio_count; i++) {
-		if (dmc_header->mmioaddr[i] < CSR_MMIO_START_RANGE ||
-		    dmc_header->mmioaddr[i] > CSR_MMIO_END_RANGE) {
-			DRM_ERROR("DMC firmware has wrong mmio address 0x%x\n",
-				  dmc_header->mmioaddr[i]);
-			return NULL;
-		}
-		csr->mmioaddr[i] = _MMIO(dmc_header->mmioaddr[i]);
-		csr->mmiodata[i] = dmc_header->mmiodata[i];
-	}
+	readcount += r;
 
-	/* fw_size is in dwords, so multiplied by 4 to convert into bytes. */
-	nbytes = dmc_header->fw_size * 4;
-	if (nbytes > csr->max_fw_size) {
-		DRM_ERROR("DMC FW too big (%u bytes)\n", nbytes);
-		return NULL;
-	}
-	csr->dmc_fw_size = dmc_header->fw_size;
+	/* Extract Package Header information */
+	package_header = (struct intel_package_header *)&fw->data[readcount];
+	r = parse_csr_fw_package(csr, package_header, si, fw->size - readcount);
+	if (!r)
+		return;
 
-	dmc_payload = kmalloc(nbytes, GFP_KERNEL);
-	if (!dmc_payload) {
-		DRM_ERROR("Memory allocation failed for dmc payload\n");
-		return NULL;
-	}
+	readcount += r;
 
-	return memcpy(dmc_payload, &fw->data[readcount], nbytes);
+	/* Extract dmc_header information */
+	dmc_header = (struct intel_dmc_header_base *)&fw->data[readcount];
+	parse_csr_fw_dmc(csr, dmc_header, fw->size - readcount);
 }
 
 static void intel_csr_runtime_pm_get(struct drm_i915_private *dev_priv)
@@ -437,8 +625,7 @@ static void csr_load_work_fn(struct work_struct *work)
 	csr = &dev_priv->csr;
 
 	request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev);
-	if (fw)
-		dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw);
+	parse_csr_fw(dev_priv, fw);
 
 	if (dev_priv->csr.dmc_payload) {
 		intel_csr_load_program(dev_priv);
@@ -529,8 +716,6 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv)
 
 	if (csr->fw_path == NULL) {
 		DRM_DEBUG_KMS("No known CSR firmware for platform, disabling runtime PM\n");
-		WARN_ON(!IS_ALPHA_SUPPORT(INTEL_INFO(dev_priv)));
-
 		return;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_csr.h b/drivers/gpu/drm/i915/intel_csr.h
index 17a32c1e8a35..03c64f8af7ab 100644
--- a/drivers/gpu/drm/i915/intel_csr.h
+++ b/drivers/gpu/drm/i915/intel_csr.h
@@ -8,6 +8,10 @@
 
 struct drm_i915_private;
 
+#define CSR_VERSION(major, minor)	((major) << 16 | (minor))
+#define CSR_VERSION_MAJOR(version)	((version) >> 16)
+#define CSR_VERSION_MINOR(version)	((version) & 0xffff)
+
 void intel_csr_ucode_init(struct drm_i915_private *i915);
 void intel_csr_load_program(struct drm_i915_private *i915);
 void intel_csr_ucode_fini(struct drm_i915_private *i915);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 6af480b95bc6..7135d8dc32a7 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -90,10 +90,10 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 
 	drm_printf(p, "slice total: %u, mask=%04x\n",
 		   hweight8(sseu->slice_mask), sseu->slice_mask);
-	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
+	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
 	for (s = 0; s < sseu->max_slices; s++) {
 		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
-			   s, hweight8(sseu->subslice_mask[s]),
+			   s, intel_sseu_subslices_per_slice(sseu, s),
 			   sseu->subslice_mask[s]);
 	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
@@ -114,6 +114,40 @@ void intel_device_info_dump_runtime(const struct intel_runtime_info *info,
 		   info->cs_timestamp_frequency_khz);
 }
 
+static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
+		       int subslice)
+{
+	int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+	int slice_stride = sseu->max_subslices * subslice_stride;
+
+	return slice * slice_stride + subslice * subslice_stride;
+}
+
+static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
+			int subslice)
+{
+	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+	u16 eu_mask = 0;
+
+	for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) {
+		eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
+			(i * BITS_PER_BYTE);
+	}
+
+	return eu_mask;
+}
+
+static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
+			 u16 eu_mask)
+{
+	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+
+	for (i = 0; i < GEN_SSEU_STRIDE(sseu->max_eus_per_subslice); i++) {
+		sseu->eu_mask[offset + i] =
+			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	}
+}
+
 void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
 				     struct drm_printer *p)
 {
@@ -126,7 +160,7 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
 
 	for (s = 0; s < sseu->max_slices; s++) {
 		drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
-			   s, hweight8(sseu->subslice_mask[s]),
+			   s, intel_sseu_subslices_per_slice(sseu, s),
 			   sseu->subslice_mask[s]);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -260,9 +294,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * EU in any one subslice may be fused off for die
 	 * recovery.
 	 */
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
 				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu)) : 0;
+					     intel_sseu_subslice_total(sseu)) :
+				0;
 
 	/* No restrictions on Power Gating */
 	sseu->has_slice_pg = 1;
@@ -310,8 +345,9 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
 	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
-				sseu->eu_total / sseu_subslice_total(sseu) :
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
+				sseu->eu_total /
+					intel_sseu_subslice_total(sseu) :
 				0;
 	/*
 	 * CHV supports subslice power gating on devices with more than
@@ -319,7 +355,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * more than one EU pair per subslice.
 	*/
 	sseu->has_slice_pg = 0;
-	sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1;
+	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
 	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
 }
 
@@ -393,9 +429,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * recovery. BXT is expected to be perfectly uniform in EU
 	 * distribution.
 	*/
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
 				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu)) : 0;
+					     intel_sseu_subslice_total(sseu)) :
+				0;
 	/*
 	 * SKL+ supports slice power gating on devices with more than
 	 * one slice, and supports EU power gating on devices with
@@ -407,7 +444,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_slice_pg =
 		!IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1;
 	sseu->has_subslice_pg =
-		IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
+		IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > 1;
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
@@ -496,9 +533,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * subslices with the exception that any one EU in any one subslice may
 	 * be fused off for die recovery.
 	 */
-	sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
+	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
 				DIV_ROUND_UP(sseu->eu_total,
-					     sseu_subslice_total(sseu)) : 0;
+					     intel_sseu_subslice_total(sseu)) :
+				0;
 
 	/*
 	 * BDW supports slice power gating on devices with more than
@@ -735,7 +773,7 @@ static const u16 subplatform_ult_ids[] = {
 	INTEL_CFL_U_GT3_IDS(0),
 	INTEL_WHL_U_GT1_IDS(0),
 	INTEL_WHL_U_GT2_IDS(0),
-	INTEL_WHL_U_GT3_IDS(0)
+	INTEL_WHL_U_GT3_IDS(0),
 };
 
 static const u16 subplatform_ulx_ids[] = {
@@ -748,17 +786,14 @@ static const u16 subplatform_ulx_ids[] = {
 	INTEL_SKL_ULX_GT1_IDS(0),
 	INTEL_SKL_ULX_GT2_IDS(0),
 	INTEL_KBL_ULX_GT1_IDS(0),
-	INTEL_KBL_ULX_GT2_IDS(0)
-};
-
-static const u16 subplatform_aml_ids[] = {
+	INTEL_KBL_ULX_GT2_IDS(0),
 	INTEL_AML_KBL_GT2_IDS(0),
-	INTEL_AML_CFL_GT2_IDS(0)
+	INTEL_AML_CFL_GT2_IDS(0),
 };
 
 static const u16 subplatform_portf_ids[] = {
 	INTEL_CNL_PORT_F_IDS(0),
-	INTEL_ICL_PORT_F_IDS(0)
+	INTEL_ICL_PORT_F_IDS(0),
 };
 
 static bool find_devid(u16 id, const u16 *p, unsigned int num)
@@ -794,9 +829,6 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915)
 			/* ULX machines are also considered ULT. */
 			mask |= BIT(INTEL_SUBPLATFORM_ULT);
 		}
-	} else if (find_devid(devid, subplatform_aml_ids,
-			      ARRAY_SIZE(subplatform_aml_ids))) {
-		mask = BIT(INTEL_SUBPLATFORM_AML);
 	} else if (find_devid(devid, subplatform_portf_ids,
 			      ARRAY_SIZE(subplatform_portf_ids))) {
 		mask = BIT(INTEL_SUBPLATFORM_PORTF);
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0e579f158016..ddafc819bf30 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -27,8 +27,11 @@
 
 #include <uapi/drm/i915_drm.h>
 
-#include "intel_engine_types.h"
-#include "intel_display.h"
+#include "display/intel_display.h"
+
+#include "gt/intel_engine_types.h"
+#include "gt/intel_context_types.h"
+#include "gt/intel_sseu.h"
 
 struct drm_printer;
 struct drm_i915_private;
@@ -88,7 +91,6 @@ enum intel_platform {
 /* HSW/BDW/SKL/KBL/CFL */
 #define INTEL_SUBPLATFORM_ULT	(0)
 #define INTEL_SUBPLATFORM_ULX	(1)
-#define INTEL_SUBPLATFORM_AML	(2)
 
 /* CNL/ICL */
 #define INTEL_SUBPLATFORM_PORTF	(0)
@@ -102,14 +104,13 @@ enum intel_ppgtt_type {
 #define DEV_INFO_FOR_EACH_FLAG(func) \
 	func(is_mobile); \
 	func(is_lp); \
-	func(is_alpha_support); \
+	func(require_force_probe); \
 	/* Keep has_* in alphabetical order */ \
 	func(has_64bit_reloc); \
 	func(gpu_reset_clobbers_display); \
 	func(has_reset_engine); \
 	func(has_fpga_dbg); \
 	func(has_guc); \
-	func(has_guc_ct); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
@@ -118,6 +119,7 @@ enum intel_ppgtt_type {
 	func(has_pooled_eu); \
 	func(has_rc6); \
 	func(has_rc6p); \
+	func(has_rps); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
 	func(has_coherent_ggtt); \
@@ -139,33 +141,6 @@ enum intel_ppgtt_type {
 	func(overlay_needs_physical); \
 	func(supports_tv);
 
-#define GEN_MAX_SLICES		(6) /* CNL upper bound */
-#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
-
-struct sseu_dev_info {
-	u8 slice_mask;
-	u8 subslice_mask[GEN_MAX_SLICES];
-	u16 eu_total;
-	u8 eu_per_subslice;
-	u8 min_eu_in_pool;
-	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
-	u8 subslice_7eu[3];
-	u8 has_slice_pg:1;
-	u8 has_subslice_pg:1;
-	u8 has_eu_pg:1;
-
-	/* Topology fields */
-	u8 max_slices;
-	u8 max_subslices;
-	u8 max_eus_per_subslice;
-
-	/* We don't have more than 8 eus per subslice at the moment and as we
-	 * store eus enabled using bits, no need to multiply by eus per
-	 * subslice.
-	 */
-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
-};
-
 struct intel_device_info {
 	u16 gen_mask;
 
@@ -202,8 +177,8 @@ struct intel_device_info {
 	int cursor_offsets[I915_MAX_PIPES];
 
 	struct color_luts {
-		u16 degamma_lut_size;
-		u16 gamma_lut_size;
+		u32 degamma_lut_size;
+		u32 gamma_lut_size;
 		u32 degamma_lut_tests;
 		u32 gamma_lut_tests;
 	} color;
@@ -241,53 +216,6 @@ struct intel_driver_caps {
 	bool has_logical_contexts:1;
 };
 
-static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
-{
-	unsigned int i, total = 0;
-
-	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-		total += hweight8(sseu->subslice_mask[i]);
-
-	return total;
-}
-
-static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
-			      int slice, int subslice)
-{
-	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
-					   BITS_PER_BYTE);
-	int slice_stride = sseu->max_subslices * subslice_stride;
-
-	return slice * slice_stride + subslice * subslice_stride;
-}
-
-static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
-			       int slice, int subslice)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
-
-	for (i = 0;
-	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
-		eu_mask |= ((u16) sseu->eu_mask[offset + i]) <<
-			(i * BITS_PER_BYTE);
-	}
-
-	return eu_mask;
-}
-
-static inline void sseu_set_eus(struct sseu_dev_info *sseu,
-				int slice, int subslice, u16 eu_mask)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-
-	for (i = 0;
-	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
-	}
-}
-
 const char *intel_platform_name(enum intel_platform platform);
 
 void intel_device_info_subplatform_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a38b9cff5cd0..1d58f7ec5d84 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -28,7 +28,6 @@
 #include <linux/async.h>
 #include <linux/i2c.h>
 #include <linux/sched/clock.h>
-#include <linux/stackdepot.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_crtc.h>
@@ -47,127 +46,10 @@
 
 struct drm_printer;
 
-/**
- * __wait_for - magic wait macro
- *
- * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
- * important that we check the condition again after having timed out, since the
- * timeout could be due to preemption or similar and we've never had a chance to
- * check the condition before the timeout.
- */
-#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
-	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
-	long wait__ = (Wmin); /* recommended min for usleep is 10 us */	\
-	int ret__;							\
-	might_sleep();							\
-	for (;;) {							\
-		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
-		OP;							\
-		/* Guarantee COND check prior to timeout */		\
-		barrier();						\
-		if (COND) {						\
-			ret__ = 0;					\
-			break;						\
-		}							\
-		if (expired__) {					\
-			ret__ = -ETIMEDOUT;				\
-			break;						\
-		}							\
-		usleep_range(wait__, wait__ * 2);			\
-		if (wait__ < (Wmax))					\
-			wait__ <<= 1;					\
-	}								\
-	ret__;								\
-})
-
-#define _wait_for(COND, US, Wmin, Wmax)	__wait_for(, (COND), (US), (Wmin), \
-						   (Wmax))
-#define wait_for(COND, MS)		_wait_for((COND), (MS) * 1000, 10, 1000)
-
-/* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */
-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT)
-# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic())
-#else
-# define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0)
-#endif
-
-#define _wait_for_atomic(COND, US, ATOMIC) \
-({ \
-	int cpu, ret, timeout = (US) * 1000; \
-	u64 base; \
-	_WAIT_FOR_ATOMIC_CHECK(ATOMIC); \
-	if (!(ATOMIC)) { \
-		preempt_disable(); \
-		cpu = smp_processor_id(); \
-	} \
-	base = local_clock(); \
-	for (;;) { \
-		u64 now = local_clock(); \
-		if (!(ATOMIC)) \
-			preempt_enable(); \
-		/* Guarantee COND check prior to timeout */ \
-		barrier(); \
-		if (COND) { \
-			ret = 0; \
-			break; \
-		} \
-		if (now - base >= timeout) { \
-			ret = -ETIMEDOUT; \
-			break; \
-		} \
-		cpu_relax(); \
-		if (!(ATOMIC)) { \
-			preempt_disable(); \
-			if (unlikely(cpu != smp_processor_id())) { \
-				timeout -= now - base; \
-				cpu = smp_processor_id(); \
-				base = local_clock(); \
-			} \
-		} \
-	} \
-	ret; \
-})
-
-#define wait_for_us(COND, US) \
-({ \
-	int ret__; \
-	BUILD_BUG_ON(!__builtin_constant_p(US)); \
-	if ((US) > 10) \
-		ret__ = _wait_for((COND), (US), 10, 10); \
-	else \
-		ret__ = _wait_for_atomic((COND), (US), 0); \
-	ret__; \
-})
-
-#define wait_for_atomic_us(COND, US) \
-({ \
-	BUILD_BUG_ON(!__builtin_constant_p(US)); \
-	BUILD_BUG_ON((US) > 50000); \
-	_wait_for_atomic((COND), (US), 1); \
-})
-
-#define wait_for_atomic(COND, MS) wait_for_atomic_us((COND), (MS) * 1000)
-
-#define KHz(x) (1000 * (x))
-#define MHz(x) KHz(1000 * (x))
-
-#define KBps(x) (1000 * (x))
-#define MBps(x) KBps(1000 * (x))
-#define GBps(x) ((u64)1000 * MBps((x)))
-
 /*
  * Display related stuff
  */
 
-/* store information about an Ixxx DVO */
-/* The i830->i865 use multiple DVOs with multiple i2cs */
-/* the i915, i945 have a single sDVO i2c bus - which is different */
-#define MAX_OUTPUTS 6
-/* maximum connectors per crtcs in the mode set */
-
-#define INTEL_I2C_BUS_DVO 1
-#define INTEL_I2C_BUS_SDVO 2
-
 /* these are outputs from the chip - integrated only
    external chips are via DVO or SDVO output */
 enum intel_output_type {
@@ -185,14 +67,6 @@ enum intel_output_type {
 	INTEL_OUTPUT_DP_MST = 11,
 };
 
-#define INTEL_DVO_CHIP_NONE 0
-#define INTEL_DVO_CHIP_LVDS 1
-#define INTEL_DVO_CHIP_TMDS 2
-#define INTEL_DVO_CHIP_TVOUT 4
-
-#define INTEL_DSI_VIDEO_MODE	0
-#define INTEL_DSI_COMMAND_MODE	1
-
 struct intel_framebuffer {
 	struct drm_framebuffer base;
 	struct intel_rotation_info rot_info;
@@ -546,6 +420,8 @@ struct dpll {
 struct intel_atomic_state {
 	struct drm_atomic_state base;
 
+	intel_wakeref_t wakeref;
+
 	struct {
 		/*
 		 * Logical state of cdclk (used for all scaling, watermark,
@@ -677,21 +553,6 @@ struct intel_initial_plane_config {
 	u8 rotation;
 };
 
-#define SKL_MIN_SRC_W 8
-#define SKL_MAX_SRC_W 4096
-#define SKL_MIN_SRC_H 8
-#define SKL_MAX_SRC_H 4096
-#define SKL_MIN_DST_W 8
-#define SKL_MAX_DST_W 4096
-#define SKL_MIN_DST_H 8
-#define SKL_MAX_DST_H 4096
-#define ICL_MAX_SRC_W 5120
-#define ICL_MAX_SRC_H 4096
-#define ICL_MAX_DST_W 5120
-#define ICL_MAX_DST_H 4096
-#define SKL_MIN_YUV_420_SRC_W 16
-#define SKL_MIN_YUV_420_SRC_H 16
-
 struct intel_scaler {
 	int in_use;
 	u32 mode;
@@ -1026,6 +887,8 @@ struct intel_crtc_state {
 
 	struct intel_crtc_wm_state wm;
 
+	u32 data_rate[I915_MAX_PLANES];
+
 	/* Gamma mode programmed on the pipe */
 	u32 gamma_mode;
 
@@ -1051,6 +914,7 @@ struct intel_crtc_state {
 		union hdmi_infoframe avi;
 		union hdmi_infoframe spd;
 		union hdmi_infoframe hdmi;
+		union hdmi_infoframe drm;
 	} infoframes;
 
 	/* HDMI scrambling status */
@@ -1581,56 +1445,6 @@ intel_atomic_get_new_crtc_state(struct intel_atomic_state *state,
 								 &crtc->base));
 }
 
-/* intel_fifo_underrun.c */
-bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
-					   enum pipe pipe, bool enable);
-bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
-					   enum pipe pch_transcoder,
-					   bool enable);
-void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
-					 enum pipe pipe);
-void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv,
-					 enum pipe pch_transcoder);
-void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv);
-void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
-
-/* i915_irq.c */
-void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
-void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
-
-static inline u32 gen6_sanitize_rps_pm_mask(const struct drm_i915_private *i915,
-					    u32 mask)
-{
-	return mask & ~i915->gt_pm.rps.pm_intrmsk_mbz;
-}
-
-void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv);
-static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv)
-{
-	/*
-	 * We only use drm_irq_uninstall() at unload and VT switch, so
-	 * this is the only thing we need to check.
-	 */
-	return dev_priv->runtime_pm.irqs_enabled;
-}
-
-int intel_get_crtc_scanline(struct intel_crtc *crtc);
-void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv,
-				     u8 pipe_mask);
-void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv,
-				     u8 pipe_mask);
-void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv);
-void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv);
-void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv);
-
 /* intel_display.c */
 void intel_plane_destroy(struct drm_plane *plane);
 void i830_enable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe);
@@ -1652,9 +1466,8 @@ unsigned int intel_fb_align_height(const struct drm_framebuffer *fb,
 void intel_add_fb_offsets(int *x, int *y,
 			  const struct intel_plane_state *state, int plane);
 unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info);
+unsigned int intel_remapped_info_size(const struct intel_remapped_info *rem_info);
 bool intel_has_pending_fb_unpin(struct drm_i915_private *dev_priv);
-void intel_mark_busy(struct drm_i915_private *dev_priv);
-void intel_mark_idle(struct drm_i915_private *dev_priv);
 int intel_display_suspend(struct drm_device *dev);
 void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv);
 void intel_encoder_destroy(struct drm_encoder *encoder);
@@ -1722,18 +1535,6 @@ int intel_prepare_plane_fb(struct drm_plane *plane,
 			   struct drm_plane_state *new_state);
 void intel_cleanup_plane_fb(struct drm_plane *plane,
 			    struct drm_plane_state *old_state);
-int intel_plane_atomic_get_property(struct drm_plane *plane,
-				    const struct drm_plane_state *state,
-				    struct drm_property *property,
-				    u64 *val);
-int intel_plane_atomic_set_property(struct drm_plane *plane,
-				    struct drm_plane_state *state,
-				    struct drm_property *property,
-				    u64 val);
-int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
-				    struct drm_crtc_state *crtc_state,
-				    const struct intel_plane_state *old_plane_state,
-				    struct drm_plane_state *plane_state);
 
 void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv,
 				    enum pipe pipe);
@@ -1742,6 +1543,7 @@ int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe,
 		     const struct dpll *dpll);
 void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe);
 int lpt_get_iclkip(struct drm_i915_private *dev_priv);
+bool intel_fuzzy_clock_check(int clock1, int clock2);
 
 /* modesetting asserts */
 void assert_panel_unlocked(struct drm_i915_private *dev_priv,
@@ -1762,18 +1564,12 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state);
 #define assert_pipe_disabled(d, p) assert_pipe(d, p, false)
 void intel_prepare_reset(struct drm_i915_private *dev_priv);
 void intel_finish_reset(struct drm_i915_private *dev_priv);
-void hsw_enable_pc8(struct drm_i915_private *dev_priv);
-void hsw_disable_pc8(struct drm_i915_private *dev_priv);
-void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv);
-void bxt_enable_dc9(struct drm_i915_private *dev_priv);
-void bxt_disable_dc9(struct drm_i915_private *dev_priv);
-void gen9_enable_dc5(struct drm_i915_private *dev_priv);
-unsigned int skl_cdclk_get_vco(unsigned int freq);
-void skl_enable_dc6(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(const struct intel_crtc_state *crtc_state,
 		      enum link_m_n_set m_n);
+void intel_dp_ycbcr_420_enable(struct intel_dp *intel_dp,
+			       const struct intel_crtc_state *crtc_state);
 int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
 bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state,
 			struct dpll *best_clock);
@@ -1815,230 +1611,6 @@ int skl_format_to_fourcc(int format, bool rgb_order, bool alpha);
 unsigned int i9xx_plane_max_stride(struct intel_plane *plane,
 				   u32 pixel_format, u64 modifier,
 				   unsigned int rotation);
-
-/* intel_dp_link_training.c */
-void intel_dp_start_link_train(struct intel_dp *intel_dp);
-void intel_dp_stop_link_train(struct intel_dp *intel_dp);
-
-/* intel_vdsc.c */
-int intel_dp_compute_dsc_params(struct intel_dp *intel_dp,
-				struct intel_crtc_state *pipe_config);
-enum intel_display_power_domain
-intel_dsc_power_domain(const struct intel_crtc_state *crtc_state);
-
-/* intel_dp_aux_backlight.c */
-int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector);
-
-/* intel_dp_mst.c */
-int intel_dp_mst_encoder_init(struct intel_digital_port *intel_dig_port, int conn_id);
-void intel_dp_mst_encoder_cleanup(struct intel_digital_port *intel_dig_port);
-/* vlv_dsi.c */
-void vlv_dsi_init(struct drm_i915_private *dev_priv);
-
-/* icl_dsi.c */
-void icl_dsi_init(struct drm_i915_private *dev_priv);
-
-/* intel_dsi_dcs_backlight.c */
-int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector);
-
-/* intel_hotplug.c */
-void intel_hpd_poll_init(struct drm_i915_private *dev_priv);
-bool intel_encoder_hotplug(struct intel_encoder *encoder,
-			   struct intel_connector *connector);
-
-/* intel_overlay.c */
-void intel_overlay_setup(struct drm_i915_private *dev_priv);
-void intel_overlay_cleanup(struct drm_i915_private *dev_priv);
-int intel_overlay_switch_off(struct intel_overlay *overlay);
-int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file_priv);
-int intel_overlay_attrs_ioctl(struct drm_device *dev, void *data,
-			      struct drm_file *file_priv);
-void intel_overlay_reset(struct drm_i915_private *dev_priv);
-
-/* intel_quirks.c */
-void intel_init_quirks(struct drm_i915_private *dev_priv);
-
-/* intel_runtime_pm.c */
-void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
-int intel_power_domains_init(struct drm_i915_private *);
-void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
-void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
-void intel_power_domains_fini_hw(struct drm_i915_private *dev_priv);
-void icl_display_core_init(struct drm_i915_private *dev_priv, bool resume);
-void icl_display_core_uninit(struct drm_i915_private *dev_priv);
-void intel_power_domains_enable(struct drm_i915_private *dev_priv);
-void intel_power_domains_disable(struct drm_i915_private *dev_priv);
-
-enum i915_drm_suspend_mode {
-	I915_DRM_SUSPEND_IDLE,
-	I915_DRM_SUSPEND_MEM,
-	I915_DRM_SUSPEND_HIBERNATE,
-};
-
-void intel_power_domains_suspend(struct drm_i915_private *dev_priv,
-				 enum i915_drm_suspend_mode);
-void intel_power_domains_resume(struct drm_i915_private *dev_priv);
-void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
-void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
-void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
-const char *
-intel_display_power_domain_str(enum intel_display_power_domain domain);
-
-bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
-				    enum intel_display_power_domain domain);
-bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
-				      enum intel_display_power_domain domain);
-intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
-					enum intel_display_power_domain domain);
-intel_wakeref_t
-intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
-				   enum intel_display_power_domain domain);
-void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
-				       enum intel_display_power_domain domain);
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain,
-			     intel_wakeref_t wakeref);
-#else
-#define intel_display_power_put(i915, domain, wakeref) \
-	intel_display_power_put_unchecked(i915, domain)
-#endif
-void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
-			    u8 req_slices);
-
-static inline void
-assert_rpm_device_not_suspended(struct i915_runtime_pm *rpm)
-{
-	WARN_ONCE(rpm->suspended,
-		  "Device suspended during HW access\n");
-}
-
-static inline void
-__assert_rpm_wakelock_held(struct i915_runtime_pm *rpm)
-{
-	assert_rpm_device_not_suspended(rpm);
-	WARN_ONCE(!atomic_read(&rpm->wakeref_count),
-		  "RPM wakelock ref not held during HW access");
-}
-
-static inline void
-assert_rpm_wakelock_held(struct drm_i915_private *i915)
-{
-	__assert_rpm_wakelock_held(&i915->runtime_pm);
-}
-
-/**
- * disable_rpm_wakeref_asserts - disable the RPM assert checks
- * @i915: i915 device instance
- *
- * This function disable asserts that check if we hold an RPM wakelock
- * reference, while keeping the device-not-suspended checks still enabled.
- * It's meant to be used only in special circumstances where our rule about
- * the wakelock refcount wrt. the device power state doesn't hold. According
- * to this rule at any point where we access the HW or want to keep the HW in
- * an active state we must hold an RPM wakelock reference acquired via one of
- * the intel_runtime_pm_get() helpers. Currently there are a few special spots
- * where this rule doesn't hold: the IRQ and suspend/resume handlers, the
- * forcewake release timer, and the GPU RPS and hangcheck works. All other
- * users should avoid using this function.
- *
- * Any calls to this function must have a symmetric call to
- * enable_rpm_wakeref_asserts().
- */
-static inline void
-disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
-{
-	atomic_inc(&i915->runtime_pm.wakeref_count);
-}
-
-/**
- * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
- * @i915: i915 device instance
- *
- * This function re-enables the RPM assert checks after disabling them with
- * disable_rpm_wakeref_asserts. It's meant to be used only in special
- * circumstances otherwise its use should be avoided.
- *
- * Any calls to this function must have a symmetric call to
- * disable_rpm_wakeref_asserts().
- */
-static inline void
-enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
-{
-	atomic_dec(&i915->runtime_pm.wakeref_count);
-}
-
-intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915);
-intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
-intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
-
-#define with_intel_runtime_pm(i915, wf) \
-	for ((wf) = intel_runtime_pm_get(i915); (wf); \
-	     intel_runtime_pm_put((i915), (wf)), (wf) = 0)
-
-#define with_intel_runtime_pm_if_in_use(i915, wf) \
-	for ((wf) = intel_runtime_pm_get_if_in_use(i915); (wf); \
-	     intel_runtime_pm_put((i915), (wf)), (wf) = 0)
-
-void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915);
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref);
-#else
-#define intel_runtime_pm_put(i915, wref) intel_runtime_pm_put_unchecked(i915)
-#endif
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
-				    struct drm_printer *p);
-#else
-static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
-						  struct drm_printer *p)
-{
-}
-#endif
-
-void chv_phy_powergate_lanes(struct intel_encoder *encoder,
-			     bool override, unsigned int mask);
-bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy,
-			  enum dpio_channel ch, bool override);
-
-/* intel_atomic.c */
-int intel_digital_connector_atomic_get_property(struct drm_connector *connector,
-						const struct drm_connector_state *state,
-						struct drm_property *property,
-						u64 *val);
-int intel_digital_connector_atomic_set_property(struct drm_connector *connector,
-						struct drm_connector_state *state,
-						struct drm_property *property,
-						u64 val);
-int intel_digital_connector_atomic_check(struct drm_connector *conn,
-					 struct drm_connector_state *new_state);
-struct drm_connector_state *
-intel_digital_connector_duplicate_state(struct drm_connector *connector);
-
-struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc);
-void intel_crtc_destroy_state(struct drm_crtc *crtc,
-			       struct drm_crtc_state *state);
-struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev);
-void intel_atomic_state_clear(struct drm_atomic_state *);
-
-static inline struct intel_crtc_state *
-intel_atomic_get_crtc_state(struct drm_atomic_state *state,
-			    struct intel_crtc *crtc)
-{
-	struct drm_crtc_state *crtc_state;
-	crtc_state = drm_atomic_get_crtc_state(state, &crtc->base);
-	if (IS_ERR(crtc_state))
-		return ERR_CAST(crtc_state);
-
-	return to_intel_crtc_state(crtc_state);
-}
-
-int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
-			       struct intel_crtc *intel_crtc,
-			       struct intel_crtc_state *crtc_state);
+int bdw_get_pipemisc_bpp(struct intel_crtc *crtc);
 
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index 3aabfa2d9198..c40a6efdd33a 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -34,6 +34,13 @@ static void gen8_guc_raise_irq(struct intel_guc *guc)
 	I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
 }
 
+static void gen11_guc_raise_irq(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	I915_WRITE(GEN11_GUC_HOST_INTERRUPT, 0);
+}
+
 static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
 {
 	GEM_BUG_ON(!guc->send_regs.base);
@@ -49,9 +56,15 @@ void intel_guc_init_send_regs(struct intel_guc *guc)
 	enum forcewake_domains fw_domains = 0;
 	unsigned int i;
 
-	guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
-	guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN;
-	BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT);
+	if (INTEL_GEN(dev_priv) >= 11) {
+		guc->send_regs.base =
+				i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0));
+		guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT;
+	} else {
+		guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
+		guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN;
+		BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT);
+	}
 
 	for (i = 0; i < guc->send_regs.count; i++) {
 		fw_domains |= intel_uncore_forcewake_for_reg(&dev_priv->uncore,
@@ -63,6 +76,8 @@ void intel_guc_init_send_regs(struct intel_guc *guc)
 
 void intel_guc_init_early(struct intel_guc *guc)
 {
+	struct drm_i915_private *i915 = guc_to_i915(guc);
+
 	intel_guc_fw_init_early(guc);
 	intel_guc_ct_init_early(&guc->ct);
 	intel_guc_log_init_early(&guc->log);
@@ -71,7 +86,17 @@ void intel_guc_init_early(struct intel_guc *guc)
 	spin_lock_init(&guc->irq_lock);
 	guc->send = intel_guc_send_nop;
 	guc->handler = intel_guc_to_host_event_handler_nop;
-	guc->notify = gen8_guc_raise_irq;
+	if (INTEL_GEN(i915) >= 11) {
+		guc->notify = gen11_guc_raise_irq;
+		guc->interrupts.reset = gen11_reset_guc_interrupts;
+		guc->interrupts.enable = gen11_enable_guc_interrupts;
+		guc->interrupts.disable = gen11_disable_guc_interrupts;
+	} else {
+		guc->notify = gen8_guc_raise_irq;
+		guc->interrupts.reset = gen9_reset_guc_interrupts;
+		guc->interrupts.enable = gen9_enable_guc_interrupts;
+		guc->interrupts.disable = gen9_disable_guc_interrupts;
+	}
 }
 
 static int guc_init_wq(struct intel_guc *guc)
@@ -154,7 +179,7 @@ int intel_guc_init_misc(struct intel_guc *guc)
 
 void intel_guc_fini_misc(struct intel_guc *guc)
 {
-	intel_uc_fw_fini(&guc->fw);
+	intel_uc_fw_cleanup_fetch(&guc->fw);
 	guc_fini_wq(guc);
 }
 
@@ -189,9 +214,13 @@ int intel_guc_init(struct intel_guc *guc)
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	int ret;
 
-	ret = guc_shared_data_create(guc);
+	ret = intel_uc_fw_init(&guc->fw);
 	if (ret)
 		goto err_fetch;
+
+	ret = guc_shared_data_create(guc);
+	if (ret)
+		goto err_fw;
 	GEM_BUG_ON(!guc->shared_data);
 
 	ret = intel_guc_log_create(&guc->log);
@@ -203,11 +232,9 @@ int intel_guc_init(struct intel_guc *guc)
 		goto err_log;
 	GEM_BUG_ON(!guc->ads_vma);
 
-	if (HAS_GUC_CT(dev_priv)) {
-		ret = intel_guc_ct_init(&guc->ct);
-		if (ret)
-			goto err_ads;
-	}
+	ret = intel_guc_ct_init(&guc->ct);
+	if (ret)
+		goto err_ads;
 
 	/* We need to notify the guc whenever we change the GGTT */
 	i915_ggtt_enable_guc(dev_priv);
@@ -220,8 +247,10 @@ err_log:
 	intel_guc_log_destroy(&guc->log);
 err_shared:
 	guc_shared_data_destroy(guc);
-err_fetch:
+err_fw:
 	intel_uc_fw_fini(&guc->fw);
+err_fetch:
+	intel_uc_fw_cleanup_fetch(&guc->fw);
 	return ret;
 }
 
@@ -231,26 +260,19 @@ void intel_guc_fini(struct intel_guc *guc)
 
 	i915_ggtt_disable_guc(dev_priv);
 
-	if (HAS_GUC_CT(dev_priv))
-		intel_guc_ct_fini(&guc->ct);
+	intel_guc_ct_fini(&guc->ct);
 
 	intel_guc_ads_destroy(guc);
 	intel_guc_log_destroy(&guc->log);
 	guc_shared_data_destroy(guc);
 	intel_uc_fw_fini(&guc->fw);
+	intel_uc_fw_cleanup_fetch(&guc->fw);
 }
 
 static u32 guc_ctl_debug_flags(struct intel_guc *guc)
 {
 	u32 level = intel_guc_log_get_level(&guc->log);
-	u32 flags;
-	u32 ads;
-
-	ads = intel_guc_ggtt_offset(guc, guc->ads_vma) >> PAGE_SHIFT;
-	flags = ads << GUC_ADS_ADDR_SHIFT | GUC_ADS_ENABLED;
-
-	if (!GUC_LOG_LEVEL_IS_ENABLED(level))
-		flags |= GUC_LOG_DEFAULT_DISABLED;
+	u32 flags = 0;
 
 	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
 		flags |= GUC_LOG_DISABLED;
@@ -265,11 +287,7 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
 {
 	u32 flags = 0;
 
-	flags |=  GUC_CTL_VCS2_ENABLED;
-
-	if (USES_GUC_SUBMISSION(guc_to_i915(guc)))
-		flags |= GUC_CTL_KERNEL_SUBMISSIONS;
-	else
+	if (!USES_GUC_SUBMISSION(guc_to_i915(guc)))
 		flags |= GUC_CTL_DISABLE_SCHEDULER;
 
 	return flags;
@@ -333,6 +351,14 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
 	return flags;
 }
 
+static u32 guc_ctl_ads_flags(struct intel_guc *guc)
+{
+	u32 ads = intel_guc_ggtt_offset(guc, guc->ads_vma) >> PAGE_SHIFT;
+	u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+	return flags;
+}
+
 /*
  * Initialise the GuC parameter block before starting the firmware
  * transfer. These parameters are read by the firmware on startup
@@ -346,20 +372,11 @@ void intel_guc_init_params(struct intel_guc *guc)
 
 	memset(params, 0, sizeof(params));
 
-	/*
-	 * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
-	 * second. This ARAR is calculated by:
-	 * Scheduler-Quantum-in-ns / ARAT-increment-in-ns = 1000000000 / 10
-	 */
-	params[GUC_CTL_ARAT_HIGH] = 0;
-	params[GUC_CTL_ARAT_LOW] = 100000000;
-
-	params[GUC_CTL_WA] |= GUC_CTL_WA_UK_BY_DRIVER;
-
+	params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
+	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
-	params[GUC_CTL_LOG_PARAMS]  = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
-	params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
+	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
 
 	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
 		DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]);
@@ -410,9 +427,8 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
 	GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);
 
 	/* If CT is available, we expect to use MMIO only during init/fini */
-	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
-		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
-		*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+	GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+		   *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
 
 	mutex_lock(&guc->send_mutex);
 	intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains);
@@ -461,33 +477,6 @@ out:
 	return ret;
 }
 
-void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	u32 msg, val;
-
-	/*
-	 * Sample the log buffer flush related bits & clear them out now
-	 * itself from the message identity register to minimize the
-	 * probability of losing a flush interrupt, when there are back
-	 * to back flush interrupts.
-	 * There can be a new flush interrupt, for different log buffer
-	 * type (like for ISR), whilst Host is handling one (for DPC).
-	 * Since same bit is used in message register for ISR & DPC, it
-	 * could happen that GuC sets the bit for 2nd interrupt but Host
-	 * clears out the bit on handling the 1st interrupt.
-	 */
-	disable_rpm_wakeref_asserts(dev_priv);
-	spin_lock(&guc->irq_lock);
-	val = I915_READ(SOFT_SCRATCH(15));
-	msg = val & guc->msg_enabled_mask;
-	I915_WRITE(SOFT_SCRATCH(15), val & ~msg);
-	spin_unlock(&guc->irq_lock);
-	enable_rpm_wakeref_asserts(dev_priv);
-
-	intel_guc_to_host_process_recv_msg(guc, &msg, 1);
-}
-
 int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
 				       const u32 *payload, u32 len)
 {
@@ -543,25 +532,33 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-/*
- * The ENTER/EXIT_S_STATE actions queue the save/restore operation in GuC FW and
- * then return, so waiting on the H2G is not enough to guarantee GuC is done.
- * When all the processing is done, GuC writes INTEL_GUC_SLEEP_STATE_SUCCESS to
- * scratch register 14, so we can poll on that. Note that GuC does not ensure
- * that the value in the register is different from
- * INTEL_GUC_SLEEP_STATE_SUCCESS while the action is in progress so we need to
- * take care of that ourselves as well.
+/**
+ * intel_guc_suspend() - notify GuC entering suspend state
+ * @guc:	the guc
  */
-static int guc_sleep_state_action(struct intel_guc *guc,
-				  const u32 *action, u32 len)
+int intel_guc_suspend(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	int ret;
 	u32 status;
+	u32 action[] = {
+		INTEL_GUC_ACTION_ENTER_S_STATE,
+		GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
+	};
+
+	/*
+	 * The ENTER_S_STATE action queues the save/restore operation in GuC FW
+	 * and then returns, so waiting on the H2G is not enough to guarantee
+	 * GuC is done. When all the processing is done, GuC writes
+	 * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll
+	 * on that. Note that GuC does not ensure that the value in the register
+	 * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is
+	 * in progress so we need to take care of that ourselves as well.
+	 */
 
 	I915_WRITE(SOFT_SCRATCH(14), INTEL_GUC_SLEEP_STATE_INVALID_MASK);
 
-	ret = intel_guc_send(guc, action, len);
+	ret = intel_guc_send(guc, action, ARRAY_SIZE(action));
 	if (ret)
 		return ret;
 
@@ -582,21 +579,6 @@ static int guc_sleep_state_action(struct intel_guc *guc,
 }
 
 /**
- * intel_guc_suspend() - notify GuC entering suspend state
- * @guc:	the guc
- */
-int intel_guc_suspend(struct intel_guc *guc)
-{
-	u32 data[] = {
-		INTEL_GUC_ACTION_ENTER_S_STATE,
-		GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
-		intel_guc_ggtt_offset(guc, guc->shared_data)
-	};
-
-	return guc_sleep_state_action(guc, data, ARRAY_SIZE(data));
-}
-
-/**
  * intel_guc_reset_engine() - ask GuC to reset an engine
  * @guc:	intel_guc structure
  * @engine:	engine to be reset
@@ -625,13 +607,12 @@ int intel_guc_reset_engine(struct intel_guc *guc,
  */
 int intel_guc_resume(struct intel_guc *guc)
 {
-	u32 data[] = {
+	u32 action[] = {
 		INTEL_GUC_ACTION_EXIT_S_STATE,
 		GUC_POWER_D0,
-		intel_guc_ggtt_offset(guc, guc->shared_data)
 	};
 
-	return guc_sleep_state_action(guc, data, ARRAY_SIZE(data));
+	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
 /**
@@ -683,7 +664,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
 	u64 flags;
 	int ret;
 
-	obj = i915_gem_object_create(dev_priv, size);
+	obj = i915_gem_object_create_shmem(dev_priv, size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);
 
@@ -704,20 +685,3 @@ err:
 	i915_gem_object_put(obj);
 	return vma;
 }
-
-/**
- * intel_guc_reserved_gtt_size()
- * @guc:	intel_guc structure
- *
- * The GuC WOPCM mapping shadows the lower part of the GGTT, so if we are using
- * GuC we can't have any objects pinned in that region. This function returns
- * the size of the shadowed region.
- *
- * Returns:
- * 0 if GuC is not present or not in use.
- * Otherwise, the GuC WOPCM size.
- */
-u32 intel_guc_reserved_gtt_size(struct intel_guc *guc)
-{
-	return guc_to_i915(guc)->wopcm.guc.size;
-}
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 2c59ff8d9f39..08c906abdfa2 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -55,9 +55,15 @@ struct intel_guc {
 
 	/* intel_guc_recv interrupt related state */
 	spinlock_t irq_lock;
-	bool interrupts_enabled;
 	unsigned int msg_enabled_mask;
 
+	struct {
+		bool enabled;
+		void (*reset)(struct drm_i915_private *i915);
+		void (*enable)(struct drm_i915_private *i915);
+		void (*disable)(struct drm_i915_private *i915);
+	} interrupts;
+
 	struct i915_vma *ads_vma;
 	struct i915_vma *stage_desc_pool;
 	void *stage_desc_pool_vaddr;
@@ -96,11 +102,6 @@ struct intel_guc {
 	void (*notify)(struct intel_guc *guc);
 };
 
-static inline bool intel_guc_is_alive(struct intel_guc *guc)
-{
-	return intel_uc_fw_is_loaded(&guc->fw);
-}
-
 static
 inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 {
@@ -164,7 +165,6 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
 			u32 *response_buf, u32 response_buf_size);
 void intel_guc_to_host_event_handler(struct intel_guc *guc);
 void intel_guc_to_host_event_handler_nop(struct intel_guc *guc);
-void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc);
 int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
 				       const u32 *payload, u32 len);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
@@ -172,7 +172,11 @@ int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
 int intel_guc_suspend(struct intel_guc *guc);
 int intel_guc_resume(struct intel_guc *guc);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
-u32 intel_guc_reserved_gtt_size(struct intel_guc *guc);
+
+static inline bool intel_guc_is_loaded(struct intel_guc *guc)
+{
+	return intel_uc_fw_is_loaded(&guc->fw);
+}
 
 static inline int intel_guc_sanitize(struct intel_guc *guc)
 {
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
index bec62f34b15a..ecb69fc94218 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/intel_guc_ads.c
@@ -51,7 +51,7 @@ static void guc_policies_init(struct guc_policies *policies)
 	policies->max_num_work_items = POLICY_MAX_NUM_WI;
 
 	for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) {
-		for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) {
+		for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) {
 			policy = &policies->policy[p][i];
 
 			guc_policy_init(policy);
@@ -61,91 +61,142 @@ static void guc_policies_init(struct guc_policies *policies)
 	policies->is_valid = 1;
 }
 
+static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num)
+{
+	memset(pool, 0, num * sizeof(*pool));
+}
+
 /*
  * The first 80 dwords of the register state context, containing the
  * execlists and ppgtt registers.
  */
 #define LR_HW_CONTEXT_SIZE	(80 * sizeof(u32))
 
-/**
- * intel_guc_ads_create() - creates GuC ADS
- * @guc: intel_guc struct
- *
- */
-int intel_guc_ads_create(struct intel_guc *guc)
+/* The ads obj includes the struct itself and buffers passed to GuC */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_mmio_reg_state reg_state;
+	struct guc_gt_system_info system_info;
+	struct guc_clients_info clients_info;
+	struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
+	u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
+} __packed;
+
+static int __guc_ads_init(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct i915_vma *vma, *kernel_ctx_vma;
-	struct page *page;
-	/* The ads obj includes the struct itself and buffers passed to GuC */
-	struct {
-		struct guc_ads ads;
-		struct guc_policies policies;
-		struct guc_mmio_reg_state reg_state;
-		u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
-	} __packed *blob;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE;
+	struct __guc_ads_blob *blob;
 	const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
 	u32 base;
+	u8 engine_class;
 
-	GEM_BUG_ON(guc->ads_vma);
-
-	vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob)));
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
-
-	guc->ads_vma = vma;
-
-	page = i915_vma_first_page(vma);
-	blob = kmap(page);
+	blob = i915_gem_object_pin_map(guc->ads_vma->obj, I915_MAP_WB);
+	if (IS_ERR(blob))
+		return PTR_ERR(blob);
 
 	/* GuC scheduling policies */
 	guc_policies_init(&blob->policies);
 
-	/* MMIO reg state */
-	for_each_engine(engine, dev_priv, id) {
-		blob->reg_state.white_list[engine->guc_id].mmio_start =
-			engine->mmio_base + GUC_MMIO_WHITE_LIST_START;
-
-		/* Nothing to be saved or restored for now. */
-		blob->reg_state.white_list[engine->guc_id].count = 0;
-	}
-
 	/*
-	 * The GuC requires a "Golden Context" when it reinitialises
-	 * engines after a reset. Here we use the Render ring default
-	 * context, which must already exist and be pinned in the GGTT,
-	 * so its address won't change after we've told the GuC where
-	 * to find it. Note that we have to skip our header (1 page),
-	 * because our GuC shared data is there.
+	 * GuC expects a per-engine-class context image and size
+	 * (minus hwsp and ring context). The context image will be
+	 * used to reinitialize engines after a reset. It must exist
+	 * and be pinned in the GGTT, so that the address won't change after
+	 * we have told GuC where to find it. The context size will be used
+	 * to validate that the LRC base + size fall within allowed GGTT.
 	 */
-	kernel_ctx_vma = dev_priv->engine[RCS0]->kernel_context->state;
-	blob->ads.golden_context_lrca =
-		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
+	for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
+		if (engine_class == OTHER_CLASS)
+			continue;
+		/*
+		 * TODO: Set context pointer to default state to allow
+		 * GuC to re-init guilty contexts after internal reset.
+		 */
+		blob->ads.golden_context_lrca[engine_class] = 0;
+		blob->ads.eng_state_size[engine_class] =
+			intel_engine_context_size(dev_priv, engine_class) -
+			skipped_size;
+	}
 
-	/*
-	 * The GuC expects us to exclude the portion of the context image that
-	 * it skips from the size it is to read. It starts reading from after
-	 * the execlist context (so skipping the first page [PPHWSP] and 80
-	 * dwords). Weird guc is weird.
-	 */
-	for_each_engine(engine, dev_priv, id)
-		blob->ads.eng_state_size[engine->guc_id] =
-			engine->context_size - skipped_size;
+	/* System info */
+	blob->system_info.slice_enabled = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
+	blob->system_info.rcs_enabled = 1;
+	blob->system_info.bcs_enabled = 1;
+
+	blob->system_info.vdbox_enable_mask = VDBOX_MASK(dev_priv);
+	blob->system_info.vebox_enable_mask = VEBOX_MASK(dev_priv);
+	blob->system_info.vdbox_sfc_support_mask = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
+
+	base = intel_guc_ggtt_offset(guc, guc->ads_vma);
 
-	base = intel_guc_ggtt_offset(guc, vma);
+	/* Clients info  */
+	guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool));
+
+	blob->clients_info.clients_num = 1;
+	blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool);
+	blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool);
+
+	/* ADS */
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
 	blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
 	blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
+	blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
+	blob->ads.clients_info = base + ptr_offset(blob, clients_info);
+
+	i915_gem_object_unpin_map(guc->ads_vma->obj);
 
-	kunmap(page);
+	return 0;
+}
+
+/**
+ * intel_guc_ads_create() - allocates and initializes GuC ADS.
+ * @guc: intel_guc struct
+ *
+ * GuC needs memory block (Additional Data Struct), where it will store
+ * some data. Allocate and initialize such memory block for GuC use.
+ */
+int intel_guc_ads_create(struct intel_guc *guc)
+{
+	const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+	struct i915_vma *vma;
+	int ret;
+
+	GEM_BUG_ON(guc->ads_vma);
+
+	vma = intel_guc_allocate_vma(guc, size);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	guc->ads_vma = vma;
+
+	ret = __guc_ads_init(guc);
+	if (ret)
+		goto err_vma;
 
 	return 0;
+
+err_vma:
+	i915_vma_unpin_and_release(&guc->ads_vma, 0);
+	return ret;
 }
 
 void intel_guc_ads_destroy(struct intel_guc *guc)
 {
 	i915_vma_unpin_and_release(&guc->ads_vma, 0);
 }
+
+/**
+ * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse
+ * @guc: intel_guc struct
+ *
+ * GuC stores some data in ADS, which might be stale after a reset.
+ * Reinitialize whole ADS in case any part of it was corrupted during
+ * previous GuC run.
+ */
+void intel_guc_ads_reset(struct intel_guc *guc)
+{
+	if (!guc->ads_vma)
+		return;
+	__guc_ads_init(guc);
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.h b/drivers/gpu/drm/i915/intel_guc_ads.h
index c4735742c564..7f40f9cd5fb9 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.h
+++ b/drivers/gpu/drm/i915/intel_guc_ads.h
@@ -29,5 +29,6 @@ struct intel_guc;
 
 int intel_guc_ads_create(struct intel_guc *guc);
 void intel_guc_ads_destroy(struct intel_guc *guc);
+void intel_guc_ads_reset(struct intel_guc *guc);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
index dde1dc0d6e69..3921809f812b 100644
--- a/drivers/gpu/drm/i915/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
@@ -565,7 +565,7 @@ static inline unsigned int ct_header_get_action(u32 header)
 
 static inline bool ct_header_is_response(u32 header)
 {
-	return ct_header_get_action(header) == INTEL_GUC_ACTION_DEFAULT;
+	return !!(header & GUC_CT_MSG_IS_RESPONSE);
 }
 
 static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
@@ -848,8 +848,6 @@ static void intel_guc_to_host_event_handler_ct(struct intel_guc *guc)
  * Allocate memory required for communication via
  * the CT channel.
  *
- * Shall only be called for platforms with HAS_GUC_CT.
- *
  * Return: 0 on success, a negative errno code on failure.
  */
 int intel_guc_ct_init(struct intel_guc_ct *ct)
@@ -875,8 +873,6 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
  *
  * Deallocate memory required for communication via
  * the CT channel.
- *
- * Shall only be called for platforms with HAS_GUC_CT.
  */
 void intel_guc_ct_fini(struct intel_guc_ct *ct)
 {
@@ -890,19 +886,14 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
  * intel_guc_ct_enable - Enable buffer based command transport.
  * @ct: pointer to CT struct
  *
- * Shall only be called for platforms with HAS_GUC_CT.
- *
  * Return: 0 on success, a negative errno code on failure.
  */
 int intel_guc_ct_enable(struct intel_guc_ct *ct)
 {
 	struct intel_guc *guc = ct_to_guc(ct);
-	struct drm_i915_private *i915 = guc_to_i915(guc);
 	struct intel_guc_ct_channel *ctch = &ct->host_channel;
 	int err;
 
-	GEM_BUG_ON(!HAS_GUC_CT(i915));
-
 	if (ctch->enabled)
 		return 0;
 
@@ -920,17 +911,12 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
 /**
  * intel_guc_ct_disable - Disable buffer based command transport.
  * @ct: pointer to CT struct
- *
- * Shall only be called for platforms with HAS_GUC_CT.
  */
 void intel_guc_ct_disable(struct intel_guc_ct *ct)
 {
 	struct intel_guc *guc = ct_to_guc(ct);
-	struct drm_i915_private *i915 = guc_to_i915(guc);
 	struct intel_guc_ct_channel *ctch = &ct->host_channel;
 
-	GEM_BUG_ON(!HAS_GUC_CT(i915));
-
 	if (!ctch->enabled)
 		return;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
index f5e7f0663304..41ba593a4df7 100644
--- a/drivers/gpu/drm/i915/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
@@ -96,4 +96,9 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct);
 int intel_guc_ct_enable(struct intel_guc_ct *ct);
 void intel_guc_ct_disable(struct intel_guc_ct *ct);
 
+static inline void intel_guc_ct_stop(struct intel_guc_ct *ct)
+{
+	ct->host_channel.enabled = false;
+}
+
 #endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
index 792a551450c7..72cdafd9636a 100644
--- a/drivers/gpu/drm/i915/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/intel_guc_fw.c
@@ -30,53 +30,82 @@
 #include "intel_guc_fw.h"
 #include "i915_drv.h"
 
-#define SKL_FW_MAJOR 9
-#define SKL_FW_MINOR 33
-
-#define BXT_FW_MAJOR 9
-#define BXT_FW_MINOR 29
-
-#define KBL_FW_MAJOR 9
-#define KBL_FW_MINOR 39
-
-#define GUC_FW_PATH(platform, major, minor) \
-       "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin"
-
-#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR)
-MODULE_FIRMWARE(I915_SKL_GUC_UCODE);
-
-#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR)
-MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
-
-#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
-MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
+#define __MAKE_GUC_FW_PATH(KEY) \
+	"i915/" \
+	__stringify(KEY##_GUC_FW_PREFIX) "_guc_" \
+	__stringify(KEY##_GUC_FW_MAJOR) "." \
+	__stringify(KEY##_GUC_FW_MINOR) "." \
+	__stringify(KEY##_GUC_FW_PATCH) ".bin"
+
+#define SKL_GUC_FW_PREFIX skl
+#define SKL_GUC_FW_MAJOR 32
+#define SKL_GUC_FW_MINOR 0
+#define SKL_GUC_FW_PATCH 3
+#define SKL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(SKL)
+MODULE_FIRMWARE(SKL_GUC_FIRMWARE_PATH);
+
+#define BXT_GUC_FW_PREFIX bxt
+#define BXT_GUC_FW_MAJOR 32
+#define BXT_GUC_FW_MINOR 0
+#define BXT_GUC_FW_PATCH 3
+#define BXT_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(BXT)
+MODULE_FIRMWARE(BXT_GUC_FIRMWARE_PATH);
+
+#define KBL_GUC_FW_PREFIX kbl
+#define KBL_GUC_FW_MAJOR 32
+#define KBL_GUC_FW_MINOR 0
+#define KBL_GUC_FW_PATCH 3
+#define KBL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(KBL)
+MODULE_FIRMWARE(KBL_GUC_FIRMWARE_PATH);
+
+#define GLK_GUC_FW_PREFIX glk
+#define GLK_GUC_FW_MAJOR 32
+#define GLK_GUC_FW_MINOR 0
+#define GLK_GUC_FW_PATCH 3
+#define GLK_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(GLK)
+MODULE_FIRMWARE(GLK_GUC_FIRMWARE_PATH);
+
+#define ICL_GUC_FW_PREFIX icl
+#define ICL_GUC_FW_MAJOR 32
+#define ICL_GUC_FW_MINOR 0
+#define ICL_GUC_FW_PATCH 3
+#define ICL_GUC_FIRMWARE_PATH __MAKE_GUC_FW_PATH(ICL)
+MODULE_FIRMWARE(ICL_GUC_FIRMWARE_PATH);
 
 static void guc_fw_select(struct intel_uc_fw *guc_fw)
 {
 	struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct drm_i915_private *i915 = guc_to_i915(guc);
 
 	GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC);
 
-	if (!HAS_GUC(dev_priv))
+	if (!HAS_GUC(i915))
 		return;
 
 	if (i915_modparams.guc_firmware_path) {
 		guc_fw->path = i915_modparams.guc_firmware_path;
 		guc_fw->major_ver_wanted = 0;
 		guc_fw->minor_ver_wanted = 0;
-	} else if (IS_SKYLAKE(dev_priv)) {
-		guc_fw->path = I915_SKL_GUC_UCODE;
-		guc_fw->major_ver_wanted = SKL_FW_MAJOR;
-		guc_fw->minor_ver_wanted = SKL_FW_MINOR;
-	} else if (IS_BROXTON(dev_priv)) {
-		guc_fw->path = I915_BXT_GUC_UCODE;
-		guc_fw->major_ver_wanted = BXT_FW_MAJOR;
-		guc_fw->minor_ver_wanted = BXT_FW_MINOR;
-	} else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) {
-		guc_fw->path = I915_KBL_GUC_UCODE;
-		guc_fw->major_ver_wanted = KBL_FW_MAJOR;
-		guc_fw->minor_ver_wanted = KBL_FW_MINOR;
+	} else if (IS_ICELAKE(i915)) {
+		guc_fw->path = ICL_GUC_FIRMWARE_PATH;
+		guc_fw->major_ver_wanted = ICL_GUC_FW_MAJOR;
+		guc_fw->minor_ver_wanted = ICL_GUC_FW_MINOR;
+	} else if (IS_GEMINILAKE(i915)) {
+		guc_fw->path = GLK_GUC_FIRMWARE_PATH;
+		guc_fw->major_ver_wanted = GLK_GUC_FW_MAJOR;
+		guc_fw->minor_ver_wanted = GLK_GUC_FW_MINOR;
+	} else if (IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
+		guc_fw->path = KBL_GUC_FIRMWARE_PATH;
+		guc_fw->major_ver_wanted = KBL_GUC_FW_MAJOR;
+		guc_fw->minor_ver_wanted = KBL_GUC_FW_MINOR;
+	} else if (IS_BROXTON(i915)) {
+		guc_fw->path = BXT_GUC_FIRMWARE_PATH;
+		guc_fw->major_ver_wanted = BXT_GUC_FW_MAJOR;
+		guc_fw->minor_ver_wanted = BXT_GUC_FW_MINOR;
+	} else if (IS_SKYLAKE(i915)) {
+		guc_fw->path = SKL_GUC_FIRMWARE_PATH;
+		guc_fw->major_ver_wanted = SKL_GUC_FW_MAJOR;
+		guc_fw->minor_ver_wanted = SKL_GUC_FW_MINOR;
 	}
 }
 
@@ -90,7 +119,7 @@ void intel_guc_fw_init_early(struct intel_guc *guc)
 {
 	struct intel_uc_fw *guc_fw = &guc->fw;
 
-	intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC);
+	intel_uc_fw_init_early(guc_fw, INTEL_UC_FW_TYPE_GUC);
 	guc_fw_select(guc_fw);
 }
 
@@ -122,14 +151,16 @@ static void guc_prepare_xfer(struct intel_guc *guc)
 }
 
 /* Copy RSA signature from the fw image to HW for verification */
-static void guc_xfer_rsa(struct intel_guc *guc, struct i915_vma *vma)
+static void guc_xfer_rsa(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_uc_fw *fw = &guc->fw;
+	struct sg_table *pages = fw->obj->mm.pages;
 	u32 rsa[UOS_RSA_SCRATCH_COUNT];
 	int i;
 
-	sg_pcopy_to_buffer(vma->pages->sgl, vma->pages->nents,
-			   rsa, sizeof(rsa), guc->fw.rsa_offset);
+	sg_pcopy_to_buffer(pages->sgl, pages->nents,
+			   rsa, sizeof(rsa), fw->rsa_offset);
 
 	for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
 		I915_WRITE(UOS_RSA_SCRATCH(i), rsa[i]);
@@ -201,7 +232,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
  * transfer between GTT locations. This functionality is left out of the API
  * for now as there is no need for it.
  */
-static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
+static int guc_xfer_ucode(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct intel_uc_fw *guc_fw = &guc->fw;
@@ -214,7 +245,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
 	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
 
 	/* Set the source address for the new blob */
-	offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset;
+	offset = intel_uc_fw_ggtt_offset(guc_fw) + guc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -233,7 +264,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma)
 /*
  * Load the GuC firmware blob into the MinuteIA.
  */
-static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma)
+static int guc_fw_xfer(struct intel_uc_fw *guc_fw)
 {
 	struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -250,9 +281,9 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma)
 	 * by the DMA engine in one operation, whereas the RSA signature is
 	 * loaded via MMIO.
 	 */
-	guc_xfer_rsa(guc, vma);
+	guc_xfer_rsa(guc);
 
-	ret = guc_xfer_ucode(guc, vma);
+	ret = guc_xfer_ucode(guc);
 
 	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index b2f5148f4f17..f55f3bc8524d 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -39,6 +39,14 @@
 #define GUC_VIDEO_ENGINE2		4
 #define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
 
+/*
+ * XXX: Beware that Gen9 firmware 32.x uses wrong definition for
+ * GUC_MAX_INSTANCES_PER_CLASS (1) but this is harmless for us now
+ * as we are not enabling GuC submission mode where this will be used
+ */
+#define GUC_MAX_ENGINE_CLASSES		5
+#define GUC_MAX_INSTANCES_PER_CLASS	4
+
 #define GUC_DOORBELL_INVALID		256
 
 #define GUC_DB_SIZE			(PAGE_SIZE)
@@ -73,44 +81,28 @@
 #define GUC_STAGE_DESC_ATTR_PCH		BIT(6)
 #define GUC_STAGE_DESC_ATTR_TERMINATED	BIT(7)
 
-/* The guc control data is 10 DWORDs */
+/* New GuC control data */
 #define GUC_CTL_CTXINFO			0
 #define   GUC_CTL_CTXNUM_IN16_SHIFT	0
 #define   GUC_CTL_BASE_ADDR_SHIFT	12
 
-#define GUC_CTL_ARAT_HIGH		1
-#define GUC_CTL_ARAT_LOW		2
-
-#define GUC_CTL_DEVICE_INFO		3
-
-#define GUC_CTL_LOG_PARAMS		4
+#define GUC_CTL_LOG_PARAMS		1
 #define   GUC_LOG_VALID			(1 << 0)
 #define   GUC_LOG_NOTIFY_ON_HALF_FULL	(1 << 1)
 #define   GUC_LOG_ALLOC_IN_MEGABYTE	(1 << 3)
 #define   GUC_LOG_CRASH_SHIFT		4
-#define   GUC_LOG_CRASH_MASK		(0x1 << GUC_LOG_CRASH_SHIFT)
+#define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
 #define   GUC_LOG_DPC_SHIFT		6
 #define   GUC_LOG_DPC_MASK	        (0x7 << GUC_LOG_DPC_SHIFT)
 #define   GUC_LOG_ISR_SHIFT		9
 #define   GUC_LOG_ISR_MASK	        (0x7 << GUC_LOG_ISR_SHIFT)
 #define   GUC_LOG_BUF_ADDR_SHIFT	12
 
-#define GUC_CTL_PAGE_FAULT_CONTROL	5
+#define GUC_CTL_WA			2
+#define GUC_CTL_FEATURE			3
+#define   GUC_CTL_DISABLE_SCHEDULER	(1 << 14)
 
-#define GUC_CTL_WA			6
-#define   GUC_CTL_WA_UK_BY_DRIVER	(1 << 3)
-
-#define GUC_CTL_FEATURE			7
-#define   GUC_CTL_VCS2_ENABLED		(1 << 0)
-#define   GUC_CTL_KERNEL_SUBMISSIONS	(1 << 1)
-#define   GUC_CTL_FEATURE2		(1 << 2)
-#define   GUC_CTL_POWER_GATING		(1 << 3)
-#define   GUC_CTL_DISABLE_SCHEDULER	(1 << 4)
-#define   GUC_CTL_PREEMPTION_LOG	(1 << 5)
-#define   GUC_CTL_ENABLE_SLPC		(1 << 7)
-#define   GUC_CTL_RESET_ON_PREMPT_FAILURE	(1 << 8)
-
-#define GUC_CTL_DEBUG			8
+#define GUC_CTL_DEBUG			4
 #define   GUC_LOG_VERBOSITY_SHIFT	0
 #define   GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
 #define   GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
@@ -123,13 +115,10 @@
 #define	  GUC_LOG_DESTINATION_MASK	(3 << 4)
 #define   GUC_LOG_DISABLED		(1 << 6)
 #define   GUC_PROFILE_ENABLED		(1 << 7)
-#define   GUC_WQ_TRACK_ENABLED		(1 << 8)
-#define   GUC_ADS_ENABLED		(1 << 9)
-#define   GUC_LOG_DEFAULT_DISABLED	(1 << 10)
-#define   GUC_ADS_ADDR_SHIFT		11
-#define   GUC_ADS_ADDR_MASK		0xfffff800
 
-#define GUC_CTL_RSRVD			9
+#define GUC_CTL_ADS			5
+#define   GUC_ADS_ADDR_SHIFT		1
+#define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
 
 #define GUC_CTL_MAX_DWORDS		(SOFT_SCRATCH_COUNT - 2) /* [1..14] */
 
@@ -168,11 +157,7 @@
  *    in fw. So driver will load a truncated firmware in this case.
  *
  * HuC firmware layout is same as GuC firmware.
- *
- * HuC firmware css header is different. However, the only difference is where
- * the version information is saved. The uc_css_header is unified to support
- * both. Driver should get HuC version from uc_css_header.huc_sw_version, while
- * uc_css_header.guc_sw_version for GuC.
+ * Only HuC version information is saved in a different way.
  */
 
 struct uc_css_header {
@@ -183,41 +168,27 @@ struct uc_css_header {
 	u32 header_version;
 	u32 module_id;
 	u32 module_vendor;
-	union {
-		struct {
-			u8 day;
-			u8 month;
-			u16 year;
-		};
-		u32 date;
-	};
+	u32 date;
+#define CSS_DATE_DAY			(0xFF << 0)
+#define CSS_DATE_MONTH			(0xFF << 8)
+#define CSS_DATE_YEAR			(0xFFFF << 16)
 	u32 size_dw; /* uCode plus header_size_dw */
 	u32 key_size_dw;
 	u32 modulus_size_dw;
 	u32 exponent_size_dw;
-	union {
-		struct {
-			u8 hour;
-			u8 min;
-			u16 sec;
-		};
-		u32 time;
-	};
-
+	u32 time;
+#define CSS_TIME_HOUR			(0xFF << 0)
+#define CSS_DATE_MIN			(0xFF << 8)
+#define CSS_DATE_SEC			(0xFFFF << 16)
 	char username[8];
 	char buildnumber[12];
-	union {
-		struct {
-			u32 branch_client_version;
-			u32 sw_version;
-	} guc;
-		struct {
-			u32 sw_version;
-			u32 reserved;
-	} huc;
-	};
-	u32 prod_preprod_fw;
-	u32 reserved[12];
+	u32 sw_version;
+#define CSS_SW_VERSION_GUC_MAJOR	(0xFF << 16)
+#define CSS_SW_VERSION_GUC_MINOR	(0xFF << 8)
+#define CSS_SW_VERSION_GUC_PATCH	(0xFF << 0)
+#define CSS_SW_VERSION_HUC_MAJOR	(0xFFFF << 16)
+#define CSS_SW_VERSION_HUC_MINOR	(0xFFFF << 0)
+	u32 reserved[14];
 	u32 header_info;
 } __packed;
 
@@ -384,14 +355,16 @@ struct guc_ct_buffer_desc {
  *
  * bit[4..0]	message len (in dwords)
  * bit[7..5]	reserved
- * bit[8]	write fence to desc
- * bit[9]	write status to H2G buff
- * bit[10]	send status (via G2H)
+ * bit[8]	response (G2H only)
+ * bit[8]	write fence to desc (H2G only)
+ * bit[9]	write status to H2G buff (H2G only)
+ * bit[10]	send status back via G2H (H2G only)
  * bit[15..11]	reserved
  * bit[31..16]	action code
  */
 #define GUC_CT_MSG_LEN_SHIFT			0
 #define GUC_CT_MSG_LEN_MASK			0x1F
+#define GUC_CT_MSG_IS_RESPONSE			(1 << 8)
 #define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
 #define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
 #define GUC_CT_MSG_SEND_STATUS			(1 << 10)
@@ -423,23 +396,19 @@ struct guc_ct_buffer_desc {
 struct guc_policy {
 	/* Time for one workload to execute. (in micro seconds) */
 	u32 execution_quantum;
-	u32 reserved1;
-
 	/* Time to wait for a preemption request to completed before issuing a
 	 * reset. (in micro seconds). */
 	u32 preemption_time;
-
 	/* How much time to allow to run after the first fault is observed.
 	 * Then preempt afterwards. (in micro seconds) */
 	u32 fault_time;
-
 	u32 policy_flags;
-	u32 reserved[2];
+	u32 reserved[8];
 } __packed;
 
 struct guc_policies {
-	struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
-
+	struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES];
+	u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
 	/* In micro seconds. How much time to allow before DPC processing is
 	 * called back via interrupt (to prevent DPC queue drain starving).
 	 * Typically 1000s of micro seconds (example only, not granularity). */
@@ -452,57 +421,73 @@ struct guc_policies {
 	 * idle. */
 	u32 max_num_work_items;
 
-	u32 reserved[19];
+	u32 reserved[4];
 } __packed;
 
 /* GuC MMIO reg state struct */
 
-#define GUC_REGSET_FLAGS_NONE		0x0
-#define GUC_REGSET_POWERCYCLE		0x1
-#define GUC_REGSET_MASKED		0x2
-#define GUC_REGSET_ENGINERESET		0x4
-#define GUC_REGSET_SAVE_DEFAULT_VALUE	0x8
-#define GUC_REGSET_SAVE_CURRENT_VALUE	0x10
 
-#define GUC_REGSET_MAX_REGISTERS	25
-#define GUC_MMIO_WHITE_LIST_START	0x24d0
-#define GUC_MMIO_WHITE_LIST_MAX		12
+#define GUC_REGSET_MAX_REGISTERS	64
 #define GUC_S3_SAVE_SPACE_PAGES		10
 
-struct guc_mmio_regset {
-	struct __packed {
-		u32 offset;
-		u32 value;
-		u32 flags;
-	} registers[GUC_REGSET_MAX_REGISTERS];
+struct guc_mmio_reg {
+	u32 offset;
+	u32 value;
+	u32 flags;
+#define GUC_REGSET_MASKED		(1 << 0)
+} __packed;
 
+struct guc_mmio_regset {
+	struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS];
 	u32 values_valid;
 	u32 number_of_registers;
 } __packed;
 
-/* MMIO registers that are set as non privileged */
-struct mmio_white_list {
-	u32 mmio_start;
-	u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
-	u32 count;
+/* GuC register sets */
+struct guc_mmio_reg_state {
+	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved[98];
 } __packed;
 
-struct guc_mmio_reg_state {
-	struct guc_mmio_regset global_reg;
-	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM];
-	struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM];
+/* HW info */
+struct guc_gt_system_info {
+	u32 slice_enabled;
+	u32 rcs_enabled;
+	u32 reserved0;
+	u32 bcs_enabled;
+	u32 vdbox_enable_mask;
+	u32 vdbox_sfc_support_mask;
+	u32 vebox_enable_mask;
+	u32 reserved[9];
 } __packed;
 
-/* GuC Additional Data Struct */
+/* Clients info */
+struct guc_ct_pool_entry {
+	struct guc_ct_buffer_desc desc;
+	u32 reserved[7];
+} __packed;
+
+#define GUC_CT_POOL_SIZE	2
 
+struct guc_clients_info {
+	u32 clients_num;
+	u32 reserved0[13];
+	u32 ct_pool_addr;
+	u32 ct_pool_count;
+	u32 reserved[4];
+} __packed;
+
+/* GuC Additional Data Struct */
 struct guc_ads {
 	u32 reg_state_addr;
 	u32 reg_state_buffer;
-	u32 golden_context_lrca;
 	u32 scheduler_policies;
-	u32 reserved0[3];
-	u32 eng_state_size[GUC_MAX_ENGINES_NUM];
-	u32 reserved2[4];
+	u32 gt_system_info;
+	u32 clients_info;
+	u32 control_data;
+	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+	u32 reserved[16];
 } __packed;
 
 /* GuC logging structures */
@@ -515,6 +500,8 @@ enum guc_log_buffer_type {
 };
 
 /**
+ * struct guc_log_buffer_state - GuC log buffer state
+ *
  * Below state structure is used for coordination of retrieval of GuC firmware
  * logs. Separate state is maintained for each log buffer type.
  * read_ptr points to the location where i915 read last in log buffer and
@@ -646,7 +633,6 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_DEFAULT = 0x0,
 	INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
 	INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
-	INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x6,
 	INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
 	INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
 	INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
@@ -654,6 +640,7 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
 	INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
 	INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
+	INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x3005,
 	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
 	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
 	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
@@ -674,9 +661,9 @@ enum intel_guc_report_status {
 };
 
 enum intel_guc_sleep_state_status {
-	INTEL_GUC_SLEEP_STATE_SUCCESS = 0x0,
-	INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x1,
-	INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x2
+	INTEL_GUC_SLEEP_STATE_SUCCESS = 0x1,
+	INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+	INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
 #define INTEL_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
 };
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 7146524264dd..e3b83ecb90b5 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -208,7 +208,9 @@ static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
 			/* buffer_full_cnt is a 4 bit counter */
 			log->stats[type].sampled_overflow += 16;
 		}
-		DRM_ERROR_RATELIMITED("GuC log buffer overflow\n");
+
+		dev_notice_ratelimited(guc_to_i915(log_to_guc(log))->drm.dev,
+				       "GuC log buffer overflow\n");
 	}
 
 	return overflow;
@@ -343,32 +345,21 @@ static void capture_logs_work(struct work_struct *work)
 
 static int guc_log_map(struct intel_guc_log *log)
 {
-	struct intel_guc *guc = log_to_guc(log);
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
-	int ret;
 
 	lockdep_assert_held(&log->relay.lock);
 
 	if (!log->vma)
 		return -ENODEV;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-	if (ret)
-		return ret;
-
 	/*
 	 * Create a WC (Uncached for read) vmalloc mapping of log
 	 * buffer pages, so that we can directly get the data
 	 * (up-to-date) from memory.
 	 */
 	vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC);
-	if (IS_ERR(vaddr)) {
-		DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
+	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
-	}
 
 	log->relay.buf_addr = vaddr;
 
@@ -447,7 +438,7 @@ static void guc_log_capture_logs(struct intel_guc_log *log)
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		guc_action_flush_log_complete(guc);
 }
 
@@ -526,7 +517,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
 	if (log->level == level)
 		goto out_unlock;
 
-	with_intel_runtime_pm(dev_priv, wakeref)
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
 		ret = guc_action_control_log(guc,
 					     GUC_LOG_LEVEL_IS_VERBOSE(level),
 					     GUC_LOG_LEVEL_IS_ENABLED(level),
@@ -611,7 +602,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
 	 */
 	flush_work(&log->relay.flush_work);
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		guc_action_flush_log(guc);
 
 	/* GuC would have updated log buffer by now, so capture it */
diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h
index 57e7ad522c2f..a214f8b71929 100644
--- a/drivers/gpu/drm/i915/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/intel_guc_reg.h
@@ -51,6 +51,9 @@
 #define SOFT_SCRATCH(n)			_MMIO(0xc180 + (n) * 4)
 #define SOFT_SCRATCH_COUNT		16
 
+#define GEN11_SOFT_SCRATCH(n)		_MMIO(0x190240 + (n) * 4)
+#define GEN11_SOFT_SCRATCH_COUNT	4
+
 #define UOS_RSA_SCRATCH(i)		_MMIO(0xc200 + (i) * 4)
 #define UOS_RSA_SCRATCH_COUNT		64
 
@@ -76,6 +79,9 @@
 #define HUC_STATUS2             _MMIO(0xD3B0)
 #define   HUC_FW_VERIFIED       (1<<7)
 
+#define GEN11_HUC_KERNEL_LOAD_INFO	_MMIO(0xC1DC)
+#define   HUC_LOAD_SUCCESSFUL		  (1 << 0)
+
 #define GUC_WOPCM_SIZE			_MMIO(0xc050)
 #define   GUC_WOPCM_SIZE_LOCKED		  (1<<0)
 #define   GUC_WOPCM_SIZE_SHIFT		12
@@ -103,6 +109,7 @@
 
 #define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
 #define   GUC_SEND_TRIGGER		  (1<<0)
+#define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
 
 #define GUC_NUM_DOORBELLS		256
 
@@ -127,4 +134,22 @@ struct guc_doorbell_info {
 #define GUC_WD_VECS_IER			_MMIO(0xC558)
 #define GUC_PM_P24C_IER			_MMIO(0xC55C)
 
+/* GuC Interrupt Vector */
+#define GEN11_GUC_INTR_GUC2HOST		(1 << 15)
+#define GEN11_GUC_INTR_EXEC_ERROR	(1 << 14)
+#define GEN11_GUC_INTR_DISPLAY_EVENT	(1 << 13)
+#define GEN11_GUC_INTR_SEM_SIG		(1 << 12)
+#define GEN11_GUC_INTR_IOMMU2GUC	(1 << 11)
+#define GEN11_GUC_INTR_DOORBELL_RANG	(1 << 10)
+#define GEN11_GUC_INTR_DMA_DONE		(1 <<  9)
+#define GEN11_GUC_INTR_FATAL_ERROR	(1 <<  8)
+#define GEN11_GUC_INTR_NOTIF_ERROR	(1 <<  7)
+#define GEN11_GUC_INTR_SW_INT_6		(1 <<  6)
+#define GEN11_GUC_INTR_SW_INT_5		(1 <<  5)
+#define GEN11_GUC_INTR_SW_INT_4		(1 <<  4)
+#define GEN11_GUC_INTR_SW_INT_3		(1 <<  3)
+#define GEN11_GUC_INTR_SW_INT_2		(1 <<  2)
+#define GEN11_GUC_INTR_SW_INT_1		(1 <<  1)
+#define GEN11_GUC_INTR_SW_INT_0		(1 <<  0)
+
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 46cd0e70aecb..db531ebc7704 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -24,8 +24,12 @@
 
 #include <linux/circ_buf.h>
 
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_context.h"
+#include "gem/i915_gem_context.h"
+
 #include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
 #include "i915_drv.h"
 
 #define GUC_PREEMPT_FINISHED		0x1
@@ -362,11 +366,10 @@ static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
 static void guc_stage_desc_init(struct intel_guc_client *client)
 {
 	struct intel_guc *guc = client->guc;
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
+	struct i915_gem_engines_iter it;
 	struct guc_stage_desc *desc;
-	unsigned int tmp;
+	struct intel_context *ce;
 	u32 gfx_addr;
 
 	desc = __get_stage_desc(client);
@@ -380,10 +383,11 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 	desc->priority = client->priority;
 	desc->db_id = client->doorbell_id;
 
-	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = intel_context_lookup(ctx, engine);
-		u32 guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		struct guc_execlist_context *lrc;
+
+		if (!(ce->engine->mask & client->engines))
+			continue;
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -392,7 +396,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * for now who owns a GuC client. But for future owner of GuC
 		 * client, need to make sure lrc is pinned prior to enter here.
 		 */
-		if (!ce || !ce->state)
+		if (!ce->state)
 			break;	/* XXX: continue? */
 
 		/*
@@ -402,6 +406,7 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * Instead, the GuC uses the LRCA of the user mode context (see
 		 * guc_add_request below).
 		 */
+		lrc = &desc->lrc[ce->engine->guc_id];
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
@@ -412,15 +417,16 @@ static void guc_stage_desc_init(struct intel_guc_client *client)
 		 * here. In proxy submission, it wants the stage id
 		 */
 		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
-				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
+				(ce->engine->guc_id << GUC_ELC_ENGINE_OFFSET);
 
 		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
 		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc->engines_used |= (1 << guc_engine_id);
+		desc->engines_used |= BIT(ce->engine->guc_id);
 	}
+	i915_gem_context_unlock_engines(ctx);
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
 			 client->engines, desc->engines_used);
@@ -551,10 +557,10 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
  */
 static void flush_ggtt_writes(struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = vma->vm->i915;
+	struct drm_i915_private *i915 = vma->vm->i915;
 
 	if (i915_vma_is_map_and_fenceable(vma))
-		POSTING_READ_FW(GUC_STATUS);
+		intel_uncore_posting_read_fw(&i915->uncore, GUC_STATUS);
 }
 
 static void inject_preempt_context(struct work_struct *work)
@@ -734,7 +740,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 	bool submit = false;
 	struct rb_node *rb;
 
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
 
 	if (port_isset(port)) {
 		if (intel_engine_has_preemption(engine)) {
@@ -742,7 +748,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 				&engine->i915->guc.preempt_work[engine->id];
 			int prio = execlists->queue_priority_hint;
 
-			if (__execlists_need_preempt(prio, port_prio(port))) {
+			if (i915_scheduler_need_preempt(prio,
+							port_prio(port))) {
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_PREEMPT);
 				queue_work(engine->i915->guc.preempt_wq,
@@ -815,7 +822,7 @@ static void guc_submission_tasklet(unsigned long data)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
@@ -840,7 +847,7 @@ static void guc_submission_tasklet(unsigned long data)
 	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
 		guc_dequeue(engine);
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_prepare(struct intel_engine_cs *engine)
@@ -877,7 +884,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	struct i915_request *rq;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	execlists_cancel_port_requests(execlists);
 
@@ -893,7 +900,7 @@ static void guc_reset(struct intel_engine_cs *engine, bool stalled)
 	intel_lr_context_reset(engine, rq->hw_context, rq->head, stalled);
 
 out_unlock:
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_cancel_requests(struct intel_engine_cs *engine)
@@ -919,13 +926,13 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	 * submission's irq state, we also wish to remind ourselves that
 	 * it is irq state.)
 	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
 
@@ -954,7 +961,7 @@ static void guc_cancel_requests(struct intel_engine_cs *engine)
 	execlists->queue = RB_ROOT_CACHED;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }
 
 static void guc_reset_finish(struct intel_engine_cs *engine)
@@ -1194,7 +1201,7 @@ static void __guc_client_disable(struct intel_guc_client *client)
 	 * the case, instead of trying (in vain) to communicate with it, let's
 	 * just cleanup the doorbell HW and our internal state.
 	 */
-	if (intel_guc_is_alive(client->guc))
+	if (intel_guc_is_loaded(client->guc))
 		destroy_doorbell(client);
 	else
 		__fini_doorbell(client);
@@ -1299,7 +1306,7 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
 	 */
 	irqs = _MASKED_BIT_ENABLE(GFX_INTERRUPT_STEERING);
 	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MODE_GEN7(engine), irqs);
+		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
 
 	/* route USER_INTERRUPT to Host, all others are sent to GuC. */
 	irqs = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
@@ -1346,7 +1353,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
 	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
 	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
 	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MODE_GEN7(engine), irqs);
+		ENGINE_WRITE(engine, RING_MODE_GEN7, irqs);
 
 	/* route all GT interrupts to the host */
 	I915_WRITE(GUC_BCS_RCS_IER, 0);
@@ -1359,6 +1366,7 @@ static void guc_interrupts_release(struct drm_i915_private *dev_priv)
 
 static void guc_submission_park(struct intel_engine_cs *engine)
 {
+	intel_engine_park(engine);
 	intel_engine_unpin_breadcrumbs_irq(engine);
 	engine->flags &= ~I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
 }
@@ -1420,10 +1428,6 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 
 	GEM_BUG_ON(!guc->execbuf_client);
 
-	err = intel_guc_sample_forcewake(guc);
-	if (err)
-		return err;
-
 	err = guc_clients_enable(guc);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h
index aa5e6749c925..7d823a513b9c 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/intel_guc_submission.h
@@ -27,9 +27,10 @@
 
 #include <linux/spinlock.h>
 
+#include "gt/intel_engine_types.h"
+
 #include "i915_gem.h"
 #include "i915_selftest.h"
-#include "intel_engine_types.h"
 
 struct drm_i915_private;
 
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 94c04f16a2ad..fb6f693d3cac 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -29,7 +29,19 @@
 
 void intel_huc_init_early(struct intel_huc *huc)
 {
+	struct drm_i915_private *i915 = huc_to_i915(huc);
+
 	intel_huc_fw_init_early(huc);
+
+	if (INTEL_GEN(i915) >= 11) {
+		huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
+		huc->status.mask = HUC_LOAD_SUCCESSFUL;
+		huc->status.value = HUC_LOAD_SUCCESSFUL;
+	} else {
+		huc->status.reg = HUC_STATUS2;
+		huc->status.mask = HUC_FW_VERIFIED;
+		huc->status.value = HUC_FW_VERIFIED;
+	}
 }
 
 int intel_huc_init_misc(struct intel_huc *huc)
@@ -40,6 +52,61 @@ int intel_huc_init_misc(struct intel_huc *huc)
 	return 0;
 }
 
+static int intel_huc_rsa_data_create(struct intel_huc *huc)
+{
+	struct drm_i915_private *i915 = huc_to_i915(huc);
+	struct intel_guc *guc = &i915->guc;
+	struct i915_vma *vma;
+	void *vaddr;
+
+	/*
+	 * HuC firmware will sit above GUC_GGTT_TOP and will not map
+	 * through GTT. Unfortunately, this means GuC cannot perform
+	 * the HuC auth. as the rsa offset now falls within the GuC
+	 * inaccessible range. We resort to perma-pinning an additional
+	 * vma within the accessible range that only contains the rsa
+	 * signature. The GuC can use this extra pinning to perform
+	 * the authentication since its GGTT offset will be GuC
+	 * accessible.
+	 */
+	vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		i915_vma_unpin_and_release(&vma, 0);
+		return PTR_ERR(vaddr);
+	}
+
+	huc->rsa_data = vma;
+	huc->rsa_data_vaddr = vaddr;
+
+	return 0;
+}
+
+static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
+{
+	i915_vma_unpin_and_release(&huc->rsa_data, I915_VMA_RELEASE_MAP);
+}
+
+int intel_huc_init(struct intel_huc *huc)
+{
+	int err;
+
+	err = intel_huc_rsa_data_create(huc);
+	if (err)
+		return err;
+
+	return intel_uc_fw_init(&huc->fw);
+}
+
+void intel_huc_fini(struct intel_huc *huc)
+{
+	intel_uc_fw_fini(&huc->fw);
+	intel_huc_rsa_data_destroy(huc);
+}
+
 /**
  * intel_huc_auth() - Authenticate HuC uCode
  * @huc: intel_huc structure
@@ -55,45 +122,31 @@ int intel_huc_auth(struct intel_huc *huc)
 {
 	struct drm_i915_private *i915 = huc_to_i915(huc);
 	struct intel_guc *guc = &i915->guc;
-	struct i915_vma *vma;
-	u32 status;
 	int ret;
 
 	if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
 		return -ENOEXEC;
 
-	vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0,
-				       PIN_OFFSET_BIAS | i915->ggtt.pin_bias);
-	if (IS_ERR(vma)) {
-		ret = PTR_ERR(vma);
-		DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret);
-		goto fail;
-	}
-
 	ret = intel_guc_auth_huc(guc,
-				 intel_guc_ggtt_offset(guc, vma) +
-				 huc->fw.rsa_offset);
+				 intel_guc_ggtt_offset(guc, huc->rsa_data));
 	if (ret) {
 		DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
-		goto fail_unpin;
+		goto fail;
 	}
 
 	/* Check authentication status, it should be done by now */
 	ret = __intel_wait_for_register(&i915->uncore,
-					HUC_STATUS2,
-					HUC_FW_VERIFIED,
-					HUC_FW_VERIFIED,
-					2, 50, &status);
+					huc->status.reg,
+					huc->status.mask,
+					huc->status.value,
+					2, 50, NULL);
 	if (ret) {
-		DRM_ERROR("HuC: Firmware not verified %#x\n", status);
-		goto fail_unpin;
+		DRM_ERROR("HuC: Firmware not verified %d\n", ret);
+		goto fail;
 	}
 
-	i915_vma_unpin(vma);
 	return 0;
 
-fail_unpin:
-	i915_vma_unpin(vma);
 fail:
 	huc->fw.load_status = INTEL_UC_FIRMWARE_FAIL;
 
@@ -121,8 +174,9 @@ int intel_huc_check_status(struct intel_huc *huc)
 	if (!HAS_HUC(dev_priv))
 		return -ENODEV;
 
-	with_intel_runtime_pm(dev_priv, wakeref)
-		status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
+		status = (I915_READ(huc->status.reg) & huc->status.mask) ==
+			  huc->status.value;
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h
index 7e41d870b509..2a6c94e79f17 100644
--- a/drivers/gpu/drm/i915/intel_huc.h
+++ b/drivers/gpu/drm/i915/intel_huc.h
@@ -25,6 +25,7 @@
 #ifndef _INTEL_HUC_H_
 #define _INTEL_HUC_H_
 
+#include "i915_reg.h"
 #include "intel_uc_fw.h"
 #include "intel_huc_fw.h"
 
@@ -33,16 +34,26 @@ struct intel_huc {
 	struct intel_uc_fw fw;
 
 	/* HuC-specific additions */
+	struct i915_vma *rsa_data;
+	void *rsa_data_vaddr;
+
+	struct {
+		i915_reg_t reg;
+		u32 mask;
+		u32 value;
+	} status;
 };
 
 void intel_huc_init_early(struct intel_huc *huc);
 int intel_huc_init_misc(struct intel_huc *huc);
+int intel_huc_init(struct intel_huc *huc);
+void intel_huc_fini(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc);
 int intel_huc_check_status(struct intel_huc *huc);
 
 static inline void intel_huc_fini_misc(struct intel_huc *huc)
 {
-	intel_uc_fw_fini(&huc->fw);
+	intel_uc_fw_cleanup_fetch(&huc->fw);
 }
 
 static inline int intel_huc_sanitize(struct intel_huc *huc)
diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c
index 68d47c105939..05cbf8338f53 100644
--- a/drivers/gpu/drm/i915/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/intel_huc_fw.c
@@ -34,6 +34,14 @@
 #define KBL_HUC_FW_MINOR 00
 #define KBL_BLD_NUM 1810
 
+#define GLK_HUC_FW_MAJOR 03
+#define GLK_HUC_FW_MINOR 01
+#define GLK_BLD_NUM 2893
+
+#define ICL_HUC_FW_MAJOR 8
+#define ICL_HUC_FW_MINOR 4
+#define ICL_BLD_NUM 3238
+
 #define HUC_FW_PATH(platform, major, minor, bld_num) \
 	"i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \
 	__stringify(minor) "_" __stringify(bld_num) ".bin"
@@ -50,6 +58,14 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE);
 	KBL_HUC_FW_MINOR, KBL_BLD_NUM)
 MODULE_FIRMWARE(I915_KBL_HUC_UCODE);
 
+#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \
+	GLK_HUC_FW_MINOR, GLK_BLD_NUM)
+MODULE_FIRMWARE(I915_GLK_HUC_UCODE);
+
+#define I915_ICL_HUC_UCODE HUC_FW_PATH(icl, ICL_HUC_FW_MAJOR, \
+	ICL_HUC_FW_MINOR, ICL_BLD_NUM)
+MODULE_FIRMWARE(I915_ICL_HUC_UCODE);
+
 static void huc_fw_select(struct intel_uc_fw *huc_fw)
 {
 	struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
@@ -76,6 +92,14 @@ static void huc_fw_select(struct intel_uc_fw *huc_fw)
 		huc_fw->path = I915_KBL_HUC_UCODE;
 		huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR;
 		huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR;
+	} else if (IS_GEMINILAKE(dev_priv)) {
+		huc_fw->path = I915_GLK_HUC_UCODE;
+		huc_fw->major_ver_wanted = GLK_HUC_FW_MAJOR;
+		huc_fw->minor_ver_wanted = GLK_HUC_FW_MINOR;
+	} else if (IS_ICELAKE(dev_priv)) {
+		huc_fw->path = I915_ICL_HUC_UCODE;
+		huc_fw->major_ver_wanted = ICL_HUC_FW_MAJOR;
+		huc_fw->minor_ver_wanted = ICL_HUC_FW_MINOR;
 	}
 }
 
@@ -89,22 +113,28 @@ void intel_huc_fw_init_early(struct intel_huc *huc)
 {
 	struct intel_uc_fw *huc_fw = &huc->fw;
 
-	intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC);
+	intel_uc_fw_init_early(huc_fw, INTEL_UC_FW_TYPE_HUC);
 	huc_fw_select(huc_fw);
 }
 
-/**
- * huc_fw_xfer() - DMA's the firmware
- * @huc_fw: the firmware descriptor
- * @vma: the firmware image (bound into the GGTT)
- *
- * Transfer the firmware image to RAM for execution by the microcontroller.
- *
- * Return: 0 on success, non-zero on failure
- */
-static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
+static void huc_xfer_rsa(struct intel_huc *huc)
 {
-	struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
+	struct intel_uc_fw *fw = &huc->fw;
+	struct sg_table *pages = fw->obj->mm.pages;
+
+	/*
+	 * HuC firmware image is outside GuC accessible range.
+	 * Copy the RSA signature out of the image into
+	 * the perma-pinned region set aside for it
+	 */
+	sg_pcopy_to_buffer(pages->sgl, pages->nents,
+			   huc->rsa_data_vaddr, fw->rsa_size,
+			   fw->rsa_offset);
+}
+
+static int huc_xfer_ucode(struct intel_huc *huc)
+{
+	struct intel_uc_fw *huc_fw = &huc->fw;
 	struct drm_i915_private *dev_priv = huc_to_i915(huc);
 	struct intel_uncore *uncore = &dev_priv->uncore;
 	unsigned long offset = 0;
@@ -116,7 +146,7 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
 	/* Set the source address for the uCode */
-	offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) +
+	offset = intel_uc_fw_ggtt_offset(huc_fw) +
 		 huc_fw->header_offset;
 	intel_uncore_write(uncore, DMA_ADDR_0_LOW,
 			   lower_32_bits(offset));
@@ -151,6 +181,23 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma)
 }
 
 /**
+ * huc_fw_xfer() - DMA's the firmware
+ * @huc_fw: the firmware descriptor
+ *
+ * Transfer the firmware image to RAM for execution by the microcontroller.
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+static int huc_fw_xfer(struct intel_uc_fw *huc_fw)
+{
+	struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw);
+
+	huc_xfer_rsa(huc);
+
+	return huc_xfer_ucode(huc);
+}
+
+/**
  * intel_huc_fw_upload() - load HuC uCode to device
  * @huc: intel_huc structure
  *
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 44be676fabd6..d9a7a13ce32a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -33,11 +33,15 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_plane_helper.h>
 
+#include "display/intel_atomic.h"
+#include "display/intel_fbc.h"
+#include "display/intel_sprite.h"
+
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "intel_drv.h"
-#include "intel_fbc.h"
 #include "intel_pm.h"
-#include "intel_sprite.h"
+#include "intel_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
 
 /**
@@ -188,8 +192,8 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
 {
 	u16 ddrpll, csipll;
 
-	ddrpll = I915_READ16(DDRMPLL1);
-	csipll = I915_READ16(CSIPLL0);
+	ddrpll = intel_uncore_read16(&dev_priv->uncore, DDRMPLL1);
+	csipll = intel_uncore_read16(&dev_priv->uncore, CSIPLL0);
 
 	switch (ddrpll & 0xff) {
 	case 0xc:
@@ -317,7 +321,7 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DDR_SETUP2);
 	if (enable)
@@ -332,14 +336,14 @@ static void chv_set_memory_dvfs(struct drm_i915_private *dev_priv, bool enable)
 		      FORCE_DDR_FREQ_REQ_ACK) == 0, 3))
 		DRM_ERROR("timed out waiting for Punit DDR DVFS request\n");
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	vlv_punit_put(dev_priv);
 }
 
 static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 {
 	u32 val;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	vlv_punit_get(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
 	if (enable)
@@ -348,7 +352,7 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable)
 		val &= ~DSP_MAXFIFO_PM5_ENABLE;
 	vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, val);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	vlv_punit_put(dev_priv);
 }
 
 #define FW_WM(value, plane) \
@@ -675,7 +679,7 @@ static unsigned int intel_wm_method1(unsigned int pixel_rate,
 {
 	u64 ret;
 
-	ret = (u64)pixel_rate * cpp * latency;
+	ret = mul_u32_u32(pixel_rate, cpp * latency);
 	ret = DIV_ROUND_UP_ULL(ret, 10000);
 
 	return ret;
@@ -1946,6 +1950,7 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	const struct vlv_fifo_state *fifo_state =
 		&crtc_state->wm.vlv.fifo_state;
 	int sprite0_start, sprite1_start, fifo_size;
@@ -1971,13 +1976,13 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 	 * intel_pipe_update_start() has already disabled interrupts
 	 * for us, so a plain spin_lock() is sufficient here.
 	 */
-	spin_lock(&dev_priv->uncore.lock);
+	spin_lock(&uncore->lock);
 
 	switch (crtc->pipe) {
 		u32 dsparb, dsparb2, dsparb3;
 	case PIPE_A:
-		dsparb = I915_READ_FW(DSPARB);
-		dsparb2 = I915_READ_FW(DSPARB2);
+		dsparb = intel_uncore_read_fw(uncore, DSPARB);
+		dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
 
 		dsparb &= ~(VLV_FIFO(SPRITEA, 0xff) |
 			    VLV_FIFO(SPRITEB, 0xff));
@@ -1989,12 +1994,12 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 		dsparb2 |= (VLV_FIFO(SPRITEA_HI, sprite0_start >> 8) |
 			   VLV_FIFO(SPRITEB_HI, sprite1_start >> 8));
 
-		I915_WRITE_FW(DSPARB, dsparb);
-		I915_WRITE_FW(DSPARB2, dsparb2);
+		intel_uncore_write_fw(uncore, DSPARB, dsparb);
+		intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
 		break;
 	case PIPE_B:
-		dsparb = I915_READ_FW(DSPARB);
-		dsparb2 = I915_READ_FW(DSPARB2);
+		dsparb = intel_uncore_read_fw(uncore, DSPARB);
+		dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
 
 		dsparb &= ~(VLV_FIFO(SPRITEC, 0xff) |
 			    VLV_FIFO(SPRITED, 0xff));
@@ -2006,12 +2011,12 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 		dsparb2 |= (VLV_FIFO(SPRITEC_HI, sprite0_start >> 8) |
 			   VLV_FIFO(SPRITED_HI, sprite1_start >> 8));
 
-		I915_WRITE_FW(DSPARB, dsparb);
-		I915_WRITE_FW(DSPARB2, dsparb2);
+		intel_uncore_write_fw(uncore, DSPARB, dsparb);
+		intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
 		break;
 	case PIPE_C:
-		dsparb3 = I915_READ_FW(DSPARB3);
-		dsparb2 = I915_READ_FW(DSPARB2);
+		dsparb3 = intel_uncore_read_fw(uncore, DSPARB3);
+		dsparb2 = intel_uncore_read_fw(uncore, DSPARB2);
 
 		dsparb3 &= ~(VLV_FIFO(SPRITEE, 0xff) |
 			     VLV_FIFO(SPRITEF, 0xff));
@@ -2023,16 +2028,16 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state,
 		dsparb2 |= (VLV_FIFO(SPRITEE_HI, sprite0_start >> 8) |
 			   VLV_FIFO(SPRITEF_HI, sprite1_start >> 8));
 
-		I915_WRITE_FW(DSPARB3, dsparb3);
-		I915_WRITE_FW(DSPARB2, dsparb2);
+		intel_uncore_write_fw(uncore, DSPARB3, dsparb3);
+		intel_uncore_write_fw(uncore, DSPARB2, dsparb2);
 		break;
 	default:
 		break;
 	}
 
-	POSTING_READ_FW(DSPARB);
+	intel_uncore_posting_read_fw(uncore, DSPARB);
 
-	spin_unlock(&dev_priv->uncore.lock);
+	spin_unlock(&uncore->lock);
 }
 
 #undef VLV_FIFO
@@ -2810,6 +2815,8 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate)
 static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 				  u16 wm[8])
 {
+	struct intel_uncore *uncore = &dev_priv->uncore;
+
 	if (INTEL_GEN(dev_priv) >= 9) {
 		u32 val;
 		int ret, i;
@@ -2817,11 +2824,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the first set of memory latencies[0:3] */
 		val = 0; /* data0 to be programmed to 0 for first set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
-					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
+					     &val, NULL);
 
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
@@ -2838,11 +2843,9 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 
 		/* read the second set of memory latencies[4:7] */
 		val = 1; /* data0 to be programmed to 1 for second set */
-		mutex_lock(&dev_priv->pcu_lock);
 		ret = sandybridge_pcode_read(dev_priv,
 					     GEN9_PCODE_READ_MEM_LATENCY,
-					     &val);
-		mutex_unlock(&dev_priv->pcu_lock);
+					     &val, NULL);
 		if (ret) {
 			DRM_ERROR("SKL Mailbox read error = %d\n", ret);
 			return;
@@ -2895,7 +2898,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 			wm[0] += 1;
 
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		u64 sskpd = I915_READ64(MCH_SSKPD);
+		u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD);
 
 		wm[0] = (sskpd >> 56) & 0xFF;
 		if (wm[0] == 0)
@@ -2905,14 +2908,14 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 		wm[3] = (sskpd >> 20) & 0x1FF;
 		wm[4] = (sskpd >> 32) & 0x1FF;
 	} else if (INTEL_GEN(dev_priv) >= 6) {
-		u32 sskpd = I915_READ(MCH_SSKPD);
+		u32 sskpd = intel_uncore_read(uncore, MCH_SSKPD);
 
 		wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
 		wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
 		wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
 		wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
 	} else if (INTEL_GEN(dev_priv) >= 5) {
-		u32 mltr = I915_READ(MLTR_ILK);
+		u32 mltr = intel_uncore_read(uncore, MLTR_ILK);
 
 		/* ILK primary LP0 latency is 700 ns */
 		wm[0] = 7;
@@ -3677,13 +3680,10 @@ intel_enable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Enabling SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				      GEN9_SAGV_ENABLE);
 
 	/* We don't need to wait for SAGV when enabling */
-	mutex_unlock(&dev_priv->pcu_lock);
 
 	/*
 	 * Some skl systems, pre-release machines in particular,
@@ -3714,15 +3714,11 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 		return 0;
 
 	DRM_DEBUG_KMS("Disabling SAGV\n");
-	mutex_lock(&dev_priv->pcu_lock);
-
 	/* bspec says to keep retrying for at least 1 ms */
 	ret = skl_pcode_request(dev_priv, GEN9_PCODE_SAGV_CONTROL,
 				GEN9_SAGV_DISABLE,
 				GEN9_SAGV_IS_DISABLED, GEN9_SAGV_IS_DISABLED,
 				1);
-	mutex_unlock(&dev_priv->pcu_lock);
-
 	/*
 	 * Some skl systems, pre-release machines in particular,
 	 * don't actually have SAGV.
@@ -3763,14 +3759,16 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
 		sagv_block_time_us = 10;
 
 	/*
-	 * SKL+ workaround: bspec recommends we disable SAGV when we have
-	 * more then one pipe enabled
-	 *
 	 * If there are no active CRTCs, no additional checks need be performed
 	 */
 	if (hweight32(intel_state->active_crtcs) == 0)
 		return true;
-	else if (hweight32(intel_state->active_crtcs) > 1)
+
+	/*
+	 * SKL+ workaround: bspec recommends we disable SAGV when we have
+	 * more then one pipe enabled
+	 */
+	if (hweight32(intel_state->active_crtcs) > 1)
 		return false;
 
 	/* Since we're now guaranteed to only have one active CRTC... */
@@ -4370,15 +4368,16 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
 		return 0;
 	}
 
-	if (INTEL_GEN(dev_priv) < 11)
+	if (INTEL_GEN(dev_priv) >= 11)
+		total_data_rate =
+			icl_get_total_relative_data_rate(cstate,
+							 plane_data_rate);
+	else
 		total_data_rate =
 			skl_get_total_relative_data_rate(cstate,
 							 plane_data_rate,
 							 uv_plane_data_rate);
-	else
-		total_data_rate =
-			icl_get_total_relative_data_rate(cstate,
-							 plane_data_rate);
+
 
 	skl_ddb_get_pipe_allocation_limits(dev_priv, cstate, total_data_rate,
 					   ddb, alloc, &num_active);
@@ -4787,9 +4786,11 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *cstate,
 		return;
 	}
 
-	/* Display WA #1141: kbl,cfl */
-	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) ||
-	    IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) &&
+	/*
+	 * WaIncreaseLatencyIPCEnabled: kbl,cfl
+	 * Display WA #1141: kbl,cfl
+	 */
+	if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) ||
 	    dev_priv->ipc_enabled)
 		latency += 4;
 
@@ -6139,7 +6140,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 	wm->level = VLV_WM_LEVEL_PM2;
 
 	if (IS_CHERRYVIEW(dev_priv)) {
-		mutex_lock(&dev_priv->pcu_lock);
+		vlv_punit_get(dev_priv);
 
 		val = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
 		if (val & DSP_MAXFIFO_PM5_ENABLE)
@@ -6169,7 +6170,7 @@ void vlv_wm_get_hw_state(struct drm_i915_private *dev_priv)
 				wm->level = VLV_WM_LEVEL_DDR_DVFS;
 		}
 
-		mutex_unlock(&dev_priv->pcu_lock);
+		vlv_punit_put(dev_priv);
 	}
 
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
@@ -6378,16 +6379,25 @@ void intel_enable_ipc(struct drm_i915_private *dev_priv)
 	I915_WRITE(DISP_ARB_CTL2, val);
 }
 
+static bool intel_can_enable_ipc(struct drm_i915_private *dev_priv)
+{
+	/* Display WA #0477 WaDisableIPC: skl */
+	if (IS_SKYLAKE(dev_priv))
+		return false;
+
+	/* Display WA #1141: SKL:all KBL:all CFL */
+	if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
+		return dev_priv->dram_info.symmetric_memory;
+
+	return true;
+}
+
 void intel_init_ipc(struct drm_i915_private *dev_priv)
 {
 	if (!HAS_IPC(dev_priv))
 		return;
 
-	/* Display WA #1141: SKL:all KBL:all CFL */
-	if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv))
-		dev_priv->ipc_enabled = dev_priv->dram_info.symmetric_memory;
-	else
-		dev_priv->ipc_enabled = true;
+	dev_priv->ipc_enabled = intel_can_enable_ipc(dev_priv);
 
 	intel_enable_ipc(dev_priv);
 }
@@ -6397,13 +6407,14 @@ void intel_init_ipc(struct drm_i915_private *dev_priv)
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+bool ironlake_set_drps(struct drm_i915_private *i915, u8 val)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	u16 rgvswctl;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
+	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 	if (rgvswctl & MEMCTL_CMD_STS) {
 		DRM_DEBUG("gpu busy, RCS change rejected\n");
 		return false; /* still busy with another command */
@@ -6411,37 +6422,38 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
 
 	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
 		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
-	POSTING_READ16(MEMSWCTL);
+	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+	intel_uncore_posting_read16(uncore, MEMSWCTL);
 
 	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE16(MEMSWCTL, rgvswctl);
+	intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
 
 	return true;
 }
 
 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 {
+	struct intel_uncore *uncore = &dev_priv->uncore;
 	u32 rgvmodectl;
 	u8 fmax, fmin, fstart, vstart;
 
 	spin_lock_irq(&mchdev_lock);
 
-	rgvmodectl = I915_READ(MEMMODECTL);
+	rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
 
 	/* Enable temp reporting */
-	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
-	I915_WRITE16(TSC1, I915_READ(TSC1) | TSE);
+	intel_uncore_write16(uncore, PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
+	intel_uncore_write16(uncore, TSC1, I915_READ(TSC1) | TSE);
 
 	/* 100ms RC evaluation intervals */
-	I915_WRITE(RCUPEI, 100000);
-	I915_WRITE(RCDNEI, 100000);
+	intel_uncore_write(uncore, RCUPEI, 100000);
+	intel_uncore_write(uncore, RCDNEI, 100000);
 
 	/* Set max/min thresholds to 90ms and 80ms respectively */
-	I915_WRITE(RCBMAXAVG, 90000);
-	I915_WRITE(RCBMINAVG, 80000);
+	intel_uncore_write(uncore, RCBMAXAVG, 90000);
+	intel_uncore_write(uncore, RCBMINAVG, 80000);
 
-	I915_WRITE(MEMIHYST, 1);
+	intel_uncore_write(uncore, MEMIHYST, 1);
 
 	/* Set up min, max, and cur for interrupt handling */
 	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
@@ -6449,8 +6461,8 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
 		MEMMODE_FSTART_SHIFT;
 
-	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
-		PXVFREQ_PX_SHIFT;
+	vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
+		  PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
 
 	dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
 	dev_priv->ips.fstart = fstart;
@@ -6462,53 +6474,66 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
 			 fmax, fmin, fstart);
 
-	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+	intel_uncore_write(uncore,
+			   MEMINTREN,
+			   MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 
 	/*
 	 * Interrupts will be enabled in ironlake_irq_postinstall
 	 */
 
-	I915_WRITE(VIDSTART, vstart);
-	POSTING_READ(VIDSTART);
+	intel_uncore_write(uncore, VIDSTART, vstart);
+	intel_uncore_posting_read(uncore, VIDSTART);
 
 	rgvmodectl |= MEMMODE_SWMODE_EN;
-	I915_WRITE(MEMMODECTL, rgvmodectl);
+	intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
 
-	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+	if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
+			     MEMCTL_CMD_STS) == 0, 10))
 		DRM_ERROR("stuck trying to change perf mode\n");
 	mdelay(1);
 
 	ironlake_set_drps(dev_priv, fstart);
 
-	dev_priv->ips.last_count1 = I915_READ(DMIEC) +
-		I915_READ(DDREC) + I915_READ(CSIEC);
+	dev_priv->ips.last_count1 =
+		intel_uncore_read(uncore, DMIEC) +
+		intel_uncore_read(uncore, DDREC) +
+		intel_uncore_read(uncore, CSIEC);
 	dev_priv->ips.last_time1 = jiffies_to_msecs(jiffies);
-	dev_priv->ips.last_count2 = I915_READ(GFXEC);
+	dev_priv->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
 	dev_priv->ips.last_time2 = ktime_get_raw_ns();
 
 	spin_unlock_irq(&mchdev_lock);
 }
 
-static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
+static void ironlake_disable_drps(struct drm_i915_private *i915)
 {
+	struct intel_uncore *uncore = &i915->uncore;
 	u16 rgvswctl;
 
 	spin_lock_irq(&mchdev_lock);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
+	rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
 
 	/* Ack interrupts, disable EFC interrupt */
-	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-	I915_WRITE(DEIIR, DE_PCU_EVENT);
-	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+	intel_uncore_write(uncore,
+			   MEMINTREN,
+			   intel_uncore_read(uncore, MEMINTREN) &
+			   ~MEMINT_EVAL_CHG_EN);
+	intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+	intel_uncore_write(uncore,
+			   DEIER,
+			   intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
+	intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
+	intel_uncore_write(uncore,
+			   DEIMR,
+			   intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
 
 	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->ips.fstart);
+	ironlake_set_drps(i915, i915->ips.fstart);
 	mdelay(1);
 	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE(MEMSWCTL, rgvswctl);
+	intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
 	mdelay(1);
 
 	spin_unlock_irq(&mchdev_lock);
@@ -6743,7 +6768,9 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
 
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
+		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
+		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
 
@@ -6796,7 +6823,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		u8 freq;
 
@@ -6819,7 +6846,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 					rps->max_freq_softlimit)))
 			DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -6833,7 +6860,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	 */
 	gen6_disable_rps_interrupts(dev_priv);
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&rps->lock);
 	if (rps->enabled) {
 		if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 			vlv_set_rps_idle(dev_priv);
@@ -6843,7 +6870,7 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&rps->lock);
 }
 
 void gen6_rps_boost(struct i915_request *rq)
@@ -6883,7 +6910,7 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	int err;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 	GEM_BUG_ON(val > rps->max_freq);
 	GEM_BUG_ON(val < rps->min_freq);
 
@@ -7013,8 +7040,10 @@ static bool sanitize_rc6(struct drm_i915_private *i915)
 	struct intel_device_info *info = mkwrite_device_info(i915);
 
 	/* Powersaving is controlled by the host when inside a VM */
-	if (intel_vgpu_active(i915))
+	if (intel_vgpu_active(i915)) {
 		info->has_rc6 = 0;
+		info->has_rps = false;
+	}
 
 	if (info->has_rc6 &&
 	    IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(i915)) {
@@ -7062,7 +7091,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv)
 
 		if (sandybridge_pcode_read(dev_priv,
 					   HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
-					   &ddcc_status) == 0)
+					   &ddcc_status, NULL) == 0)
 			rps->efficient_freq =
 				clamp_t(u8,
 					((ddcc_status >> 8) & 0xff),
@@ -7409,7 +7438,8 @@ static void gen6_enable_rc6(struct drm_i915_private *dev_priv)
 		   GEN6_RC_CTL_HW_ENABLE);
 
 	rc6vids = 0;
-	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
+	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS,
+				     &rc6vids, NULL);
 	if (IS_GEN(dev_priv, 6) && ret) {
 		DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
 	} else if (IS_GEN(dev_priv, 6) && (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
@@ -7454,7 +7484,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 	unsigned int max_gpu_freq, min_gpu_freq;
 	struct cpufreq_policy *policy;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
+	lockdep_assert_held(&rps->lock);
 
 	if (rps->max_freq <= rps->min_freq)
 		return;
@@ -7753,6 +7783,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	valleyview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
@@ -7790,6 +7825,11 @@ static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
+
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
 }
 
 static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
@@ -7799,11 +7839,14 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	cherryview_setup_pctx(dev_priv);
 
+	vlv_iosf_sb_get(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	vlv_init_gpll_ref_freq(dev_priv);
 
-	mutex_lock(&dev_priv->sb_lock);
 	val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
-	mutex_unlock(&dev_priv->sb_lock);
 
 	switch ((val >> 2) & 0x7) {
 	case 3:
@@ -7836,6 +7879,11 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv)
 			 intel_gpu_freq(dev_priv, rps->min_freq),
 			 rps->min_freq);
 
+	vlv_iosf_sb_put(dev_priv,
+			BIT(VLV_IOSF_SB_PUNIT) |
+			BIT(VLV_IOSF_SB_NC) |
+			BIT(VLV_IOSF_SB_CCK));
+
 	WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
 		   rps->min_freq) & 1,
 		  "Odd GPU freq values\n");
@@ -7923,13 +7971,15 @@ static void cherryview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_DOWN_IDLE_AVG);
 
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  CHV_BIAS_CPU_50_SOC_50;
+	vlv_punit_get(dev_priv);
+
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
@@ -8006,14 +8056,16 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_RP_UP_BUSY_AVG |
 		   GEN6_RP_DOWN_IDLE_CONT);
 
+	vlv_punit_get(dev_priv);
+
 	/* Setting Fixed Bias */
-	val = VLV_OVERRIDE_EN |
-		  VLV_SOC_TDP_EN |
-		  VLV_BIAS_CPU_125_SOC_875;
+	val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
 	vlv_punit_write(dev_priv, VLV_TURBO_SOC_OVERRIDE, val);
 
 	val = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
 
+	vlv_punit_put(dev_priv);
+
 	/* RPS code assumes GPLL is used */
 	WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
 
@@ -8116,7 +8168,7 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN(dev_priv, 5))
 		return 0;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		spin_lock_irq(&mchdev_lock);
 		val = __i915_chipset_val(dev_priv);
 		spin_unlock_irq(&mchdev_lock);
@@ -8125,15 +8177,15 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 	return val;
 }
 
-unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
+unsigned long i915_mch_val(struct drm_i915_private *i915)
 {
 	unsigned long m, x, b;
 	u32 tsfs;
 
-	tsfs = I915_READ(TSFS);
+	tsfs = intel_uncore_read(&i915->uncore, TSFS);
 
 	m = ((tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT);
-	x = I915_READ8(TR1);
+	x = intel_uncore_read8(&i915->uncore, TR1);
 
 	b = tsfs & TSFS_INTR_MASK;
 
@@ -8202,7 +8254,7 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN(dev_priv, 5))
 		return;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		spin_lock_irq(&mchdev_lock);
 		__i915_update_gfx_val(dev_priv);
 		spin_unlock_irq(&mchdev_lock);
@@ -8254,7 +8306,7 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 	if (!IS_GEN(dev_priv, 5))
 		return 0;
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref) {
 		spin_lock_irq(&mchdev_lock);
 		val = __i915_gfx_val(dev_priv);
 		spin_unlock_irq(&mchdev_lock);
@@ -8295,7 +8347,7 @@ unsigned long i915_read_mch_val(void)
 	if (!i915)
 		return 0;
 
-	with_intel_runtime_pm(i915, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		spin_lock_irq(&mchdev_lock);
 		chipset_val = __i915_chipset_val(i915);
 		graphics_val = __i915_gfx_val(i915);
@@ -8517,8 +8569,6 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 		pm_runtime_get(&dev_priv->drm.pdev->dev);
 	}
 
-	mutex_lock(&dev_priv->pcu_lock);
-
 	/* Initialize RPS limits (for userspace) */
 	if (IS_CHERRYVIEW(dev_priv))
 		cherryview_init_gt_powersave(dev_priv);
@@ -8528,24 +8578,16 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 		gen6_init_rps_frequencies(dev_priv);
 
 	/* Derive initial user preferences/limits from the hardware limits */
-	rps->idle_freq = rps->min_freq;
-	rps->cur_freq = rps->idle_freq;
-
 	rps->max_freq_softlimit = rps->max_freq;
 	rps->min_freq_softlimit = rps->min_freq;
 
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		rps->min_freq_softlimit =
-			max_t(int,
-			      rps->efficient_freq,
-			      intel_freq_opcode(dev_priv, 450));
-
 	/* After setting max-softlimit, find the overclock max freq */
 	if (IS_GEN(dev_priv, 6) ||
 	    IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
 		u32 params = 0;
 
-		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS, &params);
+		sandybridge_pcode_read(dev_priv, GEN6_READ_OC_PARAMS,
+				       &params, NULL);
 		if (params & BIT(31)) { /* OC supported */
 			DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
 					 (rps->max_freq & 0xff) * 50,
@@ -8556,8 +8598,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq;
-
-	mutex_unlock(&dev_priv->pcu_lock);
+	rps->idle_freq = rps->min_freq;
+	rps->cur_freq = rps->idle_freq;
 }
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
@@ -8583,7 +8625,7 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
 
 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (!i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8595,7 +8637,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8614,7 +8656,7 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
 
 static void intel_disable_rps(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (!dev_priv->gt_pm.rps.enabled)
 		return;
@@ -8635,19 +8677,19 @@ static void intel_disable_rps(struct drm_i915_private *dev_priv)
 
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	intel_disable_rc6(dev_priv);
 	intel_disable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_disable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 {
-	lockdep_assert_held(&i915->pcu_lock);
+	lockdep_assert_held(&i915->gt_pm.rps.lock);
 
 	if (i915->gt_pm.llc_pstate.enabled)
 		return;
@@ -8659,7 +8701,7 @@ static inline void intel_enable_llc_pstate(struct drm_i915_private *i915)
 
 static void intel_enable_rc6(struct drm_i915_private *dev_priv)
 {
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
 	if (dev_priv->gt_pm.rc6.enabled)
 		return;
@@ -8684,7 +8726,7 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	lockdep_assert_held(&dev_priv->pcu_lock);
+	lockdep_assert_held(&rps->lock);
 
 	if (rps->enabled)
 		return;
@@ -8719,15 +8761,16 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv)
 	if (intel_vgpu_active(dev_priv))
 		return;
 
-	mutex_lock(&dev_priv->pcu_lock);
+	mutex_lock(&dev_priv->gt_pm.rps.lock);
 
 	if (HAS_RC6(dev_priv))
 		intel_enable_rc6(dev_priv);
-	intel_enable_rps(dev_priv);
+	if (HAS_RPS(dev_priv))
+		intel_enable_rps(dev_priv);
 	if (HAS_LLC(dev_priv))
 		intel_enable_llc_pstate(dev_priv);
 
-	mutex_unlock(&dev_priv->pcu_lock);
+	mutex_unlock(&dev_priv->gt_pm.rps.lock);
 }
 
 static void ibx_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9476,16 +9519,21 @@ static void g4x_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void i965gm_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
-	I915_WRITE(RENCLK_GATE_D2, 0);
-	I915_WRITE(DSPCLK_GATE_D, 0);
-	I915_WRITE(RAMCLK_GATE_D, 0);
-	I915_WRITE16(DEUC, 0);
-	I915_WRITE(MI_ARB_STATE,
-		   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
+	struct intel_uncore *uncore = &dev_priv->uncore;
+
+	intel_uncore_write(uncore, RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
+	intel_uncore_write(uncore, RENCLK_GATE_D2, 0);
+	intel_uncore_write(uncore, DSPCLK_GATE_D, 0);
+	intel_uncore_write(uncore, RAMCLK_GATE_D, 0);
+	intel_uncore_write16(uncore, DEUC, 0);
+	intel_uncore_write(uncore,
+			   MI_ARB_STATE,
+			   _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE));
 
 	/* WaDisable_RenderCache_OperationalFlush:gen4 */
-	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+	intel_uncore_write(uncore,
+			   CACHE_MODE_0,
+			   _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
 }
 
 static void i965g_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -9698,221 +9746,6 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
 	}
 }
 
-static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
-{
-	u32 flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_UNIMPLEMENTED_CMD:
-		return -ENODEV;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	case GEN6_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	default:
-		MISSING_CASE(flags);
-		return 0;
-	}
-}
-
-static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
-{
-	u32 flags =
-		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
-
-	switch (flags) {
-	case GEN6_PCODE_SUCCESS:
-		return 0;
-	case GEN6_PCODE_ILLEGAL_CMD:
-		return -ENXIO;
-	case GEN7_PCODE_TIMEOUT:
-		return -ETIMEDOUT;
-	case GEN7_PCODE_ILLEGAL_DATA:
-		return -EINVAL;
-	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
-		return -EOVERFLOW;
-	default:
-		MISSING_CASE(flags);
-		return 0;
-	}
-}
-
-int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
-{
-	int status;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use te fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n",
-				 mbox, __builtin_return_address(0));
-		return -EAGAIN;
-	}
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, *val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(&dev_priv->uncore,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 500, 0, NULL)) {
-		DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n",
-			  mbox, __builtin_return_address(0));
-		return -ETIMEDOUT;
-	}
-
-	*val = I915_READ_FW(GEN6_PCODE_DATA);
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
-	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	if (status) {
-		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
-				 mbox, __builtin_return_address(0), status);
-		return status;
-	}
-
-	return 0;
-}
-
-int sandybridge_pcode_write_timeout(struct drm_i915_private *dev_priv,
-				    u32 mbox, u32 val,
-				    int fast_timeout_us, int slow_timeout_ms)
-{
-	int status;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	/* GEN6_PCODE_* are outside of the forcewake domain, we can
-	 * use te fw I915_READ variants to reduce the amount of work
-	 * required when reading/writing.
-	 */
-
-	if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n",
-				 val, mbox, __builtin_return_address(0));
-		return -EAGAIN;
-	}
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, val);
-	I915_WRITE_FW(GEN6_PCODE_DATA1, 0);
-	I915_WRITE_FW(GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
-
-	if (__intel_wait_for_register_fw(&dev_priv->uncore,
-					 GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0,
-					 fast_timeout_us, slow_timeout_ms,
-					 NULL)) {
-		DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n",
-			  val, mbox, __builtin_return_address(0));
-		return -ETIMEDOUT;
-	}
-
-	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
-
-	if (INTEL_GEN(dev_priv) > 6)
-		status = gen7_check_mailbox_status(dev_priv);
-	else
-		status = gen6_check_mailbox_status(dev_priv);
-
-	if (status) {
-		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
-				 val, mbox, __builtin_return_address(0), status);
-		return status;
-	}
-
-	return 0;
-}
-
-static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox,
-				  u32 request, u32 reply_mask, u32 reply,
-				  u32 *status)
-{
-	u32 val = request;
-
-	*status = sandybridge_pcode_read(dev_priv, mbox, &val);
-
-	return *status || ((val & reply_mask) == reply);
-}
-
-/**
- * skl_pcode_request - send PCODE request until acknowledgment
- * @dev_priv: device private
- * @mbox: PCODE mailbox ID the request is targeted for
- * @request: request ID
- * @reply_mask: mask used to check for request acknowledgment
- * @reply: value used to check for request acknowledgment
- * @timeout_base_ms: timeout for polling with preemption enabled
- *
- * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
- * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
- * The request is acknowledged once the PCODE reply dword equals @reply after
- * applying @reply_mask. Polling is first attempted with preemption enabled
- * for @timeout_base_ms and if this times out for another 50 ms with
- * preemption disabled.
- *
- * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
- * other error as reported by PCODE.
- */
-int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request,
-		      u32 reply_mask, u32 reply, int timeout_base_ms)
-{
-	u32 status;
-	int ret;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-#define COND skl_pcode_try_request(dev_priv, mbox, request, reply_mask, reply, \
-				   &status)
-
-	/*
-	 * Prime the PCODE by doing a request first. Normally it guarantees
-	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
-	 * _wait_for() doesn't guarantee when its passed condition is evaluated
-	 * first, so send the first request explicitly.
-	 */
-	if (COND) {
-		ret = 0;
-		goto out;
-	}
-	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
-	if (!ret)
-		goto out;
-
-	/*
-	 * The above can time out if the number of requests was low (2 in the
-	 * worst case) _and_ PCODE was busy for some reason even after a
-	 * (queued) request and @timeout_base_ms delay. As a workaround retry
-	 * the poll with preemption disabled to maximize the number of
-	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
-	 * account for interrupts that could reduce the number of these
-	 * requests, and for any quirks of the PCODE firmware that delays
-	 * the request completion.
-	 */
-	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
-	WARN_ON_ONCE(timeout_base_ms > 3);
-	preempt_disable();
-	ret = wait_for_atomic(COND, 50);
-	preempt_enable();
-
-out:
-	return ret ? ret : status;
-#undef COND
-}
-
 static int byt_gpu_freq(struct drm_i915_private *dev_priv, int val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -9978,7 +9811,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
 
 void intel_pm_setup(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->pcu_lock);
+	mutex_init(&dev_priv->gt_pm.rps.lock);
 	mutex_init(&dev_priv->gt_pm.rps.power.mutex);
 
 	atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
@@ -10108,6 +9941,12 @@ u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv,
 	return mul_u64_u32_div(time_hw, mul, div);
 }
 
+u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
+			   i915_reg_t reg)
+{
+	return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(dev_priv, reg), 1000);
+}
+
 u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat)
 {
 	u32 cagf;
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index 674a3f0f16a7..1b489fa399e1 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -8,6 +8,8 @@
 
 #include <linux/types.h>
 
+#include "i915_reg.h"
+
 struct drm_atomic_state;
 struct drm_device;
 struct drm_i915_private;
@@ -68,4 +70,21 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc,
 void intel_init_ipc(struct drm_i915_private *dev_priv);
 void intel_enable_ipc(struct drm_i915_private *dev_priv);
 
+int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
+int intel_freq_opcode(struct drm_i915_private *dev_priv, int val);
+u64 intel_rc6_residency_ns(struct drm_i915_private *dev_priv, i915_reg_t reg);
+u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv, i915_reg_t reg);
+
+u32 intel_get_cagf(struct drm_i915_private *dev_priv, u32 rpstat1);
+
+unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
+unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
+unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
+void i915_update_gfx_val(struct drm_i915_private *dev_priv);
+
+bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
+int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
+void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive);
+bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable);
+
 #endif /* __INTEL_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 6150e35bf7b5..502c54428570 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -32,11 +32,6 @@
 #include <drm/drm_print.h>
 
 #include "i915_drv.h"
-#include "intel_cdclk.h"
-#include "intel_crt.h"
-#include "intel_csr.h"
-#include "intel_dp.h"
-#include "intel_drv.h"
 
 /**
  * DOC: runtime pm
@@ -80,24 +75,18 @@ static void __print_depot_stack(depot_stack_handle_t stack,
 	stack_trace_snprint(buf, sz, entries, nr_entries, indent);
 }
 
-static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	struct i915_runtime_pm *rpm = &i915->runtime_pm;
-
 	spin_lock_init(&rpm->debug.lock);
 }
 
 static noinline depot_stack_handle_t
-track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	struct i915_runtime_pm *rpm = &i915->runtime_pm;
 	depot_stack_handle_t stack, *stacks;
 	unsigned long flags;
 
-	atomic_inc(&rpm->wakeref_count);
-	assert_rpm_wakelock_held(i915);
-
-	if (!HAS_RUNTIME_PM(i915))
+	if (!rpm->available)
 		return -1;
 
 	stack = __save_depot_stack();
@@ -124,10 +113,9 @@ track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
 	return stack;
 }
 
-static void cancel_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
-					    depot_stack_handle_t stack)
+static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
+					     depot_stack_handle_t stack)
 {
-	struct i915_runtime_pm *rpm = &i915->runtime_pm;
 	unsigned long flags, n;
 	bool found = false;
 
@@ -220,41 +208,66 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p,
 }
 
 static noinline void
-untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
+		       struct intel_runtime_pm_debug *saved)
 {
-	struct i915_runtime_pm *rpm = &i915->runtime_pm;
-	struct intel_runtime_pm_debug dbg = {};
-	struct drm_printer p;
-	unsigned long flags;
+	*saved = *debug;
 
-	assert_rpm_wakelock_held(i915);
-	if (atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
-					&rpm->debug.lock,
-					flags)) {
-		dbg = rpm->debug;
+	debug->owners = NULL;
+	debug->count = 0;
+	debug->last_release = __save_depot_stack();
+}
 
-		rpm->debug.owners = NULL;
-		rpm->debug.count = 0;
-		rpm->debug.last_release = __save_depot_stack();
+static void
+dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
+{
+	struct drm_printer p;
 
-		spin_unlock_irqrestore(&rpm->debug.lock, flags);
-	}
-	if (!dbg.count)
+	if (!debug->count)
 		return;
 
 	p = drm_debug_printer("i915");
-	__print_intel_runtime_pm_wakeref(&p, &dbg);
+	__print_intel_runtime_pm_wakeref(&p, debug);
 
-	kfree(dbg.owners);
+	kfree(debug->owners);
 }
 
-void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
+static noinline void
+__intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
+{
+	struct intel_runtime_pm_debug dbg = {};
+	unsigned long flags;
+
+	if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
+					 &rpm->debug.lock,
+					 flags))
+		return;
+
+	__untrack_all_wakerefs(&rpm->debug, &dbg);
+	spin_unlock_irqrestore(&rpm->debug.lock, flags);
+
+	dump_and_free_wakeref_tracking(&dbg);
+}
+
+static noinline void
+untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
+{
+	struct intel_runtime_pm_debug dbg = {};
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpm->debug.lock, flags);
+	__untrack_all_wakerefs(&rpm->debug, &dbg);
+	spin_unlock_irqrestore(&rpm->debug.lock, flags);
+
+	dump_and_free_wakeref_tracking(&dbg);
+}
+
+void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
 				    struct drm_printer *p)
 {
 	struct intel_runtime_pm_debug dbg = {};
 
 	do {
-		struct i915_runtime_pm *rpm = &i915->runtime_pm;
 		unsigned long alloc = dbg.count;
 		depot_stack_handle_t *s;
 
@@ -288,4041 +301,97 @@ out:
 
 #else
 
-static void init_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 }
 
 static depot_stack_handle_t
-track_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
+track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
-	atomic_inc(&i915->runtime_pm.wakeref_count);
-	assert_rpm_wakelock_held(i915);
 	return -1;
 }
 
-static void untrack_intel_runtime_pm_wakeref(struct drm_i915_private *i915)
-{
-	assert_rpm_wakelock_held(i915);
-	atomic_dec(&i915->runtime_pm.wakeref_count);
-}
-
-#endif
-
-bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
-					 enum i915_power_well_id power_well_id);
-
-const char *
-intel_display_power_domain_str(enum intel_display_power_domain domain)
-{
-	switch (domain) {
-	case POWER_DOMAIN_PIPE_A:
-		return "PIPE_A";
-	case POWER_DOMAIN_PIPE_B:
-		return "PIPE_B";
-	case POWER_DOMAIN_PIPE_C:
-		return "PIPE_C";
-	case POWER_DOMAIN_PIPE_A_PANEL_FITTER:
-		return "PIPE_A_PANEL_FITTER";
-	case POWER_DOMAIN_PIPE_B_PANEL_FITTER:
-		return "PIPE_B_PANEL_FITTER";
-	case POWER_DOMAIN_PIPE_C_PANEL_FITTER:
-		return "PIPE_C_PANEL_FITTER";
-	case POWER_DOMAIN_TRANSCODER_A:
-		return "TRANSCODER_A";
-	case POWER_DOMAIN_TRANSCODER_B:
-		return "TRANSCODER_B";
-	case POWER_DOMAIN_TRANSCODER_C:
-		return "TRANSCODER_C";
-	case POWER_DOMAIN_TRANSCODER_EDP:
-		return "TRANSCODER_EDP";
-	case POWER_DOMAIN_TRANSCODER_EDP_VDSC:
-		return "TRANSCODER_EDP_VDSC";
-	case POWER_DOMAIN_TRANSCODER_DSI_A:
-		return "TRANSCODER_DSI_A";
-	case POWER_DOMAIN_TRANSCODER_DSI_C:
-		return "TRANSCODER_DSI_C";
-	case POWER_DOMAIN_PORT_DDI_A_LANES:
-		return "PORT_DDI_A_LANES";
-	case POWER_DOMAIN_PORT_DDI_B_LANES:
-		return "PORT_DDI_B_LANES";
-	case POWER_DOMAIN_PORT_DDI_C_LANES:
-		return "PORT_DDI_C_LANES";
-	case POWER_DOMAIN_PORT_DDI_D_LANES:
-		return "PORT_DDI_D_LANES";
-	case POWER_DOMAIN_PORT_DDI_E_LANES:
-		return "PORT_DDI_E_LANES";
-	case POWER_DOMAIN_PORT_DDI_F_LANES:
-		return "PORT_DDI_F_LANES";
-	case POWER_DOMAIN_PORT_DDI_A_IO:
-		return "PORT_DDI_A_IO";
-	case POWER_DOMAIN_PORT_DDI_B_IO:
-		return "PORT_DDI_B_IO";
-	case POWER_DOMAIN_PORT_DDI_C_IO:
-		return "PORT_DDI_C_IO";
-	case POWER_DOMAIN_PORT_DDI_D_IO:
-		return "PORT_DDI_D_IO";
-	case POWER_DOMAIN_PORT_DDI_E_IO:
-		return "PORT_DDI_E_IO";
-	case POWER_DOMAIN_PORT_DDI_F_IO:
-		return "PORT_DDI_F_IO";
-	case POWER_DOMAIN_PORT_DSI:
-		return "PORT_DSI";
-	case POWER_DOMAIN_PORT_CRT:
-		return "PORT_CRT";
-	case POWER_DOMAIN_PORT_OTHER:
-		return "PORT_OTHER";
-	case POWER_DOMAIN_VGA:
-		return "VGA";
-	case POWER_DOMAIN_AUDIO:
-		return "AUDIO";
-	case POWER_DOMAIN_PLLS:
-		return "PLLS";
-	case POWER_DOMAIN_AUX_A:
-		return "AUX_A";
-	case POWER_DOMAIN_AUX_B:
-		return "AUX_B";
-	case POWER_DOMAIN_AUX_C:
-		return "AUX_C";
-	case POWER_DOMAIN_AUX_D:
-		return "AUX_D";
-	case POWER_DOMAIN_AUX_E:
-		return "AUX_E";
-	case POWER_DOMAIN_AUX_F:
-		return "AUX_F";
-	case POWER_DOMAIN_AUX_IO_A:
-		return "AUX_IO_A";
-	case POWER_DOMAIN_AUX_TBT1:
-		return "AUX_TBT1";
-	case POWER_DOMAIN_AUX_TBT2:
-		return "AUX_TBT2";
-	case POWER_DOMAIN_AUX_TBT3:
-		return "AUX_TBT3";
-	case POWER_DOMAIN_AUX_TBT4:
-		return "AUX_TBT4";
-	case POWER_DOMAIN_GMBUS:
-		return "GMBUS";
-	case POWER_DOMAIN_INIT:
-		return "INIT";
-	case POWER_DOMAIN_MODESET:
-		return "MODESET";
-	case POWER_DOMAIN_GT_IRQ:
-		return "GT_IRQ";
-	default:
-		MISSING_CASE(domain);
-		return "?";
-	}
-}
-
-static void intel_power_well_enable(struct drm_i915_private *dev_priv,
-				    struct i915_power_well *power_well)
-{
-	DRM_DEBUG_KMS("enabling %s\n", power_well->desc->name);
-	power_well->desc->ops->enable(dev_priv, power_well);
-	power_well->hw_enabled = true;
-}
-
-static void intel_power_well_disable(struct drm_i915_private *dev_priv,
-				     struct i915_power_well *power_well)
-{
-	DRM_DEBUG_KMS("disabling %s\n", power_well->desc->name);
-	power_well->hw_enabled = false;
-	power_well->desc->ops->disable(dev_priv, power_well);
-}
-
-static void intel_power_well_get(struct drm_i915_private *dev_priv,
-				 struct i915_power_well *power_well)
-{
-	if (!power_well->count++)
-		intel_power_well_enable(dev_priv, power_well);
-}
-
-static void intel_power_well_put(struct drm_i915_private *dev_priv,
-				 struct i915_power_well *power_well)
-{
-	WARN(!power_well->count, "Use count on power well %s is already zero",
-	     power_well->desc->name);
-
-	if (!--power_well->count)
-		intel_power_well_disable(dev_priv, power_well);
-}
-
-/**
- * __intel_display_power_is_enabled - unlocked check for a power domain
- * @dev_priv: i915 device instance
- * @domain: power domain to check
- *
- * This is the unlocked version of intel_display_power_is_enabled() and should
- * only be used from error capture and recovery code where deadlocks are
- * possible.
- *
- * Returns:
- * True when the power domain is enabled, false otherwise.
- */
-bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
-				      enum intel_display_power_domain domain)
-{
-	struct i915_power_well *power_well;
-	bool is_enabled;
-
-	if (dev_priv->runtime_pm.suspended)
-		return false;
-
-	is_enabled = true;
-
-	for_each_power_domain_well_reverse(dev_priv, power_well, BIT_ULL(domain)) {
-		if (power_well->desc->always_on)
-			continue;
-
-		if (!power_well->hw_enabled) {
-			is_enabled = false;
-			break;
-		}
-	}
-
-	return is_enabled;
-}
-
-/**
- * intel_display_power_is_enabled - check for a power domain
- * @dev_priv: i915 device instance
- * @domain: power domain to check
- *
- * This function can be used to check the hw power domain state. It is mostly
- * used in hardware state readout functions. Everywhere else code should rely
- * upon explicit power domain reference counting to ensure that the hardware
- * block is powered up before accessing it.
- *
- * Callers must hold the relevant modesetting locks to ensure that concurrent
- * threads can't disable the power well while the caller tries to read a few
- * registers.
- *
- * Returns:
- * True when the power domain is enabled, false otherwise.
- */
-bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
-				    enum intel_display_power_domain domain)
-{
-	struct i915_power_domains *power_domains;
-	bool ret;
-
-	power_domains = &dev_priv->power_domains;
-
-	mutex_lock(&power_domains->lock);
-	ret = __intel_display_power_is_enabled(dev_priv, domain);
-	mutex_unlock(&power_domains->lock);
-
-	return ret;
-}
-
-/*
- * Starting with Haswell, we have a "Power Down Well" that can be turned off
- * when not needed anymore. We have 4 registers that can request the power well
- * to be enabled, and it will only be disabled if none of the registers is
- * requesting it to be enabled.
- */
-static void hsw_power_well_post_enable(struct drm_i915_private *dev_priv,
-				       u8 irq_pipe_mask, bool has_vga)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-
-	/*
-	 * After we re-enable the power well, if we touch VGA register 0x3d5
-	 * we'll get unclaimed register interrupts. This stops after we write
-	 * anything to the VGA MSR register. The vgacon module uses this
-	 * register all the time, so if we unbind our driver and, as a
-	 * consequence, bind vgacon, we'll get stuck in an infinite loop at
-	 * console_unlock(). So make here we touch the VGA MSR register, making
-	 * sure vgacon can keep working normally without triggering interrupts
-	 * and error messages.
-	 */
-	if (has_vga) {
-		vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO);
-		outb(inb(VGA_MSR_READ), VGA_MSR_WRITE);
-		vga_put(pdev, VGA_RSRC_LEGACY_IO);
-	}
-
-	if (irq_pipe_mask)
-		gen8_irq_power_well_post_enable(dev_priv, irq_pipe_mask);
-}
-
-static void hsw_power_well_pre_disable(struct drm_i915_private *dev_priv,
-				       u8 irq_pipe_mask)
-{
-	if (irq_pipe_mask)
-		gen8_irq_power_well_pre_disable(dev_priv, irq_pipe_mask);
-}
-
-
-static void hsw_wait_for_power_well_enable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-
-	/* Timeout for PW1:10 us, AUX:not specified, other PWs:20 us. */
-	WARN_ON(intel_wait_for_register(&dev_priv->uncore,
-					regs->driver,
-					HSW_PWR_WELL_CTL_STATE(pw_idx),
-					HSW_PWR_WELL_CTL_STATE(pw_idx),
-					1));
-}
-
-static u32 hsw_power_well_requesters(struct drm_i915_private *dev_priv,
-				     const struct i915_power_well_regs *regs,
-				     int pw_idx)
+static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
+					     intel_wakeref_t wref)
 {
-	u32 req_mask = HSW_PWR_WELL_CTL_REQ(pw_idx);
-	u32 ret;
-
-	ret = I915_READ(regs->bios) & req_mask ? 1 : 0;
-	ret |= I915_READ(regs->driver) & req_mask ? 2 : 0;
-	if (regs->kvmr.reg)
-		ret |= I915_READ(regs->kvmr) & req_mask ? 4 : 0;
-	ret |= I915_READ(regs->debug) & req_mask ? 8 : 0;
-
-	return ret;
-}
-
-static void hsw_wait_for_power_well_disable(struct drm_i915_private *dev_priv,
-					    struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	bool disabled;
-	u32 reqs;
-
-	/*
-	 * Bspec doesn't require waiting for PWs to get disabled, but still do
-	 * this for paranoia. The known cases where a PW will be forced on:
-	 * - a KVMR request on any power well via the KVMR request register
-	 * - a DMC request on PW1 and MISC_IO power wells via the BIOS and
-	 *   DEBUG request registers
-	 * Skip the wait in case any of the request bits are set and print a
-	 * diagnostic message.
-	 */
-	wait_for((disabled = !(I915_READ(regs->driver) &
-			       HSW_PWR_WELL_CTL_STATE(pw_idx))) ||
-		 (reqs = hsw_power_well_requesters(dev_priv, regs, pw_idx)), 1);
-	if (disabled)
-		return;
-
-	DRM_DEBUG_KMS("%s forced on (bios:%d driver:%d kvmr:%d debug:%d)\n",
-		      power_well->desc->name,
-		      !!(reqs & 1), !!(reqs & 2), !!(reqs & 4), !!(reqs & 8));
-}
-
-static void gen9_wait_for_power_well_fuses(struct drm_i915_private *dev_priv,
-					   enum skl_power_gate pg)
-{
-	/* Timeout 5us for PG#0, for other PGs 1us */
-	WARN_ON(intel_wait_for_register(&dev_priv->uncore, SKL_FUSE_STATUS,
-					SKL_FUSE_PG_DIST_STATUS(pg),
-					SKL_FUSE_PG_DIST_STATUS(pg), 1));
-}
-
-static void hsw_power_well_enable(struct drm_i915_private *dev_priv,
-				  struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	bool wait_fuses = power_well->desc->hsw.has_fuses;
-	enum skl_power_gate uninitialized_var(pg);
-	u32 val;
-
-	if (wait_fuses) {
-		pg = INTEL_GEN(dev_priv) >= 11 ? ICL_PW_CTL_IDX_TO_PG(pw_idx) :
-						 SKL_PW_CTL_IDX_TO_PG(pw_idx);
-		/*
-		 * For PW1 we have to wait both for the PW0/PG0 fuse state
-		 * before enabling the power well and PW1/PG1's own fuse
-		 * state after the enabling. For all other power wells with
-		 * fuses we only have to wait for that PW/PG's fuse state
-		 * after the enabling.
-		 */
-		if (pg == SKL_PG1)
-			gen9_wait_for_power_well_fuses(dev_priv, SKL_PG0);
-	}
-
-	val = I915_READ(regs->driver);
-	I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx));
-	hsw_wait_for_power_well_enable(dev_priv, power_well);
-
-	/* Display WA #1178: cnl */
-	if (IS_CANNONLAKE(dev_priv) &&
-	    pw_idx >= GLK_PW_CTL_IDX_AUX_B &&
-	    pw_idx <= CNL_PW_CTL_IDX_AUX_F) {
-		val = I915_READ(CNL_AUX_ANAOVRD1(pw_idx));
-		val |= CNL_AUX_ANAOVRD1_ENABLE | CNL_AUX_ANAOVRD1_LDO_BYPASS;
-		I915_WRITE(CNL_AUX_ANAOVRD1(pw_idx), val);
-	}
-
-	if (wait_fuses)
-		gen9_wait_for_power_well_fuses(dev_priv, pg);
-
-	hsw_power_well_post_enable(dev_priv,
-				   power_well->desc->hsw.irq_pipe_mask,
-				   power_well->desc->hsw.has_vga);
-}
-
-static void hsw_power_well_disable(struct drm_i915_private *dev_priv,
-				   struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	u32 val;
-
-	hsw_power_well_pre_disable(dev_priv,
-				   power_well->desc->hsw.irq_pipe_mask);
-
-	val = I915_READ(regs->driver);
-	I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx));
-	hsw_wait_for_power_well_disable(dev_priv, power_well);
 }
 
-#define ICL_AUX_PW_TO_PORT(pw_idx)	((pw_idx) - ICL_PW_CTL_IDX_AUX_A)
-
 static void
-icl_combo_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
-				    struct i915_power_well *power_well)
+__intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
 {
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	enum port port = ICL_AUX_PW_TO_PORT(pw_idx);
-	u32 val;
-
-	val = I915_READ(regs->driver);
-	I915_WRITE(regs->driver, val | HSW_PWR_WELL_CTL_REQ(pw_idx));
-
-	val = I915_READ(ICL_PORT_CL_DW12(port));
-	I915_WRITE(ICL_PORT_CL_DW12(port), val | ICL_LANE_ENABLE_AUX);
-
-	hsw_wait_for_power_well_enable(dev_priv, power_well);
-
-	/* Display WA #1178: icl */
-	if (IS_ICELAKE(dev_priv) &&
-	    pw_idx >= ICL_PW_CTL_IDX_AUX_A && pw_idx <= ICL_PW_CTL_IDX_AUX_B &&
-	    !intel_bios_is_port_edp(dev_priv, port)) {
-		val = I915_READ(ICL_AUX_ANAOVRD1(pw_idx));
-		val |= ICL_AUX_ANAOVRD1_ENABLE | ICL_AUX_ANAOVRD1_LDO_BYPASS;
-		I915_WRITE(ICL_AUX_ANAOVRD1(pw_idx), val);
-	}
+	atomic_dec(&rpm->wakeref_count);
 }
 
 static void
-icl_combo_phy_aux_power_well_disable(struct drm_i915_private *dev_priv,
-				     struct i915_power_well *power_well)
+untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
 {
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	enum port port = ICL_AUX_PW_TO_PORT(pw_idx);
-	u32 val;
-
-	val = I915_READ(ICL_PORT_CL_DW12(port));
-	I915_WRITE(ICL_PORT_CL_DW12(port), val & ~ICL_LANE_ENABLE_AUX);
-
-	val = I915_READ(regs->driver);
-	I915_WRITE(regs->driver, val & ~HSW_PWR_WELL_CTL_REQ(pw_idx));
-
-	hsw_wait_for_power_well_disable(dev_priv, power_well);
 }
 
-#define ICL_AUX_PW_TO_CH(pw_idx)	\
-	((pw_idx) - ICL_PW_CTL_IDX_AUX_A + AUX_CH_A)
+#endif
 
 static void
-icl_tc_phy_aux_power_well_enable(struct drm_i915_private *dev_priv,
-				 struct i915_power_well *power_well)
-{
-	enum aux_ch aux_ch = ICL_AUX_PW_TO_CH(power_well->desc->hsw.idx);
-	u32 val;
-
-	val = I915_READ(DP_AUX_CH_CTL(aux_ch));
-	val &= ~DP_AUX_CH_CTL_TBT_IO;
-	if (power_well->desc->hsw.is_tc_tbt)
-		val |= DP_AUX_CH_CTL_TBT_IO;
-	I915_WRITE(DP_AUX_CH_CTL(aux_ch), val);
-
-	hsw_power_well_enable(dev_priv, power_well);
-}
-
-/*
- * We should only use the power well if we explicitly asked the hardware to
- * enable it, so check if it's enabled and also check if we've requested it to
- * be enabled.
- */
-static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv,
-				   struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	enum i915_power_well_id id = power_well->desc->id;
-	int pw_idx = power_well->desc->hsw.idx;
-	u32 mask = HSW_PWR_WELL_CTL_REQ(pw_idx) |
-		   HSW_PWR_WELL_CTL_STATE(pw_idx);
-	u32 val;
-
-	val = I915_READ(regs->driver);
-
-	/*
-	 * On GEN9 big core due to a DMC bug the driver's request bits for PW1
-	 * and the MISC_IO PW will be not restored, so check instead for the
-	 * BIOS's own request bits, which are forced-on for these power wells
-	 * when exiting DC5/6.
-	 */
-	if (IS_GEN(dev_priv, 9) && !IS_GEN9_LP(dev_priv) &&
-	    (id == SKL_DISP_PW_1 || id == SKL_DISP_PW_MISC_IO))
-		val |= I915_READ(regs->bios);
-
-	return (val & mask) == mask;
-}
-
-static void assert_can_enable_dc9(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_DC9),
-		  "DC9 already programmed to be enabled.\n");
-	WARN_ONCE(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
-		  "DC5 still not disabled to enable DC9.\n");
-	WARN_ONCE(I915_READ(HSW_PWR_WELL_CTL2) &
-		  HSW_PWR_WELL_CTL_REQ(SKL_PW_CTL_IDX_PW_2),
-		  "Power well 2 on.\n");
-	WARN_ONCE(intel_irqs_enabled(dev_priv),
-		  "Interrupts not disabled yet.\n");
-
-	 /*
-	  * TODO: check for the following to verify the conditions to enter DC9
-	  * state are satisfied:
-	  * 1] Check relevant display engine registers to verify if mode set
-	  * disable sequence was followed.
-	  * 2] Check if display uninitialize sequence is initialized.
-	  */
-}
-
-static void assert_can_disable_dc9(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE(intel_irqs_enabled(dev_priv),
-		  "Interrupts not disabled yet.\n");
-	WARN_ONCE(I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5,
-		  "DC5 still not disabled.\n");
-
-	 /*
-	  * TODO: check for the following to verify DC9 state was indeed
-	  * entered before programming to disable it:
-	  * 1] Check relevant display engine registers to verify if mode
-	  *  set disable sequence was followed.
-	  * 2] Check if display uninitialize sequence is initialized.
-	  */
-}
-
-static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
-				u32 state)
+intel_runtime_pm_acquire(struct intel_runtime_pm *rpm, bool wakelock)
 {
-	int rewrites = 0;
-	int rereads = 0;
-	u32 v;
-
-	I915_WRITE(DC_STATE_EN, state);
-
-	/* It has been observed that disabling the dc6 state sometimes
-	 * doesn't stick and dmc keeps returning old value. Make sure
-	 * the write really sticks enough times and also force rewrite until
-	 * we are confident that state is exactly what we want.
-	 */
-	do  {
-		v = I915_READ(DC_STATE_EN);
-
-		if (v != state) {
-			I915_WRITE(DC_STATE_EN, state);
-			rewrites++;
-			rereads = 0;
-		} else if (rereads++ > 5) {
-			break;
-		}
-
-	} while (rewrites < 100);
-
-	if (v != state)
-		DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
-			  state, v);
-
-	/* Most of the times we need one retry, avoid spam */
-	if (rewrites > 1)
-		DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
-			      state, rewrites);
-}
-
-static u32 gen9_dc_mask(struct drm_i915_private *dev_priv)
-{
-	u32 mask;
-
-	mask = DC_STATE_EN_UPTO_DC5;
-	if (INTEL_GEN(dev_priv) >= 11)
-		mask |= DC_STATE_EN_UPTO_DC6 | DC_STATE_EN_DC9;
-	else if (IS_GEN9_LP(dev_priv))
-		mask |= DC_STATE_EN_DC9;
-	else
-		mask |= DC_STATE_EN_UPTO_DC6;
-
-	return mask;
-}
-
-void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = I915_READ(DC_STATE_EN) & gen9_dc_mask(dev_priv);
-
-	DRM_DEBUG_KMS("Resetting DC state tracking from %02x to %02x\n",
-		      dev_priv->csr.dc_state, val);
-	dev_priv->csr.dc_state = val;
-}
-
-/**
- * gen9_set_dc_state - set target display C power state
- * @dev_priv: i915 device instance
- * @state: target DC power state
- * - DC_STATE_DISABLE
- * - DC_STATE_EN_UPTO_DC5
- * - DC_STATE_EN_UPTO_DC6
- * - DC_STATE_EN_DC9
- *
- * Signal to DMC firmware/HW the target DC power state passed in @state.
- * DMC/HW can turn off individual display clocks and power rails when entering
- * a deeper DC power state (higher in number) and turns these back when exiting
- * that state to a shallower power state (lower in number). The HW will decide
- * when to actually enter a given state on an on-demand basis, for instance
- * depending on the active state of display pipes. The state of display
- * registers backed by affected power rails are saved/restored as needed.
- *
- * Based on the above enabling a deeper DC power state is asynchronous wrt.
- * enabling it. Disabling a deeper power state is synchronous: for instance
- * setting %DC_STATE_DISABLE won't complete until all HW resources are turned
- * back on and register state is restored. This is guaranteed by the MMIO write
- * to DC_STATE_EN blocking until the state is restored.
- */
-static void gen9_set_dc_state(struct drm_i915_private *dev_priv, u32 state)
-{
-	u32 val;
-	u32 mask;
-
-	if (WARN_ON_ONCE(state & ~dev_priv->csr.allowed_dc_mask))
-		state &= dev_priv->csr.allowed_dc_mask;
-
-	val = I915_READ(DC_STATE_EN);
-	mask = gen9_dc_mask(dev_priv);
-	DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
-		      val & mask, state);
-
-	/* Check if DMC is ignoring our DC state requests */
-	if ((val & mask) != dev_priv->csr.dc_state)
-		DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
-			  dev_priv->csr.dc_state, val & mask);
-
-	val &= ~mask;
-	val |= state;
-
-	gen9_write_dc_state(dev_priv, val);
-
-	dev_priv->csr.dc_state = val & mask;
-}
-
-void bxt_enable_dc9(struct drm_i915_private *dev_priv)
-{
-	assert_can_enable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Enabling DC9\n");
-	/*
-	 * Power sequencer reset is not needed on
-	 * platforms with South Display Engine on PCH,
-	 * because PPS registers are always on.
-	 */
-	if (!HAS_PCH_SPLIT(dev_priv))
-		intel_power_sequencer_reset(dev_priv);
-	gen9_set_dc_state(dev_priv, DC_STATE_EN_DC9);
-}
-
-void bxt_disable_dc9(struct drm_i915_private *dev_priv)
-{
-	assert_can_disable_dc9(dev_priv);
-
-	DRM_DEBUG_KMS("Disabling DC9\n");
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	intel_pps_unlock_regs_wa(dev_priv);
-}
-
-static void assert_csr_loaded(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE(!I915_READ(CSR_PROGRAM(0)),
-		  "CSR program storage start is NULL\n");
-	WARN_ONCE(!I915_READ(CSR_SSP_BASE), "CSR SSP Base Not fine\n");
-	WARN_ONCE(!I915_READ(CSR_HTP_SKL), "CSR HTP Not fine\n");
-}
-
-static struct i915_power_well *
-lookup_power_well(struct drm_i915_private *dev_priv,
-		  enum i915_power_well_id power_well_id)
-{
-	struct i915_power_well *power_well;
-
-	for_each_power_well(dev_priv, power_well)
-		if (power_well->desc->id == power_well_id)
-			return power_well;
-
-	/*
-	 * It's not feasible to add error checking code to the callers since
-	 * this condition really shouldn't happen and it doesn't even make sense
-	 * to abort things like display initialization sequences. Just return
-	 * the first power well and hope the WARN gets reported so we can fix
-	 * our driver.
-	 */
-	WARN(1, "Power well %d not defined for this platform\n", power_well_id);
-	return &dev_priv->power_domains.power_wells[0];
-}
-
-static void assert_can_enable_dc5(struct drm_i915_private *dev_priv)
-{
-	bool pg2_enabled = intel_display_power_well_is_enabled(dev_priv,
-					SKL_DISP_PW_2);
-
-	WARN_ONCE(pg2_enabled, "PG2 not disabled to enable DC5.\n");
-
-	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5),
-		  "DC5 already programmed to be enabled.\n");
-	assert_rpm_wakelock_held(dev_priv);
-
-	assert_csr_loaded(dev_priv);
-}
-
-void gen9_enable_dc5(struct drm_i915_private *dev_priv)
-{
-	assert_can_enable_dc5(dev_priv);
-
-	DRM_DEBUG_KMS("Enabling DC5\n");
-
-	/* Wa Display #1183: skl,kbl,cfl */
-	if (IS_GEN9_BC(dev_priv))
-		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
-			   SKL_SELECT_ALTERNATE_DC_EXIT);
-
-	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
-}
-
-static void assert_can_enable_dc6(struct drm_i915_private *dev_priv)
-{
-	WARN_ONCE(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
-		  "Backlight is not disabled.\n");
-	WARN_ONCE((I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC6),
-		  "DC6 already programmed to be enabled.\n");
-
-	assert_csr_loaded(dev_priv);
-}
-
-void skl_enable_dc6(struct drm_i915_private *dev_priv)
-{
-	assert_can_enable_dc6(dev_priv);
-
-	DRM_DEBUG_KMS("Enabling DC6\n");
-
-	/* Wa Display #1183: skl,kbl,cfl */
-	if (IS_GEN9_BC(dev_priv))
-		I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
-			   SKL_SELECT_ALTERNATE_DC_EXIT);
-
-	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
-}
-
-static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv,
-				   struct i915_power_well *power_well)
-{
-	const struct i915_power_well_regs *regs = power_well->desc->hsw.regs;
-	int pw_idx = power_well->desc->hsw.idx;
-	u32 mask = HSW_PWR_WELL_CTL_REQ(pw_idx);
-	u32 bios_req = I915_READ(regs->bios);
-
-	/* Take over the request bit if set by BIOS. */
-	if (bios_req & mask) {
-		u32 drv_req = I915_READ(regs->driver);
-
-		if (!(drv_req & mask))
-			I915_WRITE(regs->driver, drv_req | mask);
-		I915_WRITE(regs->bios, bios_req & ~mask);
-	}
-}
-
-static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	bxt_ddi_phy_init(dev_priv, power_well->desc->bxt.phy);
-}
-
-static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
-					    struct i915_power_well *power_well)
-{
-	bxt_ddi_phy_uninit(dev_priv, power_well->desc->bxt.phy);
-}
-
-static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv,
-					    struct i915_power_well *power_well)
-{
-	return bxt_ddi_phy_is_enabled(dev_priv, power_well->desc->bxt.phy);
-}
-
-static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_well *power_well;
-
-	power_well = lookup_power_well(dev_priv, BXT_DISP_PW_DPIO_CMN_A);
-	if (power_well->count > 0)
-		bxt_ddi_phy_verify_state(dev_priv, power_well->desc->bxt.phy);
-
-	power_well = lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
-	if (power_well->count > 0)
-		bxt_ddi_phy_verify_state(dev_priv, power_well->desc->bxt.phy);
-
-	if (IS_GEMINILAKE(dev_priv)) {
-		power_well = lookup_power_well(dev_priv,
-					       GLK_DISP_PW_DPIO_CMN_C);
-		if (power_well->count > 0)
-			bxt_ddi_phy_verify_state(dev_priv,
-						 power_well->desc->bxt.phy);
-	}
-}
-
-static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	return (I915_READ(DC_STATE_EN) & DC_STATE_EN_UPTO_DC5_DC6_MASK) == 0;
-}
-
-static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv)
-{
-	u32 tmp = I915_READ(DBUF_CTL);
-
-	WARN((tmp & (DBUF_POWER_STATE | DBUF_POWER_REQUEST)) !=
-	     (DBUF_POWER_STATE | DBUF_POWER_REQUEST),
-	     "Unexpected DBuf power power state (0x%08x)\n", tmp);
-}
-
-static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv,
-					  struct i915_power_well *power_well)
-{
-	struct intel_cdclk_state cdclk_state = {};
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	dev_priv->display.get_cdclk(dev_priv, &cdclk_state);
-	/* Can't read out voltage_level so can't use intel_cdclk_changed() */
-	WARN_ON(intel_cdclk_needs_modeset(&dev_priv->cdclk.hw, &cdclk_state));
-
-	gen9_assert_dbuf_enabled(dev_priv);
-
-	if (IS_GEN9_LP(dev_priv))
-		bxt_verify_ddi_phy_power_wells(dev_priv);
-
-	if (INTEL_GEN(dev_priv) >= 11)
-		/*
-		 * DMC retains HW context only for port A, the other combo
-		 * PHY's HW context for port B is lost after DC transitions,
-		 * so we need to restore it manually.
-		 */
-		icl_combo_phys_init(dev_priv);
-}
-
-static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	if (!dev_priv->csr.dmc_payload)
-		return;
-
-	if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC6)
-		skl_enable_dc6(dev_priv);
-	else if (dev_priv->csr.allowed_dc_mask & DC_STATE_EN_UPTO_DC5)
-		gen9_enable_dc5(dev_priv);
-}
-
-static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv,
-					 struct i915_power_well *power_well)
-{
-}
-
-static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-}
-
-static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv,
-					     struct i915_power_well *power_well)
-{
-	return true;
-}
-
-static void i830_pipes_power_well_enable(struct drm_i915_private *dev_priv,
-					 struct i915_power_well *power_well)
-{
-	if ((I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE) == 0)
-		i830_enable_pipe(dev_priv, PIPE_A);
-	if ((I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE) == 0)
-		i830_enable_pipe(dev_priv, PIPE_B);
-}
-
-static void i830_pipes_power_well_disable(struct drm_i915_private *dev_priv,
-					  struct i915_power_well *power_well)
-{
-	i830_disable_pipe(dev_priv, PIPE_B);
-	i830_disable_pipe(dev_priv, PIPE_A);
-}
-
-static bool i830_pipes_power_well_enabled(struct drm_i915_private *dev_priv,
-					  struct i915_power_well *power_well)
-{
-	return I915_READ(PIPECONF(PIPE_A)) & PIPECONF_ENABLE &&
-		I915_READ(PIPECONF(PIPE_B)) & PIPECONF_ENABLE;
-}
-
-static void i830_pipes_power_well_sync_hw(struct drm_i915_private *dev_priv,
-					  struct i915_power_well *power_well)
-{
-	if (power_well->count > 0)
-		i830_pipes_power_well_enable(dev_priv, power_well);
-	else
-		i830_pipes_power_well_disable(dev_priv, power_well);
-}
-
-static void vlv_set_power_well(struct drm_i915_private *dev_priv,
-			       struct i915_power_well *power_well, bool enable)
-{
-	int pw_idx = power_well->desc->vlv.idx;
-	u32 mask;
-	u32 state;
-	u32 ctrl;
-
-	mask = PUNIT_PWRGT_MASK(pw_idx);
-	state = enable ? PUNIT_PWRGT_PWR_ON(pw_idx) :
-			 PUNIT_PWRGT_PWR_GATE(pw_idx);
-
-	mutex_lock(&dev_priv->pcu_lock);
-
-#define COND \
-	((vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask) == state)
-
-	if (COND)
-		goto out;
-
-	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL);
-	ctrl &= ~mask;
-	ctrl |= state;
-	vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
-
-	if (wait_for(COND, 100))
-		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
-			  state,
-			  vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
-
-#undef COND
-
-out:
-	mutex_unlock(&dev_priv->pcu_lock);
-}
-
-static void vlv_power_well_enable(struct drm_i915_private *dev_priv,
-				  struct i915_power_well *power_well)
-{
-	vlv_set_power_well(dev_priv, power_well, true);
-}
-
-static void vlv_power_well_disable(struct drm_i915_private *dev_priv,
-				   struct i915_power_well *power_well)
-{
-	vlv_set_power_well(dev_priv, power_well, false);
-}
-
-static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv,
-				   struct i915_power_well *power_well)
-{
-	int pw_idx = power_well->desc->vlv.idx;
-	bool enabled = false;
-	u32 mask;
-	u32 state;
-	u32 ctrl;
-
-	mask = PUNIT_PWRGT_MASK(pw_idx);
-	ctrl = PUNIT_PWRGT_PWR_ON(pw_idx);
-
-	mutex_lock(&dev_priv->pcu_lock);
-
-	state = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_STATUS) & mask;
-	/*
-	 * We only ever set the power-on and power-gate states, anything
-	 * else is unexpected.
-	 */
-	WARN_ON(state != PUNIT_PWRGT_PWR_ON(pw_idx) &&
-		state != PUNIT_PWRGT_PWR_GATE(pw_idx));
-	if (state == ctrl)
-		enabled = true;
-
-	/*
-	 * A transient state at this point would mean some unexpected party
-	 * is poking at the power controls too.
-	 */
-	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL) & mask;
-	WARN_ON(ctrl != state);
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return enabled;
-}
-
-static void vlv_init_display_clock_gating(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	/*
-	 * On driver load, a pipe may be active and driving a DSI display.
-	 * Preserve DPOUNIT_CLOCK_GATE_DISABLE to avoid the pipe getting stuck
-	 * (and never recovering) in this case. intel_dsi_post_disable() will
-	 * clear it when we turn off the display.
-	 */
-	val = I915_READ(DSPCLK_GATE_D);
-	val &= DPOUNIT_CLOCK_GATE_DISABLE;
-	val |= VRHUNIT_CLOCK_GATE_DISABLE;
-	I915_WRITE(DSPCLK_GATE_D, val);
-
-	/*
-	 * Disable trickle feed and enable pnd deadline calculation
-	 */
-	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
-	I915_WRITE(CBR1_VLV, 0);
-
-	WARN_ON(dev_priv->rawclk_freq == 0);
-
-	I915_WRITE(RAWCLK_FREQ_VLV,
-		   DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 1000));
-}
-
-static void vlv_display_power_well_init(struct drm_i915_private *dev_priv)
-{
-	struct intel_encoder *encoder;
-	enum pipe pipe;
-
-	/*
-	 * Enable the CRI clock source so we can get at the
-	 * display and the reference clock for VGA
-	 * hotplug / manual detection. Supposedly DSI also
-	 * needs the ref clock up and running.
-	 *
-	 * CHV DPLL B/C have some issues if VGA mode is enabled.
-	 */
-	for_each_pipe(dev_priv, pipe) {
-		u32 val = I915_READ(DPLL(pipe));
-
-		val |= DPLL_REF_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS;
-		if (pipe != PIPE_A)
-			val |= DPLL_INTEGRATED_CRI_CLK_VLV;
-
-		I915_WRITE(DPLL(pipe), val);
-	}
-
-	vlv_init_display_clock_gating(dev_priv);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	valleyview_enable_display_irqs(dev_priv);
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	/*
-	 * During driver initialization/resume we can avoid restoring the
-	 * part of the HW/SW state that will be inited anyway explicitly.
-	 */
-	if (dev_priv->power_domains.initializing)
-		return;
-
-	intel_hpd_init(dev_priv);
-
-	/* Re-enable the ADPA, if we have one */
-	for_each_intel_encoder(&dev_priv->drm, encoder) {
-		if (encoder->type == INTEL_OUTPUT_ANALOG)
-			intel_crt_reset(&encoder->base);
-	}
-
-	i915_redisable_vga_power_on(dev_priv);
-
-	intel_pps_unlock_regs_wa(dev_priv);
-}
-
-static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
-{
-	spin_lock_irq(&dev_priv->irq_lock);
-	valleyview_disable_display_irqs(dev_priv);
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	/* make sure we're done processing display irqs */
-	synchronize_irq(dev_priv->drm.irq);
-
-	intel_power_sequencer_reset(dev_priv);
-
-	/* Prevent us from re-enabling polling on accident in late suspend */
-	if (!dev_priv->drm.dev->power.is_suspended)
-		intel_hpd_poll_init(dev_priv);
-}
-
-static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
-					  struct i915_power_well *power_well)
-{
-	vlv_set_power_well(dev_priv, power_well, true);
-
-	vlv_display_power_well_init(dev_priv);
-}
-
-static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	vlv_display_power_well_deinit(dev_priv);
-
-	vlv_set_power_well(dev_priv, power_well, false);
-}
-
-static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	/* since ref/cri clock was enabled */
-	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
-
-	vlv_set_power_well(dev_priv, power_well, true);
-
-	/*
-	 * From VLV2A0_DP_eDP_DPIO_driver_vbios_notes_10.docx -
-	 *  6.	De-assert cmn_reset/side_reset. Same as VLV X0.
-	 *   a.	GUnit 0x2110 bit[0] set to 1 (def 0)
-	 *   b.	The other bits such as sfr settings / modesel may all
-	 *	be set to 0.
-	 *
-	 * This should only be done on init and resume from S3 with
-	 * both PLLs disabled, or we risk losing DPIO and PLL
-	 * synchronization.
-	 */
-	I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) | DPIO_CMNRST);
-}
-
-static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
-					    struct i915_power_well *power_well)
-{
-	enum pipe pipe;
-
-	for_each_pipe(dev_priv, pipe)
-		assert_pll_disabled(dev_priv, pipe);
-
-	/* Assert common reset */
-	I915_WRITE(DPIO_CTL, I915_READ(DPIO_CTL) & ~DPIO_CMNRST);
-
-	vlv_set_power_well(dev_priv, power_well, false);
-}
-
-#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0))
-
-#define BITS_SET(val, bits) (((val) & (bits)) == (bits))
-
-static void assert_chv_phy_status(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_well *cmn_bc =
-		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
-	struct i915_power_well *cmn_d =
-		lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D);
-	u32 phy_control = dev_priv->chv_phy_control;
-	u32 phy_status = 0;
-	u32 phy_status_mask = 0xffffffff;
-
-	/*
-	 * The BIOS can leave the PHY is some weird state
-	 * where it doesn't fully power down some parts.
-	 * Disable the asserts until the PHY has been fully
-	 * reset (ie. the power well has been disabled at
-	 * least once).
-	 */
-	if (!dev_priv->chv_phy_assert[DPIO_PHY0])
-		phy_status_mask &= ~(PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH0) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 0) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 1) |
-				     PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH1) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 0) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1));
-
-	if (!dev_priv->chv_phy_assert[DPIO_PHY1])
-		phy_status_mask &= ~(PHY_STATUS_CMN_LDO(DPIO_PHY1, DPIO_CH0) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0) |
-				     PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1));
-
-	if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) {
-		phy_status |= PHY_POWERGOOD(DPIO_PHY0);
-
-		/* this assumes override is only used to enable lanes */
-		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0)) == 0)
-			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0);
-
-		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1)) == 0)
-			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1);
-
-		/* CL1 is on whenever anything is on in either channel */
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH0) |
-			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1)))
-			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH0);
-
-		/*
-		 * The DPLLB check accounts for the pipe B + port A usage
-		 * with CL2 powered up but all the lanes in the second channel
-		 * powered down.
-		 */
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY0, DPIO_CH1)) &&
-		    (I915_READ(DPLL(PIPE_B)) & DPLL_VCO_ENABLE) == 0)
-			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY0, DPIO_CH1);
-
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH0)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 0);
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH0)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH0, 1);
-
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY0, DPIO_CH1)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 0);
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY0, DPIO_CH1)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY0, DPIO_CH1, 1);
-	}
-
-	if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) {
-		phy_status |= PHY_POWERGOOD(DPIO_PHY1);
-
-		/* this assumes override is only used to enable lanes */
-		if ((phy_control & PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0)) == 0)
-			phy_control |= PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0);
-
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xf, DPIO_PHY1, DPIO_CH0)))
-			phy_status |= PHY_STATUS_CMN_LDO(DPIO_PHY1, DPIO_CH0);
-
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0x3, DPIO_PHY1, DPIO_CH0)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 0);
-		if (BITS_SET(phy_control,
-			     PHY_CH_POWER_DOWN_OVRD(0xc, DPIO_PHY1, DPIO_CH0)))
-			phy_status |= PHY_STATUS_SPLINE_LDO(DPIO_PHY1, DPIO_CH0, 1);
-	}
-
-	phy_status &= phy_status_mask;
-
-	/*
-	 * The PHY may be busy with some initial calibration and whatnot,
-	 * so the power state can take a while to actually change.
-	 */
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    DISPLAY_PHY_STATUS,
-				    phy_status_mask,
-				    phy_status,
-				    10))
-		DRM_ERROR("Unexpected PHY_STATUS 0x%08x, expected 0x%08x (PHY_CONTROL=0x%08x)\n",
-			  I915_READ(DISPLAY_PHY_STATUS) & phy_status_mask,
-			   phy_status, dev_priv->chv_phy_control);
-}
-
-#undef BITS_SET
-
-static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
-					   struct i915_power_well *power_well)
-{
-	enum dpio_phy phy;
-	enum pipe pipe;
-	u32 tmp;
-
-	WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC &&
-		     power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D);
-
-	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
-		pipe = PIPE_A;
-		phy = DPIO_PHY0;
+	if (wakelock) {
+		atomic_add(1 + INTEL_RPM_WAKELOCK_BIAS, &rpm->wakeref_count);
+		assert_rpm_wakelock_held(rpm);
 	} else {
-		pipe = PIPE_C;
-		phy = DPIO_PHY1;
+		atomic_inc(&rpm->wakeref_count);
+		assert_rpm_raw_wakeref_held(rpm);
 	}
-
-	/* since ref/cri clock was enabled */
-	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
-	vlv_set_power_well(dev_priv, power_well, true);
-
-	/* Poll for phypwrgood signal */
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    DISPLAY_PHY_STATUS,
-				    PHY_POWERGOOD(phy),
-				    PHY_POWERGOOD(phy),
-				    1))
-		DRM_ERROR("Display PHY %d is not power up\n", phy);
-
-	mutex_lock(&dev_priv->sb_lock);
-
-	/* Enable dynamic power down */
-	tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW28);
-	tmp |= DPIO_DYNPWRDOWNEN_CH0 | DPIO_CL1POWERDOWNEN |
-		DPIO_SUS_CLK_CONFIG_GATE_CLKREQ;
-	vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp);
-
-	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
-		tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1);
-		tmp |= DPIO_DYNPWRDOWNEN_CH1;
-		vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp);
-	} else {
-		/*
-		 * Force the non-existing CL2 off. BXT does this
-		 * too, so maybe it saves some power even though
-		 * CL2 doesn't exist?
-		 */
-		tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW30);
-		tmp |= DPIO_CL2_LDOFUSE_PWRENB;
-		vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, tmp);
-	}
-
-	mutex_unlock(&dev_priv->sb_lock);
-
-	dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(phy);
-	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
-
-	DRM_DEBUG_KMS("Enabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n",
-		      phy, dev_priv->chv_phy_control);
-
-	assert_chv_phy_status(dev_priv);
-}
-
-static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
-					    struct i915_power_well *power_well)
-{
-	enum dpio_phy phy;
-
-	WARN_ON_ONCE(power_well->desc->id != VLV_DISP_PW_DPIO_CMN_BC &&
-		     power_well->desc->id != CHV_DISP_PW_DPIO_CMN_D);
-
-	if (power_well->desc->id == VLV_DISP_PW_DPIO_CMN_BC) {
-		phy = DPIO_PHY0;
-		assert_pll_disabled(dev_priv, PIPE_A);
-		assert_pll_disabled(dev_priv, PIPE_B);
-	} else {
-		phy = DPIO_PHY1;
-		assert_pll_disabled(dev_priv, PIPE_C);
-	}
-
-	dev_priv->chv_phy_control &= ~PHY_COM_LANE_RESET_DEASSERT(phy);
-	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
-
-	vlv_set_power_well(dev_priv, power_well, false);
-
-	DRM_DEBUG_KMS("Disabled DPIO PHY%d (PHY_CONTROL=0x%08x)\n",
-		      phy, dev_priv->chv_phy_control);
-
-	/* PHY is fully reset now, so we can enable the PHY state asserts */
-	dev_priv->chv_phy_assert[phy] = true;
-
-	assert_chv_phy_status(dev_priv);
-}
-
-static void assert_chv_phy_powergate(struct drm_i915_private *dev_priv, enum dpio_phy phy,
-				     enum dpio_channel ch, bool override, unsigned int mask)
-{
-	enum pipe pipe = phy == DPIO_PHY0 ? PIPE_A : PIPE_C;
-	u32 reg, val, expected, actual;
-
-	/*
-	 * The BIOS can leave the PHY is some weird state
-	 * where it doesn't fully power down some parts.
-	 * Disable the asserts until the PHY has been fully
-	 * reset (ie. the power well has been disabled at
-	 * least once).
-	 */
-	if (!dev_priv->chv_phy_assert[phy])
-		return;
-
-	if (ch == DPIO_CH0)
-		reg = _CHV_CMN_DW0_CH0;
-	else
-		reg = _CHV_CMN_DW6_CH1;
-
-	mutex_lock(&dev_priv->sb_lock);
-	val = vlv_dpio_read(dev_priv, pipe, reg);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	/*
-	 * This assumes !override is only used when the port is disabled.
-	 * All lanes should power down even without the override when
-	 * the port is disabled.
-	 */
-	if (!override || mask == 0xf) {
-		expected = DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN;
-		/*
-		 * If CH1 common lane is not active anymore
-		 * (eg. for pipe B DPLL) the entire channel will
-		 * shut down, which causes the common lane registers
-		 * to read as 0. That means we can't actually check
-		 * the lane power down status bits, but as the entire
-		 * register reads as 0 it's a good indication that the
-		 * channel is indeed entirely powered down.
-		 */
-		if (ch == DPIO_CH1 && val == 0)
-			expected = 0;
-	} else if (mask != 0x0) {
-		expected = DPIO_ANYDL_POWERDOWN;
-	} else {
-		expected = 0;
-	}
-
-	if (ch == DPIO_CH0)
-		actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH0;
-	else
-		actual = val >> DPIO_ANYDL_POWERDOWN_SHIFT_CH1;
-	actual &= DPIO_ALLDL_POWERDOWN | DPIO_ANYDL_POWERDOWN;
-
-	WARN(actual != expected,
-	     "Unexpected DPIO lane power down: all %d, any %d. Expected: all %d, any %d. (0x%x = 0x%08x)\n",
-	     !!(actual & DPIO_ALLDL_POWERDOWN), !!(actual & DPIO_ANYDL_POWERDOWN),
-	     !!(expected & DPIO_ALLDL_POWERDOWN), !!(expected & DPIO_ANYDL_POWERDOWN),
-	     reg, val);
-}
-
-bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy,
-			  enum dpio_channel ch, bool override)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	bool was_override;
-
-	mutex_lock(&power_domains->lock);
-
-	was_override = dev_priv->chv_phy_control & PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
-
-	if (override == was_override)
-		goto out;
-
-	if (override)
-		dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
-	else
-		dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
-
-	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
-
-	DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d (DPIO_PHY_CONTROL=0x%08x)\n",
-		      phy, ch, dev_priv->chv_phy_control);
-
-	assert_chv_phy_status(dev_priv);
-
-out:
-	mutex_unlock(&power_domains->lock);
-
-	return was_override;
-}
-
-void chv_phy_powergate_lanes(struct intel_encoder *encoder,
-			     bool override, unsigned int mask)
-{
-	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	enum dpio_phy phy = vlv_dport_to_phy(enc_to_dig_port(&encoder->base));
-	enum dpio_channel ch = vlv_dport_to_channel(enc_to_dig_port(&encoder->base));
-
-	mutex_lock(&power_domains->lock);
-
-	dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD(0xf, phy, ch);
-	dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD(mask, phy, ch);
-
-	if (override)
-		dev_priv->chv_phy_control |= PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
-	else
-		dev_priv->chv_phy_control &= ~PHY_CH_POWER_DOWN_OVRD_EN(phy, ch);
-
-	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
-
-	DRM_DEBUG_KMS("Power gating DPIO PHY%d CH%d lanes 0x%x (PHY_CONTROL=0x%08x)\n",
-		      phy, ch, mask, dev_priv->chv_phy_control);
-
-	assert_chv_phy_status(dev_priv);
-
-	assert_chv_phy_powergate(dev_priv, phy, ch, override, mask);
-
-	mutex_unlock(&power_domains->lock);
-}
-
-static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
-					struct i915_power_well *power_well)
-{
-	enum pipe pipe = PIPE_A;
-	bool enabled;
-	u32 state, ctrl;
-
-	mutex_lock(&dev_priv->pcu_lock);
-
-	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe);
-	/*
-	 * We only ever set the power-on and power-gate states, anything
-	 * else is unexpected.
-	 */
-	WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe));
-	enabled = state == DP_SSS_PWR_ON(pipe);
-
-	/*
-	 * A transient state at this point would mean some unexpected party
-	 * is poking at the power controls too.
-	 */
-	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSC_MASK(pipe);
-	WARN_ON(ctrl << 16 != state);
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return enabled;
-}
-
-static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
-				    struct i915_power_well *power_well,
-				    bool enable)
-{
-	enum pipe pipe = PIPE_A;
-	u32 state;
-	u32 ctrl;
-
-	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
-
-	mutex_lock(&dev_priv->pcu_lock);
-
-#define COND \
-	((vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM) & DP_SSS_MASK(pipe)) == state)
-
-	if (COND)
-		goto out;
-
-	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM);
-	ctrl &= ~DP_SSC_MASK(pipe);
-	ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe);
-	vlv_punit_write(dev_priv, PUNIT_REG_DSPSSPM, ctrl);
-
-	if (wait_for(COND, 100))
-		DRM_ERROR("timeout setting power well state %08x (%08x)\n",
-			  state,
-			  vlv_punit_read(dev_priv, PUNIT_REG_DSPSSPM));
-
-#undef COND
-
-out:
-	mutex_unlock(&dev_priv->pcu_lock);
-}
-
-static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
-				       struct i915_power_well *power_well)
-{
-	chv_set_pipe_power_well(dev_priv, power_well, true);
-
-	vlv_display_power_well_init(dev_priv);
-}
-
-static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
-					struct i915_power_well *power_well)
-{
-	vlv_display_power_well_deinit(dev_priv);
-
-	chv_set_pipe_power_well(dev_priv, power_well, false);
 }
 
 static void
-__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
-				 enum intel_display_power_domain domain)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *power_well;
-
-	for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain))
-		intel_power_well_get(dev_priv, power_well);
-
-	power_domains->domain_use_count[domain]++;
-}
-
-/**
- * intel_display_power_get - grab a power domain reference
- * @dev_priv: i915 device instance
- * @domain: power domain to reference
- *
- * This function grabs a power domain reference for @domain and ensures that the
- * power domain and all its parents are powered up. Therefore users should only
- * grab a reference to the innermost power domain they need.
- *
- * Any power domain reference obtained by this function must have a symmetric
- * call to intel_display_power_put() to release the reference again.
- */
-intel_wakeref_t intel_display_power_get(struct drm_i915_private *dev_priv,
-					enum intel_display_power_domain domain)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	intel_wakeref_t wakeref = intel_runtime_pm_get(dev_priv);
-
-	mutex_lock(&power_domains->lock);
-
-	__intel_display_power_get_domain(dev_priv, domain);
-
-	mutex_unlock(&power_domains->lock);
-
-	return wakeref;
-}
-
-/**
- * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
- * @dev_priv: i915 device instance
- * @domain: power domain to reference
- *
- * This function grabs a power domain reference for @domain and ensures that the
- * power domain and all its parents are powered up. Therefore users should only
- * grab a reference to the innermost power domain they need.
- *
- * Any power domain reference obtained by this function must have a symmetric
- * call to intel_display_power_put() to release the reference again.
- */
-intel_wakeref_t
-intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
-				   enum intel_display_power_domain domain)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	intel_wakeref_t wakeref;
-	bool is_enabled;
-
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
-	if (!wakeref)
-		return false;
-
-	mutex_lock(&power_domains->lock);
-
-	if (__intel_display_power_is_enabled(dev_priv, domain)) {
-		__intel_display_power_get_domain(dev_priv, domain);
-		is_enabled = true;
-	} else {
-		is_enabled = false;
-	}
-
-	mutex_unlock(&power_domains->lock);
-
-	if (!is_enabled) {
-		intel_runtime_pm_put(dev_priv, wakeref);
-		wakeref = 0;
-	}
-
-	return wakeref;
-}
-
-static void __intel_display_power_put(struct drm_i915_private *dev_priv,
-				      enum intel_display_power_domain domain)
-{
-	struct i915_power_domains *power_domains;
-	struct i915_power_well *power_well;
-
-	power_domains = &dev_priv->power_domains;
-
-	mutex_lock(&power_domains->lock);
-
-	WARN(!power_domains->domain_use_count[domain],
-	     "Use count on domain %s is already zero\n",
-	     intel_display_power_domain_str(domain));
-	power_domains->domain_use_count[domain]--;
-
-	for_each_power_domain_well_reverse(dev_priv, power_well, BIT_ULL(domain))
-		intel_power_well_put(dev_priv, power_well);
-
-	mutex_unlock(&power_domains->lock);
-}
-
-/**
- * intel_display_power_put - release a power domain reference
- * @dev_priv: i915 device instance
- * @domain: power domain to reference
- *
- * This function drops the power domain reference obtained by
- * intel_display_power_get() and might power down the corresponding hardware
- * block right away if this is the last reference.
- */
-void intel_display_power_put_unchecked(struct drm_i915_private *dev_priv,
-				       enum intel_display_power_domain domain)
-{
-	__intel_display_power_put(dev_priv, domain);
-	intel_runtime_pm_put_unchecked(dev_priv);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-void intel_display_power_put(struct drm_i915_private *dev_priv,
-			     enum intel_display_power_domain domain,
-			     intel_wakeref_t wakeref)
-{
-	__intel_display_power_put(dev_priv, domain);
-	intel_runtime_pm_put(dev_priv, wakeref);
-}
-#endif
-
-#define I830_PIPES_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DISPLAY_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DSI) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_CRT) |		\
-	BIT_ULL(POWER_DOMAIN_VGA) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_GMBUS) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_CRT) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS (	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS (	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS (	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS (	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define CHV_DISPLAY_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |	\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DSI) |		\
-	BIT_ULL(POWER_DOMAIN_VGA) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |		\
-	BIT_ULL(POWER_DOMAIN_GMBUS) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_CMN_BC_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_CMN_D_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define HSW_DISPLAY_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */	\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define BDW_DISPLAY_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */	\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define SKL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
-	SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
-	BIT_ULL(POWER_DOMAIN_MODESET) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define BXT_DISPLAY_DC_OFF_POWER_DOMAINS (		\
-	BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
-	BIT_ULL(POWER_DOMAIN_MODESET) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_GMBUS) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define BXT_DPIO_CMN_A_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define BXT_DPIO_CMN_BC_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO))
-#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO))
-#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO))
-#define GLK_DPIO_CMN_A_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DPIO_CMN_B_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DPIO_CMN_C_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DISPLAY_AUX_A_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DISPLAY_AUX_B_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DISPLAY_AUX_C_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define GLK_DISPLAY_DC_OFF_POWER_DOMAINS (		\
-	GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
-	BIT_ULL(POWER_DOMAIN_MODESET) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_GMBUS) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |                       \
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_VGA) |				\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_AUX_A_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_AUX_B_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_AUX_C_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_AUX_D_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_AUX_F_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS (		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-#define CNL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
-	CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS |		\
-	BIT_ULL(POWER_DOMAIN_GT_IRQ) |			\
-	BIT_ULL(POWER_DOMAIN_MODESET) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-/*
- * ICL PW_0/PG_0 domains (HW/DMC control):
- * - PCI
- * - clocks except port PLL
- * - central power except FBC
- * - shared functions except pipe interrupts, pipe MBUS, DBUF registers
- * ICL PW_1/PG_1 domains (HW/DMC control):
- * - DBUF function
- * - PIPE_A and its planes, except VGA
- * - transcoder EDP + PSR
- * - transcoder DSI
- * - DDI_A
- * - FBC
- */
-#define ICL_PW_4_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PIPE_C) |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_INIT))
-	/* VDSC/joining */
-#define ICL_PW_3_POWER_DOMAINS (			\
-	ICL_PW_4_POWER_DOMAINS |			\
-	BIT_ULL(POWER_DOMAIN_PIPE_B) |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_A) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_B) |		\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_C) |		\
-	BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) |		\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_LANES) |	\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_B) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_C) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_D) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_E) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_F) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT1) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT2) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT3) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT4) |		\
-	BIT_ULL(POWER_DOMAIN_VGA) |			\
-	BIT_ULL(POWER_DOMAIN_AUDIO) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-	/*
-	 * - transcoder WD
-	 * - KVMR (HW control)
-	 */
-#define ICL_PW_2_POWER_DOMAINS (			\
-	ICL_PW_3_POWER_DOMAINS |			\
-	BIT_ULL(POWER_DOMAIN_TRANSCODER_EDP_VDSC) |		\
-	BIT_ULL(POWER_DOMAIN_INIT))
-	/*
-	 * - KVMR (HW control)
-	 */
-#define ICL_DISPLAY_DC_OFF_POWER_DOMAINS (		\
-	ICL_PW_2_POWER_DOMAINS |			\
-	BIT_ULL(POWER_DOMAIN_MODESET) |			\
-	BIT_ULL(POWER_DOMAIN_AUX_A) |			\
-	BIT_ULL(POWER_DOMAIN_INIT))
-
-#define ICL_DDI_IO_A_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO))
-#define ICL_DDI_IO_B_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO))
-#define ICL_DDI_IO_C_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO))
-#define ICL_DDI_IO_D_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO))
-#define ICL_DDI_IO_E_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO))
-#define ICL_DDI_IO_F_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_PORT_DDI_F_IO))
-
-#define ICL_AUX_A_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_IO_A) |		\
-	BIT_ULL(POWER_DOMAIN_AUX_A))
-#define ICL_AUX_B_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_B))
-#define ICL_AUX_C_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_C))
-#define ICL_AUX_D_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_D))
-#define ICL_AUX_E_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_E))
-#define ICL_AUX_F_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_F))
-#define ICL_AUX_TBT1_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT1))
-#define ICL_AUX_TBT2_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT2))
-#define ICL_AUX_TBT3_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT3))
-#define ICL_AUX_TBT4_IO_POWER_DOMAINS (			\
-	BIT_ULL(POWER_DOMAIN_AUX_TBT4))
-
-static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = i9xx_always_on_power_well_noop,
-	.disable = i9xx_always_on_power_well_noop,
-	.is_enabled = i9xx_always_on_power_well_enabled,
-};
-
-static const struct i915_power_well_ops chv_pipe_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = chv_pipe_power_well_enable,
-	.disable = chv_pipe_power_well_disable,
-	.is_enabled = chv_pipe_power_well_enabled,
-};
-
-static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = chv_dpio_cmn_power_well_enable,
-	.disable = chv_dpio_cmn_power_well_disable,
-	.is_enabled = vlv_power_well_enabled,
-};
-
-static const struct i915_power_well_desc i9xx_always_on_power_well[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-};
-
-static const struct i915_power_well_ops i830_pipes_power_well_ops = {
-	.sync_hw = i830_pipes_power_well_sync_hw,
-	.enable = i830_pipes_power_well_enable,
-	.disable = i830_pipes_power_well_disable,
-	.is_enabled = i830_pipes_power_well_enabled,
-};
-
-static const struct i915_power_well_desc i830_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "pipes",
-		.domains = I830_PIPES_POWER_DOMAINS,
-		.ops = &i830_pipes_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-};
-
-static const struct i915_power_well_ops hsw_power_well_ops = {
-	.sync_hw = hsw_power_well_sync_hw,
-	.enable = hsw_power_well_enable,
-	.disable = hsw_power_well_disable,
-	.is_enabled = hsw_power_well_enabled,
-};
-
-static const struct i915_power_well_ops gen9_dc_off_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = gen9_dc_off_power_well_enable,
-	.disable = gen9_dc_off_power_well_disable,
-	.is_enabled = gen9_dc_off_power_well_enabled,
-};
-
-static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = bxt_dpio_cmn_power_well_enable,
-	.disable = bxt_dpio_cmn_power_well_disable,
-	.is_enabled = bxt_dpio_cmn_power_well_enabled,
-};
-
-static const struct i915_power_well_regs hsw_power_well_regs = {
-	.bios	= HSW_PWR_WELL_CTL1,
-	.driver	= HSW_PWR_WELL_CTL2,
-	.kvmr	= HSW_PWR_WELL_CTL3,
-	.debug	= HSW_PWR_WELL_CTL4,
-};
-
-static const struct i915_power_well_desc hsw_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "display",
-		.domains = HSW_DISPLAY_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = HSW_DISP_PW_GLOBAL,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = HSW_PW_CTL_IDX_GLOBAL,
-			.hsw.has_vga = true,
-		},
-	},
-};
-
-static const struct i915_power_well_desc bdw_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "display",
-		.domains = BDW_DISPLAY_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = HSW_DISP_PW_GLOBAL,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = HSW_PW_CTL_IDX_GLOBAL,
-			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
-			.hsw.has_vga = true,
-		},
-	},
-};
-
-static const struct i915_power_well_ops vlv_display_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = vlv_display_power_well_enable,
-	.disable = vlv_display_power_well_disable,
-	.is_enabled = vlv_power_well_enabled,
-};
-
-static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = vlv_dpio_cmn_power_well_enable,
-	.disable = vlv_dpio_cmn_power_well_disable,
-	.is_enabled = vlv_power_well_enabled,
-};
-
-static const struct i915_power_well_ops vlv_dpio_power_well_ops = {
-	.sync_hw = i9xx_power_well_sync_hw_noop,
-	.enable = vlv_power_well_enable,
-	.disable = vlv_power_well_disable,
-	.is_enabled = vlv_power_well_enabled,
-};
-
-static const struct i915_power_well_desc vlv_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "display",
-		.domains = VLV_DISPLAY_POWER_DOMAINS,
-		.ops = &vlv_display_power_well_ops,
-		.id = VLV_DISP_PW_DISP2D,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DISP2D,
-		},
-	},
-	{
-		.name = "dpio-tx-b-01",
-		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-		.ops = &vlv_dpio_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_B_LANES_01,
-		},
-	},
-	{
-		.name = "dpio-tx-b-23",
-		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-		.ops = &vlv_dpio_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_B_LANES_23,
-		},
-	},
-	{
-		.name = "dpio-tx-c-01",
-		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-		.ops = &vlv_dpio_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_C_LANES_01,
-		},
-	},
-	{
-		.name = "dpio-tx-c-23",
-		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
-			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
-		.ops = &vlv_dpio_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_TX_C_LANES_23,
-		},
-	},
-	{
-		.name = "dpio-common",
-		.domains = VLV_DPIO_CMN_BC_POWER_DOMAINS,
-		.ops = &vlv_dpio_cmn_power_well_ops,
-		.id = VLV_DISP_PW_DPIO_CMN_BC,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_BC,
-		},
-	},
-};
-
-static const struct i915_power_well_desc chv_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "display",
-		/*
-		 * Pipe A power well is the new disp2d well. Pipe B and C
-		 * power wells don't actually exist. Pipe A power well is
-		 * required for any pipe to work.
-		 */
-		.domains = CHV_DISPLAY_POWER_DOMAINS,
-		.ops = &chv_pipe_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "dpio-common-bc",
-		.domains = CHV_DPIO_CMN_BC_POWER_DOMAINS,
-		.ops = &chv_dpio_cmn_power_well_ops,
-		.id = VLV_DISP_PW_DPIO_CMN_BC,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_BC,
-		},
-	},
-	{
-		.name = "dpio-common-d",
-		.domains = CHV_DPIO_CMN_D_POWER_DOMAINS,
-		.ops = &chv_dpio_cmn_power_well_ops,
-		.id = CHV_DISP_PW_DPIO_CMN_D,
-		{
-			.vlv.idx = PUNIT_PWGT_IDX_DPIO_CMN_D,
-		},
-	},
-};
-
-bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv,
-					 enum i915_power_well_id power_well_id)
-{
-	struct i915_power_well *power_well;
-	bool ret;
-
-	power_well = lookup_power_well(dev_priv, power_well_id);
-	ret = power_well->desc->ops->is_enabled(dev_priv, power_well);
-
-	return ret;
-}
-
-static const struct i915_power_well_desc skl_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 1",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_1,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "MISC IO power well",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_MISC_IO,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_MISC_IO,
-		},
-	},
-	{
-		.name = "DC off",
-		.domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 2",
-		.domains = SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_2,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
-			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
-			.hsw.has_vga = true,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DDI A/E IO power well",
-		.domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_A_E,
-		},
-	},
-	{
-		.name = "DDI B IO power well",
-		.domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
-		},
-	},
-	{
-		.name = "DDI C IO power well",
-		.domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
-		},
-	},
-	{
-		.name = "DDI D IO power well",
-		.domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_D,
-		},
-	},
-};
-
-static const struct i915_power_well_desc bxt_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 1",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_1,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DC off",
-		.domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 2",
-		.domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_2,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
-			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
-			.hsw.has_vga = true,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "dpio-common-a",
-		.domains = BXT_DPIO_CMN_A_POWER_DOMAINS,
-		.ops = &bxt_dpio_cmn_power_well_ops,
-		.id = BXT_DISP_PW_DPIO_CMN_A,
-		{
-			.bxt.phy = DPIO_PHY1,
-		},
-	},
-	{
-		.name = "dpio-common-bc",
-		.domains = BXT_DPIO_CMN_BC_POWER_DOMAINS,
-		.ops = &bxt_dpio_cmn_power_well_ops,
-		.id = VLV_DISP_PW_DPIO_CMN_BC,
-		{
-			.bxt.phy = DPIO_PHY0,
-		},
-	},
-};
-
-static const struct i915_power_well_desc glk_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 1",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_1,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DC off",
-		.domains = GLK_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 2",
-		.domains = GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_2,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
-			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
-			.hsw.has_vga = true,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "dpio-common-a",
-		.domains = GLK_DPIO_CMN_A_POWER_DOMAINS,
-		.ops = &bxt_dpio_cmn_power_well_ops,
-		.id = BXT_DISP_PW_DPIO_CMN_A,
-		{
-			.bxt.phy = DPIO_PHY1,
-		},
-	},
-	{
-		.name = "dpio-common-b",
-		.domains = GLK_DPIO_CMN_B_POWER_DOMAINS,
-		.ops = &bxt_dpio_cmn_power_well_ops,
-		.id = VLV_DISP_PW_DPIO_CMN_BC,
-		{
-			.bxt.phy = DPIO_PHY0,
-		},
-	},
-	{
-		.name = "dpio-common-c",
-		.domains = GLK_DPIO_CMN_C_POWER_DOMAINS,
-		.ops = &bxt_dpio_cmn_power_well_ops,
-		.id = GLK_DISP_PW_DPIO_CMN_C,
-		{
-			.bxt.phy = DPIO_PHY2,
-		},
-	},
-	{
-		.name = "AUX A",
-		.domains = GLK_DISPLAY_AUX_A_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_A,
-		},
-	},
-	{
-		.name = "AUX B",
-		.domains = GLK_DISPLAY_AUX_B_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_B,
-		},
-	},
-	{
-		.name = "AUX C",
-		.domains = GLK_DISPLAY_AUX_C_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_C,
-		},
-	},
-	{
-		.name = "DDI A IO power well",
-		.domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_DDI_A,
-		},
-	},
-	{
-		.name = "DDI B IO power well",
-		.domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
-		},
-	},
-	{
-		.name = "DDI C IO power well",
-		.domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
-		},
-	},
-};
-
-static const struct i915_power_well_desc cnl_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 1",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_1,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_1,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "AUX A",
-		.domains = CNL_DISPLAY_AUX_A_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_A,
-		},
-	},
-	{
-		.name = "AUX B",
-		.domains = CNL_DISPLAY_AUX_B_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_B,
-		},
-	},
-	{
-		.name = "AUX C",
-		.domains = CNL_DISPLAY_AUX_C_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_AUX_C,
-		},
-	},
-	{
-		.name = "AUX D",
-		.domains = CNL_DISPLAY_AUX_D_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = CNL_PW_CTL_IDX_AUX_D,
-		},
-	},
-	{
-		.name = "DC off",
-		.domains = CNL_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 2",
-		.domains = CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_2,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_PW_2,
-			.hsw.irq_pipe_mask = BIT(PIPE_B) | BIT(PIPE_C),
-			.hsw.has_vga = true,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DDI A IO power well",
-		.domains = CNL_DISPLAY_DDI_A_IO_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = GLK_PW_CTL_IDX_DDI_A,
-		},
-	},
-	{
-		.name = "DDI B IO power well",
-		.domains = CNL_DISPLAY_DDI_B_IO_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_B,
-		},
-	},
-	{
-		.name = "DDI C IO power well",
-		.domains = CNL_DISPLAY_DDI_C_IO_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_C,
-		},
-	},
-	{
-		.name = "DDI D IO power well",
-		.domains = CNL_DISPLAY_DDI_D_IO_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = SKL_PW_CTL_IDX_DDI_D,
-		},
-	},
-	{
-		.name = "DDI F IO power well",
-		.domains = CNL_DISPLAY_DDI_F_IO_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = CNL_PW_CTL_IDX_DDI_F,
-		},
-	},
-	{
-		.name = "AUX F",
-		.domains = CNL_DISPLAY_AUX_F_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = CNL_PW_CTL_IDX_AUX_F,
-		},
-	},
-};
-
-static const struct i915_power_well_ops icl_combo_phy_aux_power_well_ops = {
-	.sync_hw = hsw_power_well_sync_hw,
-	.enable = icl_combo_phy_aux_power_well_enable,
-	.disable = icl_combo_phy_aux_power_well_disable,
-	.is_enabled = hsw_power_well_enabled,
-};
-
-static const struct i915_power_well_ops icl_tc_phy_aux_power_well_ops = {
-	.sync_hw = hsw_power_well_sync_hw,
-	.enable = icl_tc_phy_aux_power_well_enable,
-	.disable = hsw_power_well_disable,
-	.is_enabled = hsw_power_well_enabled,
-};
-
-static const struct i915_power_well_regs icl_aux_power_well_regs = {
-	.bios	= ICL_PWR_WELL_CTL_AUX1,
-	.driver	= ICL_PWR_WELL_CTL_AUX2,
-	.debug	= ICL_PWR_WELL_CTL_AUX4,
-};
-
-static const struct i915_power_well_regs icl_ddi_power_well_regs = {
-	.bios	= ICL_PWR_WELL_CTL_DDI1,
-	.driver	= ICL_PWR_WELL_CTL_DDI2,
-	.debug	= ICL_PWR_WELL_CTL_DDI4,
-};
-
-static const struct i915_power_well_desc icl_power_wells[] = {
-	{
-		.name = "always-on",
-		.always_on = true,
-		.domains = POWER_DOMAIN_MASK,
-		.ops = &i9xx_always_on_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 1",
-		/* Handled by the DMC firmware */
-		.always_on = true,
-		.domains = 0,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_1,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_PW_1,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DC off",
-		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
-	{
-		.name = "power well 2",
-		.domains = ICL_PW_2_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = SKL_DISP_PW_2,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_PW_2,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "power well 3",
-		.domains = ICL_PW_3_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_PW_3,
-			.hsw.irq_pipe_mask = BIT(PIPE_B),
-			.hsw.has_vga = true,
-			.hsw.has_fuses = true,
-		},
-	},
-	{
-		.name = "DDI A IO",
-		.domains = ICL_DDI_IO_A_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_A,
-		},
-	},
-	{
-		.name = "DDI B IO",
-		.domains = ICL_DDI_IO_B_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_B,
-		},
-	},
-	{
-		.name = "DDI C IO",
-		.domains = ICL_DDI_IO_C_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_C,
-		},
-	},
-	{
-		.name = "DDI D IO",
-		.domains = ICL_DDI_IO_D_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_D,
-		},
-	},
-	{
-		.name = "DDI E IO",
-		.domains = ICL_DDI_IO_E_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_E,
-		},
-	},
-	{
-		.name = "DDI F IO",
-		.domains = ICL_DDI_IO_F_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_ddi_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_DDI_F,
-		},
-	},
-	{
-		.name = "AUX A",
-		.domains = ICL_AUX_A_IO_POWER_DOMAINS,
-		.ops = &icl_combo_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_A,
-		},
-	},
-	{
-		.name = "AUX B",
-		.domains = ICL_AUX_B_IO_POWER_DOMAINS,
-		.ops = &icl_combo_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_B,
-		},
-	},
-	{
-		.name = "AUX C",
-		.domains = ICL_AUX_C_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_C,
-			.hsw.is_tc_tbt = false,
-		},
-	},
-	{
-		.name = "AUX D",
-		.domains = ICL_AUX_D_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_D,
-			.hsw.is_tc_tbt = false,
-		},
-	},
-	{
-		.name = "AUX E",
-		.domains = ICL_AUX_E_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_E,
-			.hsw.is_tc_tbt = false,
-		},
-	},
-	{
-		.name = "AUX F",
-		.domains = ICL_AUX_F_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_F,
-			.hsw.is_tc_tbt = false,
-		},
-	},
-	{
-		.name = "AUX TBT1",
-		.domains = ICL_AUX_TBT1_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT1,
-			.hsw.is_tc_tbt = true,
-		},
-	},
-	{
-		.name = "AUX TBT2",
-		.domains = ICL_AUX_TBT2_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT2,
-			.hsw.is_tc_tbt = true,
-		},
-	},
-	{
-		.name = "AUX TBT3",
-		.domains = ICL_AUX_TBT3_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT3,
-			.hsw.is_tc_tbt = true,
-		},
-	},
-	{
-		.name = "AUX TBT4",
-		.domains = ICL_AUX_TBT4_IO_POWER_DOMAINS,
-		.ops = &icl_tc_phy_aux_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &icl_aux_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_AUX_TBT4,
-			.hsw.is_tc_tbt = true,
-		},
-	},
-	{
-		.name = "power well 4",
-		.domains = ICL_PW_4_POWER_DOMAINS,
-		.ops = &hsw_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-		{
-			.hsw.regs = &hsw_power_well_regs,
-			.hsw.idx = ICL_PW_CTL_IDX_PW_4,
-			.hsw.has_fuses = true,
-			.hsw.irq_pipe_mask = BIT(PIPE_C),
-		},
-	},
-};
-
-static int
-sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv,
-				   int disable_power_well)
-{
-	if (disable_power_well >= 0)
-		return !!disable_power_well;
-
-	return 1;
-}
-
-static u32 get_allowed_dc_mask(const struct drm_i915_private *dev_priv,
-			       int enable_dc)
-{
-	u32 mask;
-	int requested_dc;
-	int max_dc;
-
-	if (INTEL_GEN(dev_priv) >= 11) {
-		max_dc = 2;
-		/*
-		 * DC9 has a separate HW flow from the rest of the DC states,
-		 * not depending on the DMC firmware. It's needed by system
-		 * suspend/resume, so allow it unconditionally.
-		 */
-		mask = DC_STATE_EN_DC9;
-	} else if (IS_GEN(dev_priv, 10) || IS_GEN9_BC(dev_priv)) {
-		max_dc = 2;
-		mask = 0;
-	} else if (IS_GEN9_LP(dev_priv)) {
-		max_dc = 1;
-		mask = DC_STATE_EN_DC9;
-	} else {
-		max_dc = 0;
-		mask = 0;
-	}
-
-	if (!i915_modparams.disable_power_well)
-		max_dc = 0;
-
-	if (enable_dc >= 0 && enable_dc <= max_dc) {
-		requested_dc = enable_dc;
-	} else if (enable_dc == -1) {
-		requested_dc = max_dc;
-	} else if (enable_dc > max_dc && enable_dc <= 2) {
-		DRM_DEBUG_KMS("Adjusting requested max DC state (%d->%d)\n",
-			      enable_dc, max_dc);
-		requested_dc = max_dc;
-	} else {
-		DRM_ERROR("Unexpected value for enable_dc (%d)\n", enable_dc);
-		requested_dc = max_dc;
-	}
-
-	if (requested_dc > 1)
-		mask |= DC_STATE_EN_UPTO_DC6;
-	if (requested_dc > 0)
-		mask |= DC_STATE_EN_UPTO_DC5;
-
-	DRM_DEBUG_KMS("Allowed DC state mask %02x\n", mask);
-
-	return mask;
-}
-
-static int
-__set_power_wells(struct i915_power_domains *power_domains,
-		  const struct i915_power_well_desc *power_well_descs,
-		  int power_well_count)
-{
-	u64 power_well_ids = 0;
-	int i;
-
-	power_domains->power_well_count = power_well_count;
-	power_domains->power_wells =
-				kcalloc(power_well_count,
-					sizeof(*power_domains->power_wells),
-					GFP_KERNEL);
-	if (!power_domains->power_wells)
-		return -ENOMEM;
-
-	for (i = 0; i < power_well_count; i++) {
-		enum i915_power_well_id id = power_well_descs[i].id;
-
-		power_domains->power_wells[i].desc = &power_well_descs[i];
-
-		if (id == DISP_PW_ID_NONE)
-			continue;
-
-		WARN_ON(id >= sizeof(power_well_ids) * 8);
-		WARN_ON(power_well_ids & BIT_ULL(id));
-		power_well_ids |= BIT_ULL(id);
-	}
-
-	return 0;
-}
-
-#define set_power_wells(power_domains, __power_well_descs) \
-	__set_power_wells(power_domains, __power_well_descs, \
-			  ARRAY_SIZE(__power_well_descs))
-
-/**
- * intel_power_domains_init - initializes the power domain structures
- * @dev_priv: i915 device instance
- *
- * Initializes the power domain structures for @dev_priv depending upon the
- * supported platform.
- */
-int intel_power_domains_init(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	int err;
-
-	i915_modparams.disable_power_well =
-		sanitize_disable_power_well_option(dev_priv,
-						   i915_modparams.disable_power_well);
-	dev_priv->csr.allowed_dc_mask =
-		get_allowed_dc_mask(dev_priv, i915_modparams.enable_dc);
-
-	BUILD_BUG_ON(POWER_DOMAIN_NUM > 64);
-
-	mutex_init(&power_domains->lock);
-
-	/*
-	 * The enabling order will be from lower to higher indexed wells,
-	 * the disabling order is reversed.
-	 */
-	if (IS_GEN(dev_priv, 11)) {
-		err = set_power_wells(power_domains, icl_power_wells);
-	} else if (IS_CANNONLAKE(dev_priv)) {
-		err = set_power_wells(power_domains, cnl_power_wells);
-
-		/*
-		 * DDI and Aux IO are getting enabled for all ports
-		 * regardless the presence or use. So, in order to avoid
-		 * timeouts, lets remove them from the list
-		 * for the SKUs without port F.
-		 */
-		if (!IS_CNL_WITH_PORT_F(dev_priv))
-			power_domains->power_well_count -= 2;
-	} else if (IS_GEMINILAKE(dev_priv)) {
-		err = set_power_wells(power_domains, glk_power_wells);
-	} else if (IS_BROXTON(dev_priv)) {
-		err = set_power_wells(power_domains, bxt_power_wells);
-	} else if (IS_GEN9_BC(dev_priv)) {
-		err = set_power_wells(power_domains, skl_power_wells);
-	} else if (IS_CHERRYVIEW(dev_priv)) {
-		err = set_power_wells(power_domains, chv_power_wells);
-	} else if (IS_BROADWELL(dev_priv)) {
-		err = set_power_wells(power_domains, bdw_power_wells);
-	} else if (IS_HASWELL(dev_priv)) {
-		err = set_power_wells(power_domains, hsw_power_wells);
-	} else if (IS_VALLEYVIEW(dev_priv)) {
-		err = set_power_wells(power_domains, vlv_power_wells);
-	} else if (IS_I830(dev_priv)) {
-		err = set_power_wells(power_domains, i830_power_wells);
-	} else {
-		err = set_power_wells(power_domains, i9xx_always_on_power_well);
-	}
-
-	return err;
-}
-
-/**
- * intel_power_domains_cleanup - clean up power domains resources
- * @dev_priv: i915 device instance
- *
- * Release any resources acquired by intel_power_domains_init()
- */
-void intel_power_domains_cleanup(struct drm_i915_private *dev_priv)
-{
-	kfree(dev_priv->power_domains.power_wells);
-}
-
-static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *power_well;
-
-	mutex_lock(&power_domains->lock);
-	for_each_power_well(dev_priv, power_well) {
-		power_well->desc->ops->sync_hw(dev_priv, power_well);
-		power_well->hw_enabled =
-			power_well->desc->ops->is_enabled(dev_priv, power_well);
-	}
-	mutex_unlock(&power_domains->lock);
-}
-
-static inline
-bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
-			  i915_reg_t reg, bool enable)
-{
-	u32 val, status;
-
-	val = I915_READ(reg);
-	val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST);
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
-	udelay(10);
-
-	status = I915_READ(reg) & DBUF_POWER_STATE;
-	if ((enable && !status) || (!enable && status)) {
-		DRM_ERROR("DBus power %s timeout!\n",
-			  enable ? "enable" : "disable");
-		return false;
-	}
-	return true;
-}
-
-static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
-{
-	intel_dbuf_slice_set(dev_priv, DBUF_CTL, true);
-}
-
-static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
-{
-	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
-}
-
-static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
-{
-	if (INTEL_GEN(dev_priv) < 11)
-		return 1;
-	return 2;
-}
-
-void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
-			    u8 req_slices)
-{
-	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
-	bool ret;
-
-	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
-		DRM_ERROR("Invalid number of dbuf slices requested\n");
-		return;
-	}
-
-	if (req_slices == hw_enabled_slices || req_slices == 0)
-		return;
-
-	if (req_slices > hw_enabled_slices)
-		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
-	else
-		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
-
-	if (ret)
-		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
-}
-
-static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
-	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) | DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL_S2);
-
-	udelay(10);
-
-	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
-	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power enable timeout\n");
-	else
-		/*
-		 * FIXME: for now pretend that we only have 1 slice, see
-		 * intel_enabled_dbuf_slices_num().
-		 */
-		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
-}
-
-static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) & ~DBUF_POWER_REQUEST);
-	I915_WRITE(DBUF_CTL_S2, I915_READ(DBUF_CTL_S2) & ~DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL_S2);
-
-	udelay(10);
-
-	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
-	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power disable timeout!\n");
-	else
-		/*
-		 * FIXME: for now pretend that the first slice is always
-		 * enabled, see intel_enabled_dbuf_slices_num().
-		 */
-		dev_priv->wm.skl_hw.ddb.enabled_slices = 1;
-}
-
-static void icl_mbus_init(struct drm_i915_private *dev_priv)
-{
-	u32 val;
-
-	val = MBUS_ABOX_BT_CREDIT_POOL1(16) |
-	      MBUS_ABOX_BT_CREDIT_POOL2(16) |
-	      MBUS_ABOX_B_CREDIT(1) |
-	      MBUS_ABOX_BW_CREDIT(1);
-
-	I915_WRITE(MBUS_ABOX_CTL, val);
-}
-
-static void intel_pch_reset_handshake(struct drm_i915_private *dev_priv,
-				      bool enable)
-{
-	i915_reg_t reg;
-	u32 reset_bits, val;
-
-	if (IS_IVYBRIDGE(dev_priv)) {
-		reg = GEN7_MSG_CTL;
-		reset_bits = WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK;
-	} else {
-		reg = HSW_NDE_RSTWRN_OPT;
-		reset_bits = RESET_PCH_HANDSHAKE_ENABLE;
-	}
-
-	val = I915_READ(reg);
-
-	if (enable)
-		val |= reset_bits;
-	else
-		val &= ~reset_bits;
-
-	I915_WRITE(reg, val);
-}
-
-static void skl_display_core_init(struct drm_i915_private *dev_priv,
-				   bool resume)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/* enable PCH reset handshake */
-	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
-
-	/* enable PG1 and Misc I/O */
-	mutex_lock(&power_domains->lock);
-
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_enable(dev_priv, well);
-
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_MISC_IO);
-	intel_power_well_enable(dev_priv, well);
-
-	mutex_unlock(&power_domains->lock);
-
-	intel_cdclk_init(dev_priv);
-
-	gen9_dbuf_enable(dev_priv);
-
-	if (resume && dev_priv->csr.dmc_payload)
-		intel_csr_load_program(dev_priv);
-}
-
-static void skl_display_core_uninit(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	gen9_dbuf_disable(dev_priv);
-
-	intel_cdclk_uninit(dev_priv);
-
-	/* The spec doesn't call for removing the reset handshake flag */
-	/* disable PG1 and Misc I/O */
-
-	mutex_lock(&power_domains->lock);
-
-	/*
-	 * BSpec says to keep the MISC IO power well enabled here, only
-	 * remove our request for power well 1.
-	 * Note that even though the driver's request is removed power well 1
-	 * may stay enabled after this due to DMC's own request on it.
-	 */
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_disable(dev_priv, well);
-
-	mutex_unlock(&power_domains->lock);
-
-	usleep_range(10, 30);		/* 10 us delay per Bspec */
-}
-
-void bxt_display_core_init(struct drm_i915_private *dev_priv,
-			   bool resume)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/*
-	 * NDE_RSTWRN_OPT RST PCH Handshake En must always be 0b on BXT
-	 * or else the reset will hang because there is no PCH to respond.
-	 * Move the handshake programming to initialization sequence.
-	 * Previously was left up to BIOS.
-	 */
-	intel_pch_reset_handshake(dev_priv, false);
-
-	/* Enable PG1 */
-	mutex_lock(&power_domains->lock);
-
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_enable(dev_priv, well);
-
-	mutex_unlock(&power_domains->lock);
-
-	intel_cdclk_init(dev_priv);
-
-	gen9_dbuf_enable(dev_priv);
-
-	if (resume && dev_priv->csr.dmc_payload)
-		intel_csr_load_program(dev_priv);
-}
-
-void bxt_display_core_uninit(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	gen9_dbuf_disable(dev_priv);
-
-	intel_cdclk_uninit(dev_priv);
-
-	/* The spec doesn't call for removing the reset handshake flag */
-
-	/*
-	 * Disable PW1 (PG1).
-	 * Note that even though the driver's request is removed power well 1
-	 * may stay enabled after this due to DMC's own request on it.
-	 */
-	mutex_lock(&power_domains->lock);
-
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_disable(dev_priv, well);
-
-	mutex_unlock(&power_domains->lock);
-
-	usleep_range(10, 30);		/* 10 us delay per Bspec */
-}
-
-static void cnl_display_core_init(struct drm_i915_private *dev_priv, bool resume)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/* 1. Enable PCH Reset Handshake */
-	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
-
-	/* 2-3. */
-	cnl_combo_phys_init(dev_priv);
-
-	/*
-	 * 4. Enable Power Well 1 (PG1).
-	 *    The AUX IO power wells will be enabled on demand.
-	 */
-	mutex_lock(&power_domains->lock);
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_enable(dev_priv, well);
-	mutex_unlock(&power_domains->lock);
-
-	/* 5. Enable CD clock */
-	intel_cdclk_init(dev_priv);
-
-	/* 6. Enable DBUF */
-	gen9_dbuf_enable(dev_priv);
-
-	if (resume && dev_priv->csr.dmc_payload)
-		intel_csr_load_program(dev_priv);
-}
-
-static void cnl_display_core_uninit(struct drm_i915_private *dev_priv)
+intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
 {
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/* 1. Disable all display engine functions -> aready done */
-
-	/* 2. Disable DBUF */
-	gen9_dbuf_disable(dev_priv);
-
-	/* 3. Disable CD clock */
-	intel_cdclk_uninit(dev_priv);
-
-	/*
-	 * 4. Disable Power Well 1 (PG1).
-	 *    The AUX IO power wells are toggled on demand, so they are already
-	 *    disabled at this point.
-	 */
-	mutex_lock(&power_domains->lock);
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_disable(dev_priv, well);
-	mutex_unlock(&power_domains->lock);
-
-	usleep_range(10, 30);		/* 10 us delay per Bspec */
-
-	/* 5. */
-	cnl_combo_phys_uninit(dev_priv);
-}
-
-void icl_display_core_init(struct drm_i915_private *dev_priv,
-			   bool resume)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/* 1. Enable PCH reset handshake. */
-	intel_pch_reset_handshake(dev_priv, !HAS_PCH_NOP(dev_priv));
-
-	/* 2-3. */
-	icl_combo_phys_init(dev_priv);
-
-	/*
-	 * 4. Enable Power Well 1 (PG1).
-	 *    The AUX IO power wells will be enabled on demand.
-	 */
-	mutex_lock(&power_domains->lock);
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_enable(dev_priv, well);
-	mutex_unlock(&power_domains->lock);
-
-	/* 5. Enable CDCLK. */
-	intel_cdclk_init(dev_priv);
-
-	/* 6. Enable DBUF. */
-	icl_dbuf_enable(dev_priv);
-
-	/* 7. Setup MBUS. */
-	icl_mbus_init(dev_priv);
-
-	if (resume && dev_priv->csr.dmc_payload)
-		intel_csr_load_program(dev_priv);
-}
-
-void icl_display_core_uninit(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_domains *power_domains = &dev_priv->power_domains;
-	struct i915_power_well *well;
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-
-	/* 1. Disable all display engine functions -> aready done */
-
-	/* 2. Disable DBUF */
-	icl_dbuf_disable(dev_priv);
-
-	/* 3. Disable CD clock */
-	intel_cdclk_uninit(dev_priv);
-
-	/*
-	 * 4. Disable Power Well 1 (PG1).
-	 *    The AUX IO power wells are toggled on demand, so they are already
-	 *    disabled at this point.
-	 */
-	mutex_lock(&power_domains->lock);
-	well = lookup_power_well(dev_priv, SKL_DISP_PW_1);
-	intel_power_well_disable(dev_priv, well);
-	mutex_unlock(&power_domains->lock);
-
-	/* 5. */
-	icl_combo_phys_uninit(dev_priv);
-}
-
-static void chv_phy_control_init(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_well *cmn_bc =
-		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
-	struct i915_power_well *cmn_d =
-		lookup_power_well(dev_priv, CHV_DISP_PW_DPIO_CMN_D);
-
-	/*
-	 * DISPLAY_PHY_CONTROL can get corrupted if read. As a
-	 * workaround never ever read DISPLAY_PHY_CONTROL, and
-	 * instead maintain a shadow copy ourselves. Use the actual
-	 * power well state and lane status to reconstruct the
-	 * expected initial value.
-	 */
-	dev_priv->chv_phy_control =
-		PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) |
-		PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) |
-		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH0) |
-		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY0, DPIO_CH1) |
-		PHY_CH_POWER_MODE(PHY_CH_DEEP_PSR, DPIO_PHY1, DPIO_CH0);
-
-	/*
-	 * If all lanes are disabled we leave the override disabled
-	 * with all power down bits cleared to match the state we
-	 * would use after disabling the port. Otherwise enable the
-	 * override and set the lane powerdown bits accding to the
-	 * current lane status.
-	 */
-	if (cmn_bc->desc->ops->is_enabled(dev_priv, cmn_bc)) {
-		u32 status = I915_READ(DPLL(PIPE_A));
-		unsigned int mask;
-
-		mask = status & DPLL_PORTB_READY_MASK;
-		if (mask == 0xf)
-			mask = 0x0;
-		else
-			dev_priv->chv_phy_control |=
-				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH0);
-
-		dev_priv->chv_phy_control |=
-			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH0);
-
-		mask = (status & DPLL_PORTC_READY_MASK) >> 4;
-		if (mask == 0xf)
-			mask = 0x0;
-		else
-			dev_priv->chv_phy_control |=
-				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY0, DPIO_CH1);
-
-		dev_priv->chv_phy_control |=
-			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY0, DPIO_CH1);
-
-		dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY0);
-
-		dev_priv->chv_phy_assert[DPIO_PHY0] = false;
-	} else {
-		dev_priv->chv_phy_assert[DPIO_PHY0] = true;
-	}
-
-	if (cmn_d->desc->ops->is_enabled(dev_priv, cmn_d)) {
-		u32 status = I915_READ(DPIO_PHY_STATUS);
-		unsigned int mask;
-
-		mask = status & DPLL_PORTD_READY_MASK;
-
-		if (mask == 0xf)
-			mask = 0x0;
-		else
-			dev_priv->chv_phy_control |=
-				PHY_CH_POWER_DOWN_OVRD_EN(DPIO_PHY1, DPIO_CH0);
-
-		dev_priv->chv_phy_control |=
-			PHY_CH_POWER_DOWN_OVRD(mask, DPIO_PHY1, DPIO_CH0);
-
-		dev_priv->chv_phy_control |= PHY_COM_LANE_RESET_DEASSERT(DPIO_PHY1);
-
-		dev_priv->chv_phy_assert[DPIO_PHY1] = false;
+	if (wakelock) {
+		assert_rpm_wakelock_held(rpm);
+		atomic_sub(INTEL_RPM_WAKELOCK_BIAS, &rpm->wakeref_count);
 	} else {
-		dev_priv->chv_phy_assert[DPIO_PHY1] = true;
+		assert_rpm_raw_wakeref_held(rpm);
 	}
 
-	I915_WRITE(DISPLAY_PHY_CONTROL, dev_priv->chv_phy_control);
-
-	DRM_DEBUG_KMS("Initial PHY_CONTROL=0x%08x\n",
-		      dev_priv->chv_phy_control);
-}
-
-static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv)
-{
-	struct i915_power_well *cmn =
-		lookup_power_well(dev_priv, VLV_DISP_PW_DPIO_CMN_BC);
-	struct i915_power_well *disp2d =
-		lookup_power_well(dev_priv, VLV_DISP_PW_DISP2D);
-
-	/* If the display might be already active skip this */
-	if (cmn->desc->ops->is_enabled(dev_priv, cmn) &&
-	    disp2d->desc->ops->is_enabled(dev_priv, disp2d) &&
-	    I915_READ(DPIO_CTL) & DPIO_CMNRST)
-		return;
-
-	DRM_DEBUG_KMS("toggling display PHY side reset\n");
-
-	/* cmnlane needs DPLL registers */
-	disp2d->desc->ops->enable(dev_priv, disp2d);
-
-	/*
-	 * From VLV2A0_DP_eDP_HDMI_DPIO_driver_vbios_notes_11.docx:
-	 * Need to assert and de-assert PHY SB reset by gating the
-	 * common lane power, then un-gating it.
-	 * Simply ungating isn't enough to reset the PHY enough to get
-	 * ports and lanes running.
-	 */
-	cmn->desc->ops->disable(dev_priv, cmn);
-}
-
-static bool vlv_punit_is_power_gated(struct drm_i915_private *dev_priv, u32 reg0)
-{
-	bool ret;
-
-	mutex_lock(&dev_priv->pcu_lock);
-	ret = (vlv_punit_read(dev_priv, reg0) & SSPM0_SSC_MASK) == SSPM0_SSC_PWR_GATE;
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return ret;
-}
-
-static void assert_ved_power_gated(struct drm_i915_private *dev_priv)
-{
-	WARN(!vlv_punit_is_power_gated(dev_priv, PUNIT_REG_VEDSSPM0),
-	     "VED not power gated\n");
-}
-
-static void assert_isp_power_gated(struct drm_i915_private *dev_priv)
-{
-	static const struct pci_device_id isp_ids[] = {
-		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0f38)},
-		{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x22b8)},
-		{}
-	};
-
-	WARN(!pci_dev_present(isp_ids) &&
-	     !vlv_punit_is_power_gated(dev_priv, PUNIT_REG_ISPSSPM0),
-	     "ISP not power gated\n");
-}
-
-static void intel_power_domains_verify_state(struct drm_i915_private *dev_priv);
-
-/**
- * intel_power_domains_init_hw - initialize hardware power domain state
- * @i915: i915 device instance
- * @resume: Called from resume code paths or not
- *
- * This function initializes the hardware power domain state and enables all
- * power wells belonging to the INIT power domain. Power wells in other
- * domains (and not in the INIT domain) are referenced or disabled by
- * intel_modeset_readout_hw_state(). After that the reference count of each
- * power well must match its HW enabled state, see
- * intel_power_domains_verify_state().
- *
- * It will return with power domains disabled (to be enabled later by
- * intel_power_domains_enable()) and must be paired with
- * intel_power_domains_fini_hw().
- */
-void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
-{
-	struct i915_power_domains *power_domains = &i915->power_domains;
-
-	power_domains->initializing = true;
-
-	if (INTEL_GEN(i915) >= 11) {
-		icl_display_core_init(i915, resume);
-	} else if (IS_CANNONLAKE(i915)) {
-		cnl_display_core_init(i915, resume);
-	} else if (IS_GEN9_BC(i915)) {
-		skl_display_core_init(i915, resume);
-	} else if (IS_GEN9_LP(i915)) {
-		bxt_display_core_init(i915, resume);
-	} else if (IS_CHERRYVIEW(i915)) {
-		mutex_lock(&power_domains->lock);
-		chv_phy_control_init(i915);
-		mutex_unlock(&power_domains->lock);
-		assert_isp_power_gated(i915);
-	} else if (IS_VALLEYVIEW(i915)) {
-		mutex_lock(&power_domains->lock);
-		vlv_cmnlane_wa(i915);
-		mutex_unlock(&power_domains->lock);
-		assert_ved_power_gated(i915);
-		assert_isp_power_gated(i915);
-	} else if (IS_IVYBRIDGE(i915) || INTEL_GEN(i915) >= 7) {
-		intel_pch_reset_handshake(i915, !HAS_PCH_NOP(i915));
-	}
-
-	/*
-	 * Keep all power wells enabled for any dependent HW access during
-	 * initialization and to make sure we keep BIOS enabled display HW
-	 * resources powered until display HW readout is complete. We drop
-	 * this reference in intel_power_domains_enable().
-	 */
-	power_domains->wakeref =
-		intel_display_power_get(i915, POWER_DOMAIN_INIT);
-
-	/* Disable power support if the user asked so. */
-	if (!i915_modparams.disable_power_well)
-		intel_display_power_get(i915, POWER_DOMAIN_INIT);
-	intel_power_domains_sync_hw(i915);
-
-	power_domains->initializing = false;
+	__intel_wakeref_dec_and_check_tracking(rpm);
 }
 
-/**
- * intel_power_domains_fini_hw - deinitialize hw power domain state
- * @i915: i915 device instance
- *
- * De-initializes the display power domain HW state. It also ensures that the
- * device stays powered up so that the driver can be reloaded.
- *
- * It must be called with power domains already disabled (after a call to
- * intel_power_domains_disable()) and must be paired with
- * intel_power_domains_init_hw().
- */
-void intel_power_domains_fini_hw(struct drm_i915_private *i915)
+static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
+					      bool wakelock)
 {
-	intel_wakeref_t wakeref __maybe_unused =
-		fetch_and_zero(&i915->power_domains.wakeref);
-
-	/* Remove the refcount we took to keep power well support disabled. */
-	if (!i915_modparams.disable_power_well)
-		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
-
-	intel_power_domains_verify_state(i915);
-
-	/* Keep the power well enabled, but cancel its rpm wakeref. */
-	intel_runtime_pm_put(i915, wakeref);
-}
-
-/**
- * intel_power_domains_enable - enable toggling of display power wells
- * @i915: i915 device instance
- *
- * Enable the ondemand enabling/disabling of the display power wells. Note that
- * power wells not belonging to POWER_DOMAIN_INIT are allowed to be toggled
- * only at specific points of the display modeset sequence, thus they are not
- * affected by the intel_power_domains_enable()/disable() calls. The purpose
- * of these function is to keep the rest of power wells enabled until the end
- * of display HW readout (which will acquire the power references reflecting
- * the current HW state).
- */
-void intel_power_domains_enable(struct drm_i915_private *i915)
-{
-	intel_wakeref_t wakeref __maybe_unused =
-		fetch_and_zero(&i915->power_domains.wakeref);
-
-	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
-	intel_power_domains_verify_state(i915);
-}
+	int ret;
 
-/**
- * intel_power_domains_disable - disable toggling of display power wells
- * @i915: i915 device instance
- *
- * Disable the ondemand enabling/disabling of the display power wells. See
- * intel_power_domains_enable() for which power wells this call controls.
- */
-void intel_power_domains_disable(struct drm_i915_private *i915)
-{
-	struct i915_power_domains *power_domains = &i915->power_domains;
+	ret = pm_runtime_get_sync(rpm->kdev);
+	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
-	WARN_ON(power_domains->wakeref);
-	power_domains->wakeref =
-		intel_display_power_get(i915, POWER_DOMAIN_INIT);
+	intel_runtime_pm_acquire(rpm, wakelock);
 
-	intel_power_domains_verify_state(i915);
+	return track_intel_runtime_pm_wakeref(rpm);
 }
 
 /**
- * intel_power_domains_suspend - suspend power domain state
- * @i915: i915 device instance
- * @suspend_mode: specifies the target suspend state (idle, mem, hibernation)
+ * intel_runtime_pm_get_raw - grab a raw runtime pm reference
+ * @rpm: the intel_runtime_pm structure
  *
- * This function prepares the hardware power domain state before entering
- * system suspend.
- *
- * It must be called with power domains already disabled (after a call to
- * intel_power_domains_disable()) and paired with intel_power_domains_resume().
- */
-void intel_power_domains_suspend(struct drm_i915_private *i915,
-				 enum i915_drm_suspend_mode suspend_mode)
-{
-	struct i915_power_domains *power_domains = &i915->power_domains;
-	intel_wakeref_t wakeref __maybe_unused =
-		fetch_and_zero(&power_domains->wakeref);
-
-	intel_display_power_put(i915, POWER_DOMAIN_INIT, wakeref);
-
-	/*
-	 * In case of suspend-to-idle (aka S0ix) on a DMC platform without DC9
-	 * support don't manually deinit the power domains. This also means the
-	 * CSR/DMC firmware will stay active, it will power down any HW
-	 * resources as required and also enable deeper system power states
-	 * that would be blocked if the firmware was inactive.
-	 */
-	if (!(i915->csr.allowed_dc_mask & DC_STATE_EN_DC9) &&
-	    suspend_mode == I915_DRM_SUSPEND_IDLE &&
-	    i915->csr.dmc_payload) {
-		intel_power_domains_verify_state(i915);
-		return;
-	}
-
-	/*
-	 * Even if power well support was disabled we still want to disable
-	 * power wells if power domains must be deinitialized for suspend.
-	 */
-	if (!i915_modparams.disable_power_well) {
-		intel_display_power_put_unchecked(i915, POWER_DOMAIN_INIT);
-		intel_power_domains_verify_state(i915);
-	}
-
-	if (INTEL_GEN(i915) >= 11)
-		icl_display_core_uninit(i915);
-	else if (IS_CANNONLAKE(i915))
-		cnl_display_core_uninit(i915);
-	else if (IS_GEN9_BC(i915))
-		skl_display_core_uninit(i915);
-	else if (IS_GEN9_LP(i915))
-		bxt_display_core_uninit(i915);
-
-	power_domains->display_core_suspended = true;
-}
-
-/**
- * intel_power_domains_resume - resume power domain state
- * @i915: i915 device instance
+ * This is the unlocked version of intel_display_power_is_enabled() and should
+ * only be used from error capture and recovery code where deadlocks are
+ * possible.
+ * This function grabs a device-level runtime pm reference (mostly used for
+ * asynchronous PM management from display code) and ensures that it is powered
+ * up. Raw references are not considered during wakelock assert checks.
  *
- * This function resume the hardware power domain state during system resume.
+ * Any runtime pm reference obtained by this function must have a symmetric
+ * call to intel_runtime_pm_put_raw() to release the reference again.
  *
- * It will return with power domain support disabled (to be enabled later by
- * intel_power_domains_enable()) and must be paired with
- * intel_power_domains_suspend().
+ * Returns: the wakeref cookie to pass to intel_runtime_pm_put_raw(), evaluates
+ * as True if the wakeref was acquired, or False otherwise.
  */
-void intel_power_domains_resume(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm)
 {
-	struct i915_power_domains *power_domains = &i915->power_domains;
-
-	if (power_domains->display_core_suspended) {
-		intel_power_domains_init_hw(i915, true);
-		power_domains->display_core_suspended = false;
-	} else {
-		WARN_ON(power_domains->wakeref);
-		power_domains->wakeref =
-			intel_display_power_get(i915, POWER_DOMAIN_INIT);
-	}
-
-	intel_power_domains_verify_state(i915);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-
-static void intel_power_domains_dump_info(struct drm_i915_private *i915)
-{
-	struct i915_power_domains *power_domains = &i915->power_domains;
-	struct i915_power_well *power_well;
-
-	for_each_power_well(i915, power_well) {
-		enum intel_display_power_domain domain;
-
-		DRM_DEBUG_DRIVER("%-25s %d\n",
-				 power_well->desc->name, power_well->count);
-
-		for_each_power_domain(domain, power_well->desc->domains)
-			DRM_DEBUG_DRIVER("  %-23s %d\n",
-					 intel_display_power_domain_str(domain),
-					 power_domains->domain_use_count[domain]);
-	}
+	return __intel_runtime_pm_get(rpm, false);
 }
 
 /**
- * intel_power_domains_verify_state - verify the HW/SW state for all power wells
- * @i915: i915 device instance
- *
- * Verify if the reference count of each power well matches its HW enabled
- * state and the total refcount of the domains it belongs to. This must be
- * called after modeset HW state sanitization, which is responsible for
- * acquiring reference counts for any power wells in use and disabling the
- * ones left on by BIOS but not required by any active output.
- */
-static void intel_power_domains_verify_state(struct drm_i915_private *i915)
-{
-	struct i915_power_domains *power_domains = &i915->power_domains;
-	struct i915_power_well *power_well;
-	bool dump_domain_info;
-
-	mutex_lock(&power_domains->lock);
-
-	dump_domain_info = false;
-	for_each_power_well(i915, power_well) {
-		enum intel_display_power_domain domain;
-		int domains_count;
-		bool enabled;
-
-		enabled = power_well->desc->ops->is_enabled(i915, power_well);
-		if ((power_well->count || power_well->desc->always_on) !=
-		    enabled)
-			DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)",
-				  power_well->desc->name,
-				  power_well->count, enabled);
-
-		domains_count = 0;
-		for_each_power_domain(domain, power_well->desc->domains)
-			domains_count += power_domains->domain_use_count[domain];
-
-		if (power_well->count != domains_count) {
-			DRM_ERROR("power well %s refcount/domain refcount mismatch "
-				  "(refcount %d/domains refcount %d)\n",
-				  power_well->desc->name, power_well->count,
-				  domains_count);
-			dump_domain_info = true;
-		}
-	}
-
-	if (dump_domain_info) {
-		static bool dumped;
-
-		if (!dumped) {
-			intel_power_domains_dump_info(i915);
-			dumped = true;
-		}
-	}
-
-	mutex_unlock(&power_domains->lock);
-}
-
-#else
-
-static void intel_power_domains_verify_state(struct drm_i915_private *i915)
-{
-}
-
-#endif
-
-/**
  * intel_runtime_pm_get - grab a runtime pm reference
- * @i915: i915 device instance
+ * @rpm: the intel_runtime_pm structure
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on) and ensures that it is powered up.
@@ -4332,21 +401,14 @@ static void intel_power_domains_verify_state(struct drm_i915_private *i915)
  *
  * Returns: the wakeref cookie to pass to intel_runtime_pm_put()
  */
-intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-	struct device *kdev = &pdev->dev;
-	int ret;
-
-	ret = pm_runtime_get_sync(kdev);
-	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
-
-	return track_intel_runtime_pm_wakeref(i915);
+	return __intel_runtime_pm_get(rpm, true);
 }
 
 /**
  * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
- * @i915: i915 device instance
+ * @rpm: the intel_runtime_pm structure
  *
  * This function grabs a device-level runtime pm reference if the device is
  * already in use and ensures that it is powered up. It is illegal to try
@@ -4358,28 +420,27 @@ intel_wakeref_t intel_runtime_pm_get(struct drm_i915_private *i915)
  * Returns: the wakeref cookie to pass to intel_runtime_pm_put(), evaluates
  * as True if the wakeref was acquired, or False otherwise.
  */
-intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm)
 {
 	if (IS_ENABLED(CONFIG_PM)) {
-		struct pci_dev *pdev = i915->drm.pdev;
-		struct device *kdev = &pdev->dev;
-
 		/*
 		 * In cases runtime PM is disabled by the RPM core and we get
 		 * an -EINVAL return value we are not supposed to call this
 		 * function, since the power state is undefined. This applies
 		 * atm to the late/early system suspend/resume handlers.
 		 */
-		if (pm_runtime_get_if_in_use(kdev) <= 0)
+		if (pm_runtime_get_if_in_use(rpm->kdev) <= 0)
 			return 0;
 	}
 
-	return track_intel_runtime_pm_wakeref(i915);
+	intel_runtime_pm_acquire(rpm, true);
+
+	return track_intel_runtime_pm_wakeref(rpm);
 }
 
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
- * @i915: i915 device instance
+ * @rpm: the intel_runtime_pm structure
  *
  * This function grabs a device-level runtime pm reference (mostly used for GEM
  * code to ensure the GTT or GT is on).
@@ -4396,47 +457,81 @@ intel_wakeref_t intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915)
  *
  * Returns: the wakeref cookie to pass to intel_runtime_pm_put()
  */
-intel_wakeref_t intel_runtime_pm_get_noresume(struct drm_i915_private *i915)
+intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-	struct device *kdev = &pdev->dev;
+	assert_rpm_wakelock_held(rpm);
+	pm_runtime_get_noresume(rpm->kdev);
 
-	assert_rpm_wakelock_held(i915);
-	pm_runtime_get_noresume(kdev);
+	intel_runtime_pm_acquire(rpm, true);
 
-	return track_intel_runtime_pm_wakeref(i915);
+	return track_intel_runtime_pm_wakeref(rpm);
+}
+
+static void __intel_runtime_pm_put(struct intel_runtime_pm *rpm,
+				   intel_wakeref_t wref,
+				   bool wakelock)
+{
+	struct device *kdev = rpm->kdev;
+
+	untrack_intel_runtime_pm_wakeref(rpm, wref);
+
+	intel_runtime_pm_release(rpm, wakelock);
+
+	pm_runtime_mark_last_busy(kdev);
+	pm_runtime_put_autosuspend(kdev);
 }
 
 /**
- * intel_runtime_pm_put - release a runtime pm reference
- * @i915: i915 device instance
+ * intel_runtime_pm_put_raw - release a raw runtime pm reference
+ * @rpm: the intel_runtime_pm structure
+ * @wref: wakeref acquired for the reference that is being released
  *
  * This function drops the device-level runtime pm reference obtained by
- * intel_runtime_pm_get() and might power down the corresponding
+ * intel_runtime_pm_get_raw() and might power down the corresponding
  * hardware block right away if this is the last reference.
  */
-void intel_runtime_pm_put_unchecked(struct drm_i915_private *i915)
+void
+intel_runtime_pm_put_raw(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-	struct device *kdev = &pdev->dev;
-
-	untrack_intel_runtime_pm_wakeref(i915);
+	__intel_runtime_pm_put(rpm, wref, false);
+}
 
-	pm_runtime_mark_last_busy(kdev);
-	pm_runtime_put_autosuspend(kdev);
+/**
+ * intel_runtime_pm_put_unchecked - release an unchecked runtime pm reference
+ * @rpm: the intel_runtime_pm structure
+ *
+ * This function drops the device-level runtime pm reference obtained by
+ * intel_runtime_pm_get() and might power down the corresponding
+ * hardware block right away if this is the last reference.
+ *
+ * This function exists only for historical reasons and should be avoided in
+ * new code, as the correctness of its use cannot be checked. Always use
+ * intel_runtime_pm_put() instead.
+ */
+void intel_runtime_pm_put_unchecked(struct intel_runtime_pm *rpm)
+{
+	__intel_runtime_pm_put(rpm, -1, true);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
-void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref)
+/**
+ * intel_runtime_pm_put - release a runtime pm reference
+ * @rpm: the intel_runtime_pm structure
+ * @wref: wakeref acquired for the reference that is being released
+ *
+ * This function drops the device-level runtime pm reference obtained by
+ * intel_runtime_pm_get() and might power down the corresponding
+ * hardware block right away if this is the last reference.
+ */
+void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
 {
-	cancel_intel_runtime_pm_wakeref(i915, wref);
-	intel_runtime_pm_put_unchecked(i915);
+	__intel_runtime_pm_put(rpm, wref, true);
 }
 #endif
 
 /**
  * intel_runtime_pm_enable - enable runtime pm
- * @i915: i915 device instance
+ * @rpm: the intel_runtime_pm structure
  *
  * This function enables runtime pm at the end of the driver load sequence.
  *
@@ -4444,10 +539,9 @@ void intel_runtime_pm_put(struct drm_i915_private *i915, intel_wakeref_t wref)
  * subordinate display power domains. That is done by
  * intel_power_domains_enable().
  */
-void intel_runtime_pm_enable(struct drm_i915_private *i915)
+void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-	struct device *kdev = &pdev->dev;
+	struct device *kdev = rpm->kdev;
 
 	/*
 	 * Disable the system suspend direct complete optimization, which can
@@ -4468,7 +562,7 @@ void intel_runtime_pm_enable(struct drm_i915_private *i915)
 	 * so the driver's own RPM reference tracking asserts also work on
 	 * platforms without RPM support.
 	 */
-	if (!HAS_RUNTIME_PM(i915)) {
+	if (!rpm->available) {
 		int ret;
 
 		pm_runtime_dont_use_autosuspend(kdev);
@@ -4486,10 +580,9 @@ void intel_runtime_pm_enable(struct drm_i915_private *i915)
 	pm_runtime_put_autosuspend(kdev);
 }
 
-void intel_runtime_pm_disable(struct drm_i915_private *i915)
+void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
 {
-	struct pci_dev *pdev = i915->drm.pdev;
-	struct device *kdev = &pdev->dev;
+	struct device *kdev = rpm->kdev;
 
 	/* Transfer rpm ownership back to core */
 	WARN(pm_runtime_get_sync(kdev) < 0,
@@ -4497,24 +590,31 @@ void intel_runtime_pm_disable(struct drm_i915_private *i915)
 
 	pm_runtime_dont_use_autosuspend(kdev);
 
-	if (!HAS_RUNTIME_PM(i915))
+	if (!rpm->available)
 		pm_runtime_put(kdev);
 }
 
-void intel_runtime_pm_cleanup(struct drm_i915_private *i915)
+void intel_runtime_pm_cleanup(struct intel_runtime_pm *rpm)
 {
-	struct i915_runtime_pm *rpm = &i915->runtime_pm;
-	int count;
+	int count = atomic_read(&rpm->wakeref_count);
 
-	count = atomic_fetch_inc(&rpm->wakeref_count); /* balance untrack */
 	WARN(count,
-	     "i915->runtime_pm.wakeref_count=%d on cleanup\n",
-	     count);
+	     "i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
+	     intel_rpm_raw_wakeref_count(count),
+	     intel_rpm_wakelock_count(count));
 
-	untrack_intel_runtime_pm_wakeref(i915);
+	untrack_all_intel_runtime_pm_wakerefs(rpm);
 }
 
-void intel_runtime_pm_init_early(struct drm_i915_private *i915)
+void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
 {
-	init_intel_runtime_pm_wakeref(i915);
+	struct drm_i915_private *i915 =
+			container_of(rpm, struct drm_i915_private, runtime_pm);
+	struct pci_dev *pdev = i915->drm.pdev;
+	struct device *kdev = &pdev->dev;
+
+	rpm->kdev = kdev;
+	rpm->available = HAS_RUNTIME_PM(i915);
+
+	init_intel_runtime_pm_wakeref(rpm);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
new file mode 100644
index 000000000000..473c4850c01d
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_RUNTIME_PM_H__
+#define __INTEL_RUNTIME_PM_H__
+
+#include <linux/types.h>
+
+#include "display/intel_display.h"
+
+#include "intel_wakeref.h"
+
+#include "i915_utils.h"
+
+struct device;
+struct drm_i915_private;
+struct drm_printer;
+
+enum i915_drm_suspend_mode {
+	I915_DRM_SUSPEND_IDLE,
+	I915_DRM_SUSPEND_MEM,
+	I915_DRM_SUSPEND_HIBERNATE,
+};
+
+/*
+ * This struct helps tracking the state needed for runtime PM, which puts the
+ * device in PCI D3 state. Notice that when this happens, nothing on the
+ * graphics device works, even register access, so we don't get interrupts nor
+ * anything else.
+ *
+ * Every piece of our code that needs to actually touch the hardware needs to
+ * either call intel_runtime_pm_get or call intel_display_power_get with the
+ * appropriate power domain.
+ *
+ * Our driver uses the autosuspend delay feature, which means we'll only really
+ * suspend if we stay with zero refcount for a certain amount of time. The
+ * default value is currently very conservative (see intel_runtime_pm_enable), but
+ * it can be changed with the standard runtime PM files from sysfs.
+ *
+ * The irqs_disabled variable becomes true exactly after we disable the IRQs and
+ * goes back to false exactly before we reenable the IRQs. We use this variable
+ * to check if someone is trying to enable/disable IRQs while they're supposed
+ * to be disabled. This shouldn't happen and we'll print some error messages in
+ * case it happens.
+ *
+ * For more, read the Documentation/power/runtime_pm.txt.
+ */
+struct intel_runtime_pm {
+	atomic_t wakeref_count;
+	struct device *kdev; /* points to i915->drm.pdev->dev */
+	bool available;
+	bool suspended;
+	bool irqs_enabled;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+	/*
+	 * To aide detection of wakeref leaks and general misuse, we
+	 * track all wakeref holders. With manual markup (i.e. returning
+	 * a cookie to each rpm_get caller which they then supply to their
+	 * paired rpm_put) we can remove corresponding pairs of and keep
+	 * the array trimmed to active wakerefs.
+	 */
+	struct intel_runtime_pm_debug {
+		spinlock_t lock;
+
+		depot_stack_handle_t last_acquire;
+		depot_stack_handle_t last_release;
+
+		depot_stack_handle_t *owners;
+		unsigned long count;
+	} debug;
+#endif
+};
+
+#define BITS_PER_WAKEREF	\
+	BITS_PER_TYPE(struct_member(struct intel_runtime_pm, wakeref_count))
+#define INTEL_RPM_WAKELOCK_SHIFT	(BITS_PER_WAKEREF / 2)
+#define INTEL_RPM_WAKELOCK_BIAS		(1 << INTEL_RPM_WAKELOCK_SHIFT)
+#define INTEL_RPM_RAW_WAKEREF_MASK	(INTEL_RPM_WAKELOCK_BIAS - 1)
+
+static inline int
+intel_rpm_raw_wakeref_count(int wakeref_count)
+{
+	return wakeref_count & INTEL_RPM_RAW_WAKEREF_MASK;
+}
+
+static inline int
+intel_rpm_wakelock_count(int wakeref_count)
+{
+	return wakeref_count >> INTEL_RPM_WAKELOCK_SHIFT;
+}
+
+static inline void
+assert_rpm_device_not_suspended(struct intel_runtime_pm *rpm)
+{
+	WARN_ONCE(rpm->suspended,
+		  "Device suspended during HW access\n");
+}
+
+static inline void
+__assert_rpm_raw_wakeref_held(struct intel_runtime_pm *rpm, int wakeref_count)
+{
+	assert_rpm_device_not_suspended(rpm);
+	WARN_ONCE(!intel_rpm_raw_wakeref_count(wakeref_count),
+		  "RPM raw-wakeref not held\n");
+}
+
+static inline void
+__assert_rpm_wakelock_held(struct intel_runtime_pm *rpm, int wakeref_count)
+{
+	__assert_rpm_raw_wakeref_held(rpm, wakeref_count);
+	WARN_ONCE(!intel_rpm_wakelock_count(wakeref_count),
+		  "RPM wakelock ref not held during HW access\n");
+}
+
+static inline void
+assert_rpm_raw_wakeref_held(struct intel_runtime_pm *rpm)
+{
+	__assert_rpm_raw_wakeref_held(rpm, atomic_read(&rpm->wakeref_count));
+}
+
+static inline void
+assert_rpm_wakelock_held(struct intel_runtime_pm *rpm)
+{
+	__assert_rpm_wakelock_held(rpm, atomic_read(&rpm->wakeref_count));
+}
+
+/**
+ * disable_rpm_wakeref_asserts - disable the RPM assert checks
+ * @rpm: the intel_runtime_pm structure
+ *
+ * This function disable asserts that check if we hold an RPM wakelock
+ * reference, while keeping the device-not-suspended checks still enabled.
+ * It's meant to be used only in special circumstances where our rule about
+ * the wakelock refcount wrt. the device power state doesn't hold. According
+ * to this rule at any point where we access the HW or want to keep the HW in
+ * an active state we must hold an RPM wakelock reference acquired via one of
+ * the intel_runtime_pm_get() helpers. Currently there are a few special spots
+ * where this rule doesn't hold: the IRQ and suspend/resume handlers, the
+ * forcewake release timer, and the GPU RPS and hangcheck works. All other
+ * users should avoid using this function.
+ *
+ * Any calls to this function must have a symmetric call to
+ * enable_rpm_wakeref_asserts().
+ */
+static inline void
+disable_rpm_wakeref_asserts(struct intel_runtime_pm *rpm)
+{
+	atomic_add(INTEL_RPM_WAKELOCK_BIAS + 1,
+		   &rpm->wakeref_count);
+}
+
+/**
+ * enable_rpm_wakeref_asserts - re-enable the RPM assert checks
+ * @rpm: the intel_runtime_pm structure
+ *
+ * This function re-enables the RPM assert checks after disabling them with
+ * disable_rpm_wakeref_asserts. It's meant to be used only in special
+ * circumstances otherwise its use should be avoided.
+ *
+ * Any calls to this function must have a symmetric call to
+ * disable_rpm_wakeref_asserts().
+ */
+static inline void
+enable_rpm_wakeref_asserts(struct intel_runtime_pm *rpm)
+{
+	atomic_sub(INTEL_RPM_WAKELOCK_BIAS + 1,
+		   &rpm->wakeref_count);
+}
+
+void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
+void intel_runtime_pm_cleanup(struct intel_runtime_pm *rpm);
+
+intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
+intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm);
+intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm);
+
+#define with_intel_runtime_pm(rpm, wf) \
+	for ((wf) = intel_runtime_pm_get(rpm); (wf); \
+	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+#define with_intel_runtime_pm_if_in_use(rpm, wf) \
+	for ((wf) = intel_runtime_pm_get_if_in_use(rpm); (wf); \
+	     intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
+void intel_runtime_pm_put_unchecked(struct intel_runtime_pm *rpm);
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref);
+#else
+static inline void
+intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
+{
+	intel_runtime_pm_put_unchecked(rpm);
+}
+#endif
+void intel_runtime_pm_put_raw(struct intel_runtime_pm *rpm, intel_wakeref_t wref);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
+void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
+				    struct drm_printer *p);
+#else
+static inline void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
+						  struct drm_printer *p)
+{
+}
+#endif
+
+#endif /* __INTEL_RUNTIME_PM_H__ */
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 57de41b1f989..a115625e980c 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -22,6 +22,10 @@
  *
  */
 
+#include <asm/iosf_mbi.h>
+
+#include "intel_sideband.h"
+
 #include "i915_drv.h"
 #include "intel_drv.h"
 
@@ -39,19 +43,68 @@
 /* Private register write, double-word addressing, non-posted */
 #define SB_CRWRDA_NP	0x07
 
-static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
-			   u32 port, u32 opcode, u32 addr, u32 *val)
+static void ping(void *info)
+{
+}
+
+static void __vlv_punit_get(struct drm_i915_private *i915)
+{
+	iosf_mbi_punit_acquire();
+
+	/*
+	 * Prevent the cpu from sleeping while we use this sideband, otherwise
+	 * the punit may cause a machine hang. The issue appears to be isolated
+	 * with changing the power state of the CPU package while changing
+	 * the power state via the punit, and we have only observed it
+	 * reliably on 4-core Baytail systems suggesting the issue is in the
+	 * power delivery mechanism and likely to be be board/function
+	 * specific. Hence we presume the workaround needs only be applied
+	 * to the Valleyview P-unit and not all sideband communications.
+	 */
+	if (IS_VALLEYVIEW(i915)) {
+		pm_qos_update_request(&i915->sb_qos, 0);
+		on_each_cpu(ping, NULL, 1);
+	}
+}
+
+static void __vlv_punit_put(struct drm_i915_private *i915)
+{
+	if (IS_VALLEYVIEW(i915))
+		pm_qos_update_request(&i915->sb_qos, PM_QOS_DEFAULT_VALUE);
+
+	iosf_mbi_punit_release();
+}
+
+void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports)
 {
-	u32 cmd, be = 0xf, bar = 0;
-	bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_get(i915);
 
-	cmd = (devfn << IOSF_DEVFN_SHIFT) | (opcode << IOSF_OPCODE_SHIFT) |
-		(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
-		(bar << IOSF_BAR_SHIFT);
+	mutex_lock(&i915->sb_lock);
+}
+
+void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports)
+{
+	mutex_unlock(&i915->sb_lock);
+
+	if (ports & BIT(VLV_IOSF_SB_PUNIT))
+		__vlv_punit_put(i915);
+}
+
+static int vlv_sideband_rw(struct drm_i915_private *i915,
+			   u32 devfn, u32 port, u32 opcode,
+			   u32 addr, u32 *val)
+{
+	struct intel_uncore *uncore = &i915->uncore;
+	const bool is_read = (opcode == SB_MRD_NP || opcode == SB_CRRDDA_NP);
+	int err;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	lockdep_assert_held(&i915->sb_lock);
+	if (port == IOSF_PORT_PUNIT)
+		iosf_mbi_assert_punit_acquired();
 
-	if (intel_wait_for_register(&dev_priv->uncore,
+	/* Flush the previous comms, just in case it failed last time. */
+	if (intel_wait_for_register(uncore,
 				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
 				    5)) {
 		DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n",
@@ -59,131 +112,132 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
 		return -EAGAIN;
 	}
 
-	I915_WRITE(VLV_IOSF_ADDR, addr);
-	I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val);
-	I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd);
-
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
-				    5)) {
+	preempt_disable();
+
+	intel_uncore_write_fw(uncore, VLV_IOSF_ADDR, addr);
+	intel_uncore_write_fw(uncore, VLV_IOSF_DATA, is_read ? 0 : *val);
+	intel_uncore_write_fw(uncore, VLV_IOSF_DOORBELL_REQ,
+			      (devfn << IOSF_DEVFN_SHIFT) |
+			      (opcode << IOSF_OPCODE_SHIFT) |
+			      (port << IOSF_PORT_SHIFT) |
+			      (0xf << IOSF_BYTE_ENABLES_SHIFT) |
+			      (0 << IOSF_BAR_SHIFT) |
+			      IOSF_SB_BUSY);
+
+	if (__intel_wait_for_register_fw(uncore,
+					 VLV_IOSF_DOORBELL_REQ, IOSF_SB_BUSY, 0,
+					 10000, 0, NULL) == 0) {
+		if (is_read)
+			*val = intel_uncore_read_fw(uncore, VLV_IOSF_DATA);
+		err = 0;
+	} else {
 		DRM_DEBUG_DRIVER("IOSF sideband finish wait (%s) timed out\n",
 				 is_read ? "read" : "write");
-		return -ETIMEDOUT;
+		err = -ETIMEDOUT;
 	}
 
-	if (is_read)
-		*val = I915_READ(VLV_IOSF_DATA);
+	preempt_enable();
 
-	return 0;
+	return err;
 }
 
-u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
+u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	mutex_lock(&dev_priv->sb_lock);
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
 			SB_CRRDDA_NP, addr, &val);
-	mutex_unlock(&dev_priv->sb_lock);
 
 	return val;
 }
 
-int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
+int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val)
 {
-	int err;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	mutex_lock(&dev_priv->sb_lock);
-	err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
-			      SB_CRWRDA_NP, addr, &val);
-	mutex_unlock(&dev_priv->sb_lock);
-
-	return err;
+	return vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
+			       SB_CRWRDA_NP, addr, &val);
 }
 
-u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
+u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg)
 {
 	u32 val = 0;
 
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
 			SB_CRRDDA_NP, reg, &val);
 
 	return val;
 }
 
-void vlv_bunit_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_BUNIT,
 			SB_CRWRDA_NP, reg, &val);
 }
 
-u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
+u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
 {
 	u32 val = 0;
 
-	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
-
-	mutex_lock(&dev_priv->sb_lock);
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_NC,
 			SB_CRRDDA_NP, addr, &val);
-	mutex_unlock(&dev_priv->sb_lock);
 
 	return val;
 }
 
-u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg)
+u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port,
+
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port,
 			SB_CRRDDA_NP, reg, &val);
+
 	return val;
 }
 
-void vlv_iosf_sb_write(struct drm_i915_private *dev_priv,
+void vlv_iosf_sb_write(struct drm_i915_private *i915,
 		       u8 port, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), port,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), port,
 			SB_CRWRDA_NP, reg, &val);
 }
 
-u32 vlv_cck_read(struct drm_i915_private *dev_priv, u32 reg)
+u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
+
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
 			SB_CRRDDA_NP, reg, &val);
+
 	return val;
 }
 
-void vlv_cck_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCK,
 			SB_CRWRDA_NP, reg, &val);
 }
 
-u32 vlv_ccu_read(struct drm_i915_private *dev_priv, u32 reg)
+u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg)
 {
 	u32 val = 0;
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
+
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
 			SB_CRRDDA_NP, reg, &val);
+
 	return val;
 }
 
-void vlv_ccu_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
+	vlv_sideband_rw(i915, PCI_DEVFN(0, 0), IOSF_PORT_CCU,
 			SB_CRWRDA_NP, reg, &val);
 }
 
-u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
+u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg)
 {
+	int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)];
 	u32 val = 0;
 
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)),
-			SB_MRD_NP, reg, &val);
+	vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MRD_NP, reg, &val);
 
 	/*
 	 * FIXME: There might be some registers where all 1's is a valid value,
@@ -195,101 +249,286 @@ u32 vlv_dpio_read(struct drm_i915_private *dev_priv, enum pipe pipe, int reg)
 	return val;
 }
 
-void vlv_dpio_write(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 val)
+void vlv_dpio_write(struct drm_i915_private *i915,
+		    enum pipe pipe, int reg, u32 val)
+{
+	int port = i915->dpio_phy_iosf_port[DPIO_PHY(pipe)];
+
+	vlv_sideband_rw(i915, DPIO_DEVFN, port, SB_MWR_NP, reg, &val);
+}
+
+u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg)
+{
+	u32 val = 0;
+
+	vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
+			reg, &val);
+	return val;
+}
+
+void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val)
 {
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, DPIO_PHY_IOSF_PORT(DPIO_PHY(pipe)),
-			SB_MWR_NP, reg, &val);
+	vlv_sideband_rw(i915, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP,
+			reg, &val);
 }
 
 /* SBI access */
-u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
-		   enum intel_sbi_destination destination)
+static int intel_sbi_rw(struct drm_i915_private *i915, u16 reg,
+			enum intel_sbi_destination destination,
+			u32 *val, bool is_read)
 {
-	u32 value = 0;
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+	struct intel_uncore *uncore = &i915->uncore;
+	u32 cmd;
+
+	lockdep_assert_held(&i915->sb_lock);
 
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
+	if (intel_wait_for_register_fw(uncore,
+				       SBI_CTL_STAT, SBI_BUSY, 0,
+				       100)) {
 		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return 0;
+		return -EBUSY;
 	}
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, 0);
+	intel_uncore_write_fw(uncore, SBI_ADDR, (u32)reg << 16);
+	intel_uncore_write_fw(uncore, SBI_DATA, is_read ? 0 : *val);
 
 	if (destination == SBI_ICLK)
-		value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
+		cmd = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD;
 	else
-		value = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
-	I915_WRITE(SBI_CTL_STAT, value | SBI_BUSY);
-
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
+		cmd = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IORD;
+	if (!is_read)
+		cmd |= BIT(8);
+	intel_uncore_write_fw(uncore, SBI_CTL_STAT, cmd | SBI_BUSY);
+
+	if (__intel_wait_for_register_fw(uncore,
+					 SBI_CTL_STAT, SBI_BUSY, 0,
+					 100, 100, &cmd)) {
 		DRM_ERROR("timeout waiting for SBI to complete read\n");
-		return 0;
+		return -ETIMEDOUT;
 	}
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
+	if (cmd & SBI_RESPONSE_FAIL) {
 		DRM_ERROR("error during SBI read of reg %x\n", reg);
-		return 0;
+		return -ENXIO;
 	}
 
-	return I915_READ(SBI_DATA);
+	if (is_read)
+		*val = intel_uncore_read_fw(uncore, SBI_DATA);
+
+	return 0;
+}
+
+u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
+		   enum intel_sbi_destination destination)
+{
+	u32 result = 0;
+
+	intel_sbi_rw(i915, reg, destination, &result, true);
+
+	return result;
 }
 
-void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
+void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
 		     enum intel_sbi_destination destination)
 {
-	u32 tmp;
+	intel_sbi_rw(i915, reg, destination, &value, false);
+}
 
-	WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
+static inline int gen6_check_mailbox_status(u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_UNIMPLEMENTED_CMD:
+		return -ENODEV;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	case GEN6_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
+	}
+}
 
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    SBI_CTL_STAT, SBI_BUSY, 0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to become ready\n");
-		return;
+static inline int gen7_check_mailbox_status(u32 mbox)
+{
+	switch (mbox & GEN6_PCODE_ERROR_MASK) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN7_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	case GEN7_PCODE_ILLEGAL_DATA:
+		return -EINVAL;
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	default:
+		MISSING_CASE(mbox & GEN6_PCODE_ERROR_MASK);
+		return 0;
 	}
+}
 
-	I915_WRITE(SBI_ADDR, (reg << 16));
-	I915_WRITE(SBI_DATA, value);
+static int __sandybridge_pcode_rw(struct drm_i915_private *i915,
+				  u32 mbox, u32 *val, u32 *val1,
+				  int fast_timeout_us,
+				  int slow_timeout_ms,
+				  bool is_read)
+{
+	struct intel_uncore *uncore = &i915->uncore;
 
-	if (destination == SBI_ICLK)
-		tmp = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRWR;
+	lockdep_assert_held(&i915->sb_lock);
+
+	/*
+	 * GEN6_PCODE_* are outside of the forcewake domain, we can
+	 * use te fw I915_READ variants to reduce the amount of work
+	 * required when reading/writing.
+	 */
+
+	if (intel_uncore_read_fw(uncore, GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY)
+		return -EAGAIN;
+
+	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA, *val);
+	intel_uncore_write_fw(uncore, GEN6_PCODE_DATA1, val1 ? *val1 : 0);
+	intel_uncore_write_fw(uncore,
+			      GEN6_PCODE_MAILBOX, GEN6_PCODE_READY | mbox);
+
+	if (__intel_wait_for_register_fw(uncore,
+					 GEN6_PCODE_MAILBOX,
+					 GEN6_PCODE_READY, 0,
+					 fast_timeout_us,
+					 slow_timeout_ms,
+					 &mbox))
+		return -ETIMEDOUT;
+
+	if (is_read)
+		*val = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA);
+	if (is_read && val1)
+		*val1 = intel_uncore_read_fw(uncore, GEN6_PCODE_DATA1);
+
+	if (INTEL_GEN(i915) > 6)
+		return gen7_check_mailbox_status(mbox);
 	else
-		tmp = SBI_CTL_DEST_MPHY | SBI_CTL_OP_IOWR;
-	I915_WRITE(SBI_CTL_STAT, SBI_BUSY | tmp);
-
-	if (intel_wait_for_register(&dev_priv->uncore,
-				    SBI_CTL_STAT,
-				    SBI_BUSY,
-				    0,
-				    100)) {
-		DRM_ERROR("timeout waiting for SBI to complete write\n");
-		return;
+		return gen6_check_mailbox_status(mbox);
+}
+
+int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
+			   u32 *val, u32 *val1)
+{
+	int err;
+
+	mutex_lock(&i915->sb_lock);
+	err = __sandybridge_pcode_rw(i915, mbox, val, val1,
+				     500, 0,
+				     true);
+	mutex_unlock(&i915->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n",
+				 mbox, __builtin_return_address(0), err);
 	}
 
-	if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) {
-		DRM_ERROR("error during SBI write of %x to reg %x\n",
-			  value, reg);
-		return;
+	return err;
+}
+
+int sandybridge_pcode_write_timeout(struct drm_i915_private *i915,
+				    u32 mbox, u32 val,
+				    int fast_timeout_us,
+				    int slow_timeout_ms)
+{
+	int err;
+
+	mutex_lock(&i915->sb_lock);
+	err = __sandybridge_pcode_rw(i915, mbox, &val, NULL,
+				     fast_timeout_us, slow_timeout_ms,
+				     false);
+	mutex_unlock(&i915->sb_lock);
+
+	if (err) {
+		DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n",
+				 val, mbox, __builtin_return_address(0), err);
 	}
+
+	return err;
 }
 
-u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg)
+static bool skl_pcode_try_request(struct drm_i915_private *i915, u32 mbox,
+				  u32 request, u32 reply_mask, u32 reply,
+				  u32 *status)
 {
-	u32 val = 0;
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRRDDA_NP,
-			reg, &val);
-	return val;
+	*status = __sandybridge_pcode_rw(i915, mbox, &request, NULL,
+					 500, 0,
+					 true);
+
+	return *status || ((request & reply_mask) == reply);
 }
 
-void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+/**
+ * skl_pcode_request - send PCODE request until acknowledgment
+ * @i915: device private
+ * @mbox: PCODE mailbox ID the request is targeted for
+ * @request: request ID
+ * @reply_mask: mask used to check for request acknowledgment
+ * @reply: value used to check for request acknowledgment
+ * @timeout_base_ms: timeout for polling with preemption enabled
+ *
+ * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE
+ * reports an error or an overall timeout of @timeout_base_ms+50 ms expires.
+ * The request is acknowledged once the PCODE reply dword equals @reply after
+ * applying @reply_mask. Polling is first attempted with preemption enabled
+ * for @timeout_base_ms and if this times out for another 50 ms with
+ * preemption disabled.
+ *
+ * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some
+ * other error as reported by PCODE.
+ */
+int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms)
 {
-	vlv_sideband_rw(dev_priv, DPIO_DEVFN, IOSF_PORT_FLISDSI, SB_CRWRDA_NP,
-			reg, &val);
+	u32 status;
+	int ret;
+
+	mutex_lock(&i915->sb_lock);
+
+#define COND \
+	skl_pcode_try_request(i915, mbox, request, reply_mask, reply, &status)
+
+	/*
+	 * Prime the PCODE by doing a request first. Normally it guarantees
+	 * that a subsequent request, at most @timeout_base_ms later, succeeds.
+	 * _wait_for() doesn't guarantee when its passed condition is evaluated
+	 * first, so send the first request explicitly.
+	 */
+	if (COND) {
+		ret = 0;
+		goto out;
+	}
+	ret = _wait_for(COND, timeout_base_ms * 1000, 10, 10);
+	if (!ret)
+		goto out;
+
+	/*
+	 * The above can time out if the number of requests was low (2 in the
+	 * worst case) _and_ PCODE was busy for some reason even after a
+	 * (queued) request and @timeout_base_ms delay. As a workaround retry
+	 * the poll with preemption disabled to maximize the number of
+	 * requests. Increase the timeout from @timeout_base_ms to 50ms to
+	 * account for interrupts that could reduce the number of these
+	 * requests, and for any quirks of the PCODE firmware that delays
+	 * the request completion.
+	 */
+	DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n");
+	WARN_ON_ONCE(timeout_base_ms > 3);
+	preempt_disable();
+	ret = wait_for_atomic(COND, 50);
+	preempt_enable();
+
+out:
+	mutex_unlock(&i915->sb_lock);
+	return ret ? ret : status;
+#undef COND
 }
diff --git a/drivers/gpu/drm/i915/intel_sideband.h b/drivers/gpu/drm/i915/intel_sideband.h
new file mode 100644
index 000000000000..7fb95745a444
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sideband.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef _INTEL_SIDEBAND_H_
+#define _INTEL_SIDEBAND_H_
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+struct drm_i915_private;
+enum pipe;
+
+enum intel_sbi_destination {
+	SBI_ICLK,
+	SBI_MPHY,
+};
+
+enum {
+	VLV_IOSF_SB_BUNIT,
+	VLV_IOSF_SB_CCK,
+	VLV_IOSF_SB_CCU,
+	VLV_IOSF_SB_DPIO,
+	VLV_IOSF_SB_FLISDSI,
+	VLV_IOSF_SB_GPIO,
+	VLV_IOSF_SB_NC,
+	VLV_IOSF_SB_PUNIT,
+};
+
+void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports);
+u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg);
+void vlv_iosf_sb_write(struct drm_i915_private *i915,
+		       u8 port, u32 reg, u32 val);
+void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports);
+
+static inline void vlv_bunit_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_BUNIT));
+}
+
+u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg);
+void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val);
+
+static inline void vlv_bunit_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_BUNIT));
+}
+
+static inline void vlv_cck_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCK));
+}
+
+u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg);
+void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val);
+
+static inline void vlv_cck_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCK));
+}
+
+static inline void vlv_ccu_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_CCU));
+}
+
+u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg);
+void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val);
+
+static inline void vlv_ccu_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_CCU));
+}
+
+static inline void vlv_dpio_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_DPIO));
+}
+
+u32 vlv_dpio_read(struct drm_i915_private *i915, enum pipe pipe, int reg);
+void vlv_dpio_write(struct drm_i915_private *i915,
+		    enum pipe pipe, int reg, u32 val);
+
+static inline void vlv_dpio_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_DPIO));
+}
+
+static inline void vlv_flisdsi_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_FLISDSI));
+}
+
+u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg);
+void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val);
+
+static inline void vlv_flisdsi_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_FLISDSI));
+}
+
+static inline void vlv_nc_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_NC));
+}
+
+u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr);
+
+static inline void vlv_nc_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_NC));
+}
+
+static inline void vlv_punit_get(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_get(i915, BIT(VLV_IOSF_SB_PUNIT));
+}
+
+u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr);
+int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val);
+
+static inline void vlv_punit_put(struct drm_i915_private *i915)
+{
+	vlv_iosf_sb_put(i915, BIT(VLV_IOSF_SB_PUNIT));
+}
+
+u32 intel_sbi_read(struct drm_i915_private *i915, u16 reg,
+		   enum intel_sbi_destination destination);
+void intel_sbi_write(struct drm_i915_private *i915, u16 reg, u32 value,
+		     enum intel_sbi_destination destination);
+
+int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox,
+			   u32 *val, u32 *val1);
+int sandybridge_pcode_write_timeout(struct drm_i915_private *i915, u32 mbox,
+				    u32 val, int fast_timeout_us,
+				    int slow_timeout_ms);
+#define sandybridge_pcode_write(i915, mbox, val)	\
+	sandybridge_pcode_write_timeout(i915, mbox, val, 500, 0)
+
+int skl_pcode_request(struct drm_i915_private *i915, u32 mbox, u32 request,
+		      u32 reply_mask, u32 reply, int timeout_base_ms);
+
+#endif /* _INTEL_SIDEBAND_H */
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 25b80ffe71ad..ae45651ac73c 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -22,11 +22,12 @@
  *
  */
 
+#include "gt/intel_reset.h"
 #include "intel_uc.h"
-#include "intel_guc_submission.h"
 #include "intel_guc.h"
+#include "intel_guc_ads.h"
+#include "intel_guc_submission.h"
 #include "i915_drv.h"
-#include "i915_reset.h"
 
 static void guc_free_load_err_log(struct intel_guc *guc);
 
@@ -57,10 +58,8 @@ static int __get_platform_enable_guc(struct drm_i915_private *i915)
 	struct intel_uc_fw *huc_fw = &i915->huc.fw;
 	int enable_guc = 0;
 
-	/* Default is to enable GuC/HuC if we know their firmwares */
-	if (intel_uc_fw_is_selected(guc_fw))
-		enable_guc |= ENABLE_GUC_SUBMISSION;
-	if (intel_uc_fw_is_selected(huc_fw))
+	/* Default is to use HuC if we know GuC and HuC firmwares */
+	if (intel_uc_fw_is_selected(guc_fw) && intel_uc_fw_is_selected(huc_fw))
 		enable_guc |= ENABLE_GUC_LOAD_HUC;
 
 	/* Any platform specific fine-tuning can be done here */
@@ -132,6 +131,15 @@ static void sanitize_options_early(struct drm_i915_private *i915)
 					  "no HuC firmware");
 	}
 
+	/* XXX: GuC submission is unavailable for now */
+	if (intel_uc_is_using_guc_submission(i915)) {
+		DRM_INFO("Incompatible option detected: %s=%d, %s!\n",
+			 "enable_guc", i915_modparams.enable_guc,
+			 "GuC submission not supported");
+		DRM_INFO("Switching to non-GuC submission mode!\n");
+		i915_modparams.enable_guc &= ~ENABLE_GUC_SUBMISSION;
+	}
+
 	/* A negative value means "use platform/config default" */
 	if (i915_modparams.guc_log_level < 0)
 		i915_modparams.guc_log_level =
@@ -210,28 +218,41 @@ static void guc_free_load_err_log(struct intel_guc *guc)
 		i915_gem_object_put(guc->load_err_log);
 }
 
+static void guc_reset_interrupts(struct intel_guc *guc)
+{
+	guc->interrupts.reset(guc_to_i915(guc));
+}
+
+static void guc_enable_interrupts(struct intel_guc *guc)
+{
+	guc->interrupts.enable(guc_to_i915(guc));
+}
+
+static void guc_disable_interrupts(struct intel_guc *guc)
+{
+	guc->interrupts.disable(guc_to_i915(guc));
+}
+
 static int guc_enable_communication(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_i915(guc);
+	guc_enable_interrupts(guc);
 
-	gen9_enable_guc_interrupts(i915);
+	return intel_guc_ct_enable(&guc->ct);
+}
 
-	if (HAS_GUC_CT(i915))
-		return intel_guc_ct_enable(&guc->ct);
+static void guc_stop_communication(struct intel_guc *guc)
+{
+	intel_guc_ct_stop(&guc->ct);
 
-	guc->send = intel_guc_send_mmio;
-	guc->handler = intel_guc_to_host_event_handler_mmio;
-	return 0;
+	guc->send = intel_guc_send_nop;
+	guc->handler = intel_guc_to_host_event_handler_nop;
 }
 
 static void guc_disable_communication(struct intel_guc *guc)
 {
-	struct drm_i915_private *i915 = guc_to_i915(guc);
+	intel_guc_ct_disable(&guc->ct);
 
-	if (HAS_GUC_CT(i915))
-		intel_guc_ct_disable(&guc->ct);
-
-	gen9_disable_guc_interrupts(i915);
+	guc_disable_interrupts(guc);
 
 	guc->send = intel_guc_send_nop;
 	guc->handler = intel_guc_to_host_event_handler_nop;
@@ -280,6 +301,7 @@ void intel_uc_fini_misc(struct drm_i915_private *i915)
 int intel_uc_init(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
+	struct intel_huc *huc = &i915->huc;
 	int ret;
 
 	if (!USES_GUC(i915))
@@ -288,23 +310,37 @@ int intel_uc_init(struct drm_i915_private *i915)
 	if (!HAS_GUC(i915))
 		return -ENODEV;
 
+	/* XXX: GuC submission is unavailable for now */
+	GEM_BUG_ON(USES_GUC_SUBMISSION(i915));
+
 	ret = intel_guc_init(guc);
 	if (ret)
 		return ret;
 
+	if (USES_HUC(i915)) {
+		ret = intel_huc_init(huc);
+		if (ret)
+			goto err_guc;
+	}
+
 	if (USES_GUC_SUBMISSION(i915)) {
 		/*
 		 * This is stuff we need to have available at fw load time
 		 * if we are planning to enable submission later
 		 */
 		ret = intel_guc_submission_init(guc);
-		if (ret) {
-			intel_guc_fini(guc);
-			return ret;
-		}
+		if (ret)
+			goto err_huc;
 	}
 
 	return 0;
+
+err_huc:
+	if (USES_HUC(i915))
+		intel_huc_fini(huc);
+err_guc:
+	intel_guc_fini(guc);
+	return ret;
 }
 
 void intel_uc_fini(struct drm_i915_private *i915)
@@ -319,17 +355,17 @@ void intel_uc_fini(struct drm_i915_private *i915)
 	if (USES_GUC_SUBMISSION(i915))
 		intel_guc_submission_fini(guc);
 
+	if (USES_HUC(i915))
+		intel_huc_fini(&i915->huc);
+
 	intel_guc_fini(guc);
 }
 
-void intel_uc_sanitize(struct drm_i915_private *i915)
+static void __uc_sanitize(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
 	struct intel_huc *huc = &i915->huc;
 
-	if (!USES_GUC(i915))
-		return;
-
 	GEM_BUG_ON(!HAS_GUC(i915));
 
 	intel_huc_sanitize(huc);
@@ -338,6 +374,14 @@ void intel_uc_sanitize(struct drm_i915_private *i915)
 	__intel_uc_reset_hw(i915);
 }
 
+void intel_uc_sanitize(struct drm_i915_private *i915)
+{
+	if (!USES_GUC(i915))
+		return;
+
+	__uc_sanitize(i915);
+}
+
 int intel_uc_init_hw(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
@@ -349,7 +393,7 @@ int intel_uc_init_hw(struct drm_i915_private *i915)
 
 	GEM_BUG_ON(!HAS_GUC(i915));
 
-	gen9_reset_guc_interrupts(i915);
+	guc_reset_interrupts(guc);
 
 	/* WaEnableuKernelHeaderValidFix:skl */
 	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
@@ -373,6 +417,7 @@ int intel_uc_init_hw(struct drm_i915_private *i915)
 				goto err_out;
 		}
 
+		intel_guc_ads_reset(guc);
 		intel_guc_init_params(guc);
 		ret = intel_guc_fw_upload(guc);
 		if (ret == 0)
@@ -396,14 +441,14 @@ int intel_uc_init_hw(struct drm_i915_private *i915)
 			goto err_communication;
 	}
 
+	ret = intel_guc_sample_forcewake(guc);
+	if (ret)
+		goto err_communication;
+
 	if (USES_GUC_SUBMISSION(i915)) {
 		ret = intel_guc_submission_enable(guc);
 		if (ret)
 			goto err_communication;
-	} else if (INTEL_GEN(i915) < 11) {
-		ret = intel_guc_sample_forcewake(guc);
-		if (ret)
-			goto err_communication;
 	}
 
 	dev_info(i915->drm.dev, "GuC firmware version %u.%u\n",
@@ -423,6 +468,8 @@ err_communication:
 err_log_capture:
 	guc_capture_load_err_log(guc);
 err_out:
+	__uc_sanitize(i915);
+
 	/*
 	 * Note that there is no fallback as either user explicitly asked for
 	 * the GuC or driver default option was to run with the GuC enabled.
@@ -438,7 +485,7 @@ void intel_uc_fini_hw(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
 
-	if (!USES_GUC(i915))
+	if (!intel_guc_is_loaded(guc))
 		return;
 
 	GEM_BUG_ON(!HAS_GUC(i915));
@@ -447,6 +494,7 @@ void intel_uc_fini_hw(struct drm_i915_private *i915)
 		intel_guc_submission_disable(guc);
 
 	guc_disable_communication(guc);
+	__uc_sanitize(i915);
 }
 
 /**
@@ -459,33 +507,38 @@ void intel_uc_reset_prepare(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
 
-	if (!USES_GUC(i915))
+	if (!intel_guc_is_loaded(guc))
 		return;
 
-	guc_disable_communication(guc);
-	intel_uc_sanitize(i915);
+	guc_stop_communication(guc);
+	__uc_sanitize(i915);
 }
 
-int intel_uc_suspend(struct drm_i915_private *i915)
+void intel_uc_runtime_suspend(struct drm_i915_private *i915)
 {
 	struct intel_guc *guc = &i915->guc;
 	int err;
 
-	if (!USES_GUC(i915))
-		return 0;
-
-	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
-		return 0;
+	if (!intel_guc_is_loaded(guc))
+		return;
 
 	err = intel_guc_suspend(guc);
-	if (err) {
+	if (err)
 		DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
-		return err;
-	}
 
 	guc_disable_communication(guc);
+}
 
-	return 0;
+void intel_uc_suspend(struct drm_i915_private *i915)
+{
+	struct intel_guc *guc = &i915->guc;
+	intel_wakeref_t wakeref;
+
+	if (!intel_guc_is_loaded(guc))
+		return;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+		intel_uc_runtime_suspend(i915);
 }
 
 int intel_uc_resume(struct drm_i915_private *i915)
@@ -493,10 +546,7 @@ int intel_uc_resume(struct drm_i915_private *i915)
 	struct intel_guc *guc = &i915->guc;
 	int err;
 
-	if (!USES_GUC(i915))
-		return 0;
-
-	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
+	if (!intel_guc_is_loaded(guc))
 		return 0;
 
 	guc_enable_communication(guc);
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index c14729786652..3ea06c87dfcd 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -39,7 +39,8 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_uc_init(struct drm_i915_private *dev_priv);
 void intel_uc_fini(struct drm_i915_private *dev_priv);
 void intel_uc_reset_prepare(struct drm_i915_private *i915);
-int intel_uc_suspend(struct drm_i915_private *dev_priv);
+void intel_uc_suspend(struct drm_i915_private *i915);
+void intel_uc_runtime_suspend(struct drm_i915_private *i915);
 int intel_uc_resume(struct drm_i915_private *dev_priv);
 
 static inline bool intel_uc_is_using_guc(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index becf05ebae4d..f342ddd47df8 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include <linux/bitfield.h>
 #include <linux/firmware.h>
 #include <drm/drm_print.h>
 
@@ -119,21 +120,20 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	/*
-	 * The GuC firmware image has the version number embedded at a
-	 * well-known offset within the firmware blob; note that major / minor
-	 * version are TWO bytes each (i.e. u16), although all pointers and
-	 * offsets are defined in terms of bytes (u8).
-	 */
+	/* Get version numbers from the CSS header */
 	switch (uc_fw->type) {
 	case INTEL_UC_FW_TYPE_GUC:
-		uc_fw->major_ver_found = css->guc.sw_version >> 16;
-		uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF;
+		uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR,
+						   css->sw_version);
+		uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR,
+						   css->sw_version);
 		break;
 
 	case INTEL_UC_FW_TYPE_HUC:
-		uc_fw->major_ver_found = css->huc.sw_version >> 16;
-		uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF;
+		uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR,
+						   css->sw_version);
+		uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR,
+						   css->sw_version);
 		break;
 
 	default:
@@ -159,7 +159,8 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		goto fail;
 	}
 
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+	obj = i915_gem_object_create_shmem_from_data(dev_priv,
+						     fw->data, fw->size);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		DRM_DEBUG_DRIVER("%s fw object_create err=%d\n",
@@ -191,6 +192,35 @@ fail:
 	release_firmware(fw);		/* OK even if fw is NULL */
 }
 
+static void intel_uc_fw_ggtt_bind(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_gem_object *obj = uc_fw->obj;
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
+	struct i915_vma dummy = {
+		.node.start = intel_uc_fw_ggtt_offset(uc_fw),
+		.node.size = obj->base.size,
+		.pages = obj->mm.pages,
+		.vm = &ggtt->vm,
+	};
+
+	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+	GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
+
+	/* uc_fw->obj cache domains were not controlled across suspend */
+	drm_clflush_sg(dummy.pages);
+
+	ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
+}
+
+static void intel_uc_fw_ggtt_unbind(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_gem_object *obj = uc_fw->obj;
+	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
+	u64 start = intel_uc_fw_ggtt_offset(uc_fw);
+
+	ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
+}
+
 /**
  * intel_uc_fw_upload - load uC firmware using custom loader
  * @uc_fw: uC firmware
@@ -201,11 +231,8 @@ fail:
  * Return: 0 on success, non-zero on failure.
  */
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
-		       int (*xfer)(struct intel_uc_fw *uc_fw,
-				   struct i915_vma *vma))
+		       int (*xfer)(struct intel_uc_fw *uc_fw))
 {
-	struct i915_vma *vma;
-	u32 ggtt_pin_bias;
 	int err;
 
 	DRM_DEBUG_DRIVER("%s fw load %s\n",
@@ -219,33 +246,10 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
 			 intel_uc_fw_type_repr(uc_fw->type),
 			 intel_uc_fw_status_repr(uc_fw->load_status));
 
-	/* Pin object with firmware */
-	err = i915_gem_object_set_to_gtt_domain(uc_fw->obj, false);
-	if (err) {
-		DRM_DEBUG_DRIVER("%s fw set-domain err=%d\n",
-				 intel_uc_fw_type_repr(uc_fw->type), err);
-		goto fail;
-	}
-
-	ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->ggtt.pin_bias;
-	vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0,
-				       PIN_OFFSET_BIAS | ggtt_pin_bias);
-	if (IS_ERR(vma)) {
-		err = PTR_ERR(vma);
-		DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n",
-				 intel_uc_fw_type_repr(uc_fw->type), err);
-		goto fail;
-	}
-
 	/* Call custom loader */
-	err = xfer(uc_fw, vma);
-
-	/*
-	 * We keep the object pages for reuse during resume. But we can unpin it
-	 * now that DMA has completed, so it doesn't continue to take up space.
-	 */
-	i915_vma_unpin(vma);
-
+	intel_uc_fw_ggtt_bind(uc_fw);
+	err = xfer(uc_fw);
+	intel_uc_fw_ggtt_unbind(uc_fw);
 	if (err)
 		goto fail;
 
@@ -273,14 +277,50 @@ fail:
 	return err;
 }
 
+int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
+{
+	int err;
+
+	if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+		return -ENOEXEC;
+
+	err = i915_gem_object_pin_pages(uc_fw->obj);
+	if (err)
+		DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
+				 intel_uc_fw_type_repr(uc_fw->type), err);
+
+	return err;
+}
+
+void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+{
+	if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+		return;
+
+	i915_gem_object_unpin_pages(uc_fw->obj);
+}
+
+u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_private *i915 = to_i915(uc_fw->obj->base.dev);
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct drm_mm_node *node = &ggtt->uc_fw;
+
+	GEM_BUG_ON(!node->allocated);
+	GEM_BUG_ON(upper_32_bits(node->start));
+	GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
+
+	return lower_32_bits(node->start);
+}
+
 /**
- * intel_uc_fw_fini - cleanup uC firmware
+ * intel_uc_fw_cleanup_fetch - cleanup uC firmware
  *
  * @uc_fw: uC firmware
  *
  * Cleans up uC firmware by releasing the firmware GEM obj.
  */
-void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
 {
 	struct drm_i915_gem_object *obj;
 
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index 0e3bd580e267..ff98f8661d72 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -27,7 +27,6 @@
 
 struct drm_printer;
 struct drm_i915_private;
-struct i915_vma;
 
 /* Home of GuC, HuC and DMC firmwares */
 #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
@@ -102,7 +101,8 @@ static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
 }
 
 static inline
-void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type)
+void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
+			    enum intel_uc_fw_type type)
 {
 	uc_fw->path = NULL;
 	uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
@@ -144,10 +144,12 @@ static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
 
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		       struct intel_uc_fw *uc_fw);
+void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
-		       int (*xfer)(struct intel_uc_fw *uc_fw,
-				   struct i915_vma *vma));
+		       int (*xfer)(struct intel_uc_fw *uc_fw));
+int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
 void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
+u32 intel_uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw);
 void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index d1d51e1121e2..da33aa672c3d 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -111,9 +111,11 @@ wait_ack_set(const struct intel_uncore_forcewake_domain *d,
 static inline void
 fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_ack_clear(d, FORCEWAKE_KERNEL))
+	if (wait_ack_clear(d, FORCEWAKE_KERNEL)) {
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
+		add_taint_for_CI(TAINT_WARN); /* CI now unreliable */
+	}
 }
 
 enum ack_type {
@@ -186,9 +188,11 @@ fw_domain_get(const struct intel_uncore_forcewake_domain *d)
 static inline void
 fw_domain_wait_ack_set(const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_ack_set(d, FORCEWAKE_KERNEL))
+	if (wait_ack_set(d, FORCEWAKE_KERNEL)) {
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
 			  intel_uncore_forcewake_domain_to_str(d->id));
+		add_taint_for_CI(TAINT_WARN); /* CI now unreliable */
+	}
 }
 
 static inline void
@@ -579,7 +583,7 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
 	if (!uncore->funcs.force_wake_get)
 		return;
 
-	__assert_rpm_wakelock_held(uncore->rpm);
+	assert_rpm_wakelock_held(uncore->rpm);
 
 	spin_lock_irqsave(&uncore->lock, irqflags);
 	__intel_uncore_forcewake_get(uncore, fw_domains);
@@ -733,7 +737,7 @@ void assert_forcewakes_active(struct intel_uncore *uncore,
 	if (!uncore->funcs.force_wake_get)
 		return;
 
-	__assert_rpm_wakelock_held(uncore->rpm);
+	assert_rpm_wakelock_held(uncore->rpm);
 
 	fw_domains &= uncore->fw_domains;
 	WARN(fw_domains & ~uncore->fw_domains_active,
@@ -1050,7 +1054,7 @@ unclaimed_reg_debug(struct intel_uncore *uncore,
 
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
-	__assert_rpm_wakelock_held(uncore->rpm);
+	assert_rpm_wakelock_held(uncore->rpm);
 
 #define GEN2_READ_FOOTER \
 	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
@@ -1092,7 +1096,7 @@ __gen2_read(64)
 	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	u##x val = 0; \
-	__assert_rpm_wakelock_held(uncore->rpm); \
+	assert_rpm_wakelock_held(uncore->rpm); \
 	spin_lock_irqsave(&uncore->lock, irqflags); \
 	unclaimed_reg_debug(uncore, reg, true, true)
 
@@ -1166,7 +1170,7 @@ __gen6_read(64)
 
 #define GEN2_WRITE_HEADER \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
-	__assert_rpm_wakelock_held(uncore->rpm); \
+	assert_rpm_wakelock_held(uncore->rpm); \
 
 #define GEN2_WRITE_FOOTER
 
@@ -1204,7 +1208,7 @@ __gen2_write(32)
 	u32 offset = i915_mmio_reg_offset(reg); \
 	unsigned long irqflags; \
 	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
-	__assert_rpm_wakelock_held(uncore->rpm); \
+	assert_rpm_wakelock_held(uncore->rpm); \
 	spin_lock_irqsave(&uncore->lock, irqflags); \
 	unclaimed_reg_debug(uncore, reg, false, true)
 
@@ -1457,8 +1461,8 @@ static void intel_uncore_fw_domains_init(struct intel_uncore *uncore)
 static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
 					 unsigned long action, void *data)
 {
-	struct drm_i915_private *dev_priv = container_of(nb,
-			struct drm_i915_private, uncore.pmic_bus_access_nb);
+	struct intel_uncore *uncore = container_of(nb,
+			struct intel_uncore, pmic_bus_access_nb);
 
 	switch (action) {
 	case MBI_PMIC_BUS_ACCESS_BEGIN:
@@ -1475,12 +1479,12 @@ static int i915_pmic_bus_access_notifier(struct notifier_block *nb,
 		 * wake reference -> disable wakeref asserts for the time of
 		 * the access.
 		 */
-		disable_rpm_wakeref_asserts(dev_priv);
-		intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
-		enable_rpm_wakeref_asserts(dev_priv);
+		disable_rpm_wakeref_asserts(uncore->rpm);
+		intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+		enable_rpm_wakeref_asserts(uncore->rpm);
 		break;
 	case MBI_PMIC_BUS_ACCESS_END:
-		intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
+		intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
 		break;
 	}
 
@@ -1668,7 +1672,8 @@ static const struct reg_whitelist {
 int i915_reg_read_ioctl(struct drm_device *dev,
 			void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct intel_uncore *uncore = &i915->uncore;
 	struct drm_i915_reg_read *reg = data;
 	struct reg_whitelist const *entry;
 	intel_wakeref_t wakeref;
@@ -1685,7 +1690,7 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 		GEM_BUG_ON(entry->size > 8);
 		GEM_BUG_ON(entry_offset & (entry->size - 1));
 
-		if (INTEL_INFO(dev_priv)->gen_mask & entry->gen_mask &&
+		if (INTEL_INFO(i915)->gen_mask & entry->gen_mask &&
 		    entry_offset == (reg->offset & -entry->size))
 			break;
 		entry++;
@@ -1697,18 +1702,22 @@ int i915_reg_read_ioctl(struct drm_device *dev,
 
 	flags = reg->offset & (entry->size - 1);
 
-	with_intel_runtime_pm(dev_priv, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		if (entry->size == 8 && flags == I915_REG_READ_8B_WA)
-			reg->val = I915_READ64_2x32(entry->offset_ldw,
-						    entry->offset_udw);
+			reg->val = intel_uncore_read64_2x32(uncore,
+							    entry->offset_ldw,
+							    entry->offset_udw);
 		else if (entry->size == 8 && flags == 0)
-			reg->val = I915_READ64(entry->offset_ldw);
+			reg->val = intel_uncore_read64(uncore,
+						       entry->offset_ldw);
 		else if (entry->size == 4 && flags == 0)
-			reg->val = I915_READ(entry->offset_ldw);
+			reg->val = intel_uncore_read(uncore, entry->offset_ldw);
 		else if (entry->size == 2 && flags == 0)
-			reg->val = I915_READ16(entry->offset_ldw);
+			reg->val = intel_uncore_read16(uncore,
+						       entry->offset_ldw);
 		else if (entry->size == 1 && flags == 0)
-			reg->val = I915_READ8(entry->offset_ldw);
+			reg->val = intel_uncore_read8(uncore,
+						      entry->offset_ldw);
 		else
 			ret = -EINVAL;
 	}
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index d6af3de70121..804a0faacc91 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -33,7 +33,7 @@
 #include "i915_reg.h"
 
 struct drm_i915_private;
-struct i915_runtime_pm;
+struct intel_runtime_pm;
 struct intel_uncore;
 
 enum forcewake_domain_id {
@@ -97,7 +97,7 @@ struct intel_forcewake_range {
 struct intel_uncore {
 	void __iomem *regs;
 
-	struct i915_runtime_pm *rpm;
+	struct intel_runtime_pm *rpm;
 
 	spinlock_t lock; /** lock is also taken in irq contexts. */
 
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
new file mode 100644
index 000000000000..3db6fa682823
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -0,0 +1,138 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_runtime_pm.h"
+#include "i915_gem.h"
+
+static void rpm_get(struct intel_runtime_pm *rpm, struct intel_wakeref *wf)
+{
+	wf->wakeref = intel_runtime_pm_get(rpm);
+}
+
+static void rpm_put(struct intel_runtime_pm *rpm, struct intel_wakeref *wf)
+{
+	intel_wakeref_t wakeref = fetch_and_zero(&wf->wakeref);
+
+	intel_runtime_pm_put(rpm, wakeref);
+	GEM_BUG_ON(!wakeref);
+}
+
+int __intel_wakeref_get_first(struct intel_runtime_pm *rpm,
+			      struct intel_wakeref *wf,
+			      int (*fn)(struct intel_wakeref *wf))
+{
+	/*
+	 * Treat get/put as different subclasses, as we may need to run
+	 * the put callback from under the shrinker and do not want to
+	 * cross-contanimate that callback with any extra work performed
+	 * upon acquiring the wakeref.
+	 */
+	mutex_lock_nested(&wf->mutex, SINGLE_DEPTH_NESTING);
+	if (!atomic_read(&wf->count)) {
+		int err;
+
+		rpm_get(rpm, wf);
+
+		err = fn(wf);
+		if (unlikely(err)) {
+			rpm_put(rpm, wf);
+			mutex_unlock(&wf->mutex);
+			return err;
+		}
+
+		smp_mb__before_atomic(); /* release wf->count */
+	}
+	atomic_inc(&wf->count);
+	mutex_unlock(&wf->mutex);
+
+	return 0;
+}
+
+int __intel_wakeref_put_last(struct intel_runtime_pm *rpm,
+			     struct intel_wakeref *wf,
+			     int (*fn)(struct intel_wakeref *wf))
+{
+	int err;
+
+	err = fn(wf);
+	if (likely(!err))
+		rpm_put(rpm, wf);
+	else
+		atomic_inc(&wf->count);
+	mutex_unlock(&wf->mutex);
+
+	return err;
+}
+
+void __intel_wakeref_init(struct intel_wakeref *wf, struct lock_class_key *key)
+{
+	__mutex_init(&wf->mutex, "wakeref", key);
+	atomic_set(&wf->count, 0);
+	wf->wakeref = 0;
+}
+
+static void wakeref_auto_timeout(struct timer_list *t)
+{
+	struct intel_wakeref_auto *wf = from_timer(wf, t, timer);
+	intel_wakeref_t wakeref;
+	unsigned long flags;
+
+	if (!refcount_dec_and_lock_irqsave(&wf->count, &wf->lock, &flags))
+		return;
+
+	wakeref = fetch_and_zero(&wf->wakeref);
+	spin_unlock_irqrestore(&wf->lock, flags);
+
+	intel_runtime_pm_put(wf->rpm, wakeref);
+}
+
+void intel_wakeref_auto_init(struct intel_wakeref_auto *wf,
+			     struct intel_runtime_pm *rpm)
+{
+	spin_lock_init(&wf->lock);
+	timer_setup(&wf->timer, wakeref_auto_timeout, 0);
+	refcount_set(&wf->count, 0);
+	wf->wakeref = 0;
+	wf->rpm = rpm;
+}
+
+void intel_wakeref_auto(struct intel_wakeref_auto *wf, unsigned long timeout)
+{
+	unsigned long flags;
+
+	if (!timeout) {
+		if (del_timer_sync(&wf->timer))
+			wakeref_auto_timeout(&wf->timer);
+		return;
+	}
+
+	/* Our mission is that we only extend an already active wakeref */
+	assert_rpm_wakelock_held(wf->rpm);
+
+	if (!refcount_inc_not_zero(&wf->count)) {
+		spin_lock_irqsave(&wf->lock, flags);
+		if (!refcount_inc_not_zero(&wf->count)) {
+			GEM_BUG_ON(wf->wakeref);
+			wf->wakeref = intel_runtime_pm_get_if_in_use(wf->rpm);
+			refcount_set(&wf->count, 1);
+		}
+		spin_unlock_irqrestore(&wf->lock, flags);
+	}
+
+	/*
+	 * If we extend a pending timer, we will only get a single timer
+	 * callback and so need to cancel the local inc by running the
+	 * elided callback to keep the wf->count balanced.
+	 */
+	if (mod_timer(&wf->timer, jiffies + timeout))
+		wakeref_auto_timeout(&wf->timer);
+}
+
+void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf)
+{
+	intel_wakeref_auto(wf, 0);
+	GEM_BUG_ON(wf->wakeref);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
new file mode 100644
index 000000000000..9cbb2ebf575b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -0,0 +1,164 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_WAKEREF_H
+#define INTEL_WAKEREF_H
+
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+#include <linux/stackdepot.h>
+#include <linux/timer.h>
+
+struct intel_runtime_pm;
+
+typedef depot_stack_handle_t intel_wakeref_t;
+
+struct intel_wakeref {
+	atomic_t count;
+	struct mutex mutex;
+	intel_wakeref_t wakeref;
+};
+
+void __intel_wakeref_init(struct intel_wakeref *wf,
+			  struct lock_class_key *key);
+#define intel_wakeref_init(wf) do {					\
+	static struct lock_class_key __key;				\
+									\
+	__intel_wakeref_init((wf), &__key);				\
+} while (0)
+
+int __intel_wakeref_get_first(struct intel_runtime_pm *rpm,
+			      struct intel_wakeref *wf,
+			      int (*fn)(struct intel_wakeref *wf));
+int __intel_wakeref_put_last(struct intel_runtime_pm *rpm,
+			     struct intel_wakeref *wf,
+			     int (*fn)(struct intel_wakeref *wf));
+
+/**
+ * intel_wakeref_get: Acquire the wakeref
+ * @i915: the drm_i915_private device
+ * @wf: the wakeref
+ * @fn: callback for acquired the wakeref, called only on first acquire.
+ *
+ * Acquire a hold on the wakeref. The first user to do so, will acquire
+ * the runtime pm wakeref and then call the @fn underneath the wakeref
+ * mutex.
+ *
+ * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
+ * will be released and the acquisition unwound, and an error reported.
+ *
+ * Returns: 0 if the wakeref was acquired successfully, or a negative error
+ * code otherwise.
+ */
+static inline int
+intel_wakeref_get(struct intel_runtime_pm *rpm,
+		  struct intel_wakeref *wf,
+		  int (*fn)(struct intel_wakeref *wf))
+{
+	if (unlikely(!atomic_inc_not_zero(&wf->count)))
+		return __intel_wakeref_get_first(rpm, wf, fn);
+
+	return 0;
+}
+
+/**
+ * intel_wakeref_put: Release the wakeref
+ * @i915: the drm_i915_private device
+ * @wf: the wakeref
+ * @fn: callback for releasing the wakeref, called only on final release.
+ *
+ * Release our hold on the wakeref. When there are no more users,
+ * the runtime pm wakeref will be released after the @fn callback is called
+ * underneath the wakeref mutex.
+ *
+ * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
+ * is retained and an error reported.
+ *
+ * Returns: 0 if the wakeref was released successfully, or a negative error
+ * code otherwise.
+ */
+static inline int
+intel_wakeref_put(struct intel_runtime_pm *rpm,
+		  struct intel_wakeref *wf,
+		  int (*fn)(struct intel_wakeref *wf))
+{
+	if (atomic_dec_and_mutex_lock(&wf->count, &wf->mutex))
+		return __intel_wakeref_put_last(rpm, wf, fn);
+
+	return 0;
+}
+
+/**
+ * intel_wakeref_lock: Lock the wakeref (mutex)
+ * @wf: the wakeref
+ *
+ * Locks the wakeref to prevent it being acquired or released. New users
+ * can still adjust the counter, but the wakeref itself (and callback)
+ * cannot be acquired or released.
+ */
+static inline void
+intel_wakeref_lock(struct intel_wakeref *wf)
+	__acquires(wf->mutex)
+{
+	mutex_lock(&wf->mutex);
+}
+
+/**
+ * intel_wakeref_unlock: Unlock the wakeref
+ * @wf: the wakeref
+ *
+ * Releases a previously acquired intel_wakeref_lock().
+ */
+static inline void
+intel_wakeref_unlock(struct intel_wakeref *wf)
+	__releases(wf->mutex)
+{
+	mutex_unlock(&wf->mutex);
+}
+
+/**
+ * intel_wakeref_active: Query whether the wakeref is currently held
+ * @wf: the wakeref
+ *
+ * Returns: true if the wakeref is currently held.
+ */
+static inline bool
+intel_wakeref_active(struct intel_wakeref *wf)
+{
+	return READ_ONCE(wf->wakeref);
+}
+
+struct intel_wakeref_auto {
+	struct intel_runtime_pm *rpm;
+	struct timer_list timer;
+	intel_wakeref_t wakeref;
+	spinlock_t lock;
+	refcount_t count;
+};
+
+/**
+ * intel_wakeref_auto: Delay the runtime-pm autosuspend
+ * @wf: the wakeref
+ * @timeout: relative timeout in jiffies
+ *
+ * The runtime-pm core uses a suspend delay after the last wakeref
+ * is released before triggering runtime suspend of the device. That
+ * delay is configurable via sysfs with little regard to the device
+ * characteristics. Instead, we want to tune the autosuspend based on our
+ * HW knowledge. intel_wakeref_auto() delays the sleep by the supplied
+ * timeout.
+ *
+ * Pass @timeout = 0 to cancel a previous autosuspend by executing the
+ * suspend immediately.
+ */
+void intel_wakeref_auto(struct intel_wakeref_auto *wf, unsigned long timeout);
+
+void intel_wakeref_auto_init(struct intel_wakeref_auto *wf,
+			     struct intel_runtime_pm *rpm);
+void intel_wakeref_auto_fini(struct intel_wakeref_auto *wf);
+
+#endif /* INTEL_WAKEREF_H */
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index f82a415ea2ba..7b4ba84b9fb8 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -41,26 +41,27 @@
  * context).
  */
 
-/* Default WOPCM size 1MB. */
-#define GEN9_WOPCM_SIZE			(1024 * 1024)
+/* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
+#define GEN11_WOPCM_SIZE		SZ_2M
+#define GEN9_WOPCM_SIZE			SZ_1M
 /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
-#define WOPCM_RESERVED_SIZE		(16 * 1024)
+#define WOPCM_RESERVED_SIZE		SZ_16K
 
 /* 16KB reserved at the beginning of GuC WOPCM. */
-#define GUC_WOPCM_RESERVED		(16 * 1024)
+#define GUC_WOPCM_RESERVED		SZ_16K
 /* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
-#define GUC_WOPCM_STACK_RESERVED	(8 * 1024)
+#define GUC_WOPCM_STACK_RESERVED	SZ_8K
 
 /* GuC WOPCM Offset value needs to be aligned to 16KB. */
 #define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
 
 /* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */
-#define BXT_WOPCM_RC6_CTX_RESERVED	(24 * 1024)
+#define BXT_WOPCM_RC6_CTX_RESERVED	(SZ_16K + SZ_8K)
 /* 36KB WOPCM reserved at the end of WOPCM on CNL. */
-#define CNL_WOPCM_HW_CTX_RESERVED	(36 * 1024)
+#define CNL_WOPCM_HW_CTX_RESERVED	(SZ_32K + SZ_4K)
 
 /* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */
-#define GEN9_GUC_FW_RESERVED	(128 * 1024)
+#define GEN9_GUC_FW_RESERVED	SZ_128K
 #define GEN9_GUC_WOPCM_OFFSET	(GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED)
 
 /**
@@ -71,7 +72,15 @@
  */
 void intel_wopcm_init_early(struct intel_wopcm *wopcm)
 {
-	wopcm->size = GEN9_WOPCM_SIZE;
+	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
+
+	if (!HAS_GUC(i915))
+		return;
+
+	if (INTEL_GEN(i915) >= 11)
+		wopcm->size = GEN11_WOPCM_SIZE;
+	else
+		wopcm->size = GEN9_WOPCM_SIZE;
 
 	DRM_DEBUG_DRIVER("WOPCM size: %uKiB\n", wopcm->size / 1024);
 }
diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h
index 6298910a384c..114401971520 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.h
+++ b/drivers/gpu/drm/i915/intel_wopcm.h
@@ -24,6 +24,21 @@ struct intel_wopcm {
 	} guc;
 };
 
+/**
+ * intel_wopcm_guc_size()
+ * @wopcm:	intel_wopcm structure
+ *
+ * Returns size of the WOPCM shadowed region.
+ *
+ * Returns:
+ * 0 if GuC is not present or not in use.
+ * Otherwise, the GuC WOPCM size.
+ */
+static inline u32 intel_wopcm_guc_size(struct intel_wopcm *wopcm)
+{
+	return wopcm->guc.size;
+}
+
 void intel_wopcm_init_early(struct intel_wopcm *wopcm);
 int intel_wopcm_init(struct intel_wopcm *wopcm);
 int intel_wopcm_init_hw(struct intel_wopcm *wopcm);
diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h
deleted file mode 100644
index a6133a9e8029..000000000000
--- a/drivers/gpu/drm/i915/selftests/huge_gem_object.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __HUGE_GEM_OBJECT_H
-#define __HUGE_GEM_OBJECT_H
-
-struct drm_i915_gem_object *
-huge_gem_object(struct drm_i915_private *i915,
-		phys_addr_t phys_size,
-		dma_addr_t dma_size);
-
-static inline phys_addr_t
-huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
-{
-	return obj->scratch;
-}
-
-static inline dma_addr_t
-huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
-{
-	return obj->base.size;
-}
-
-#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index 27d8f853111b..c0b3537a5fa6 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -4,7 +4,9 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_selftest.h"
+#include "gem/i915_gem_pm.h"
+
+#include "i915_selftest.h"
 
 #include "igt_flush_test.h"
 #include "lib_sw_fence.h"
@@ -46,7 +48,7 @@ static int __live_active_setup(struct drm_i915_private *i915,
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			break;
@@ -95,7 +97,7 @@ static int live_active_wait(void *arg)
 	/* Check that we get a callback when requests retire upon waiting */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	err = __live_active_setup(i915, &active);
 
@@ -109,7 +111,7 @@ static int live_active_wait(void *arg)
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -124,7 +126,7 @@ static int live_active_retire(void *arg)
 	/* Check that we get a callback when requests are indirectly retired */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	err = __live_active_setup(i915, &active);
 
@@ -138,7 +140,7 @@ static int live_active_retire(void *arg)
 	}
 
 	i915_active_fini(&active.base);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index 6fd70d326468..c6a01a6e87f1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -6,36 +6,31 @@
 
 #include <linux/random.h>
 
-#include "../i915_selftest.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+
+#include "i915_selftest.h"
 
-#include "mock_context.h"
 #include "igt_flush_test.h"
+#include "mock_drm.h"
 
 static int switch_to_context(struct drm_i915_private *i915,
 			     struct i915_gem_context *ctx)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
-	int err = 0;
-
-	wakeref = intel_runtime_pm_get(i915);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *rq;
 
-		rq = i915_request_alloc(engine, ctx);
-		if (IS_ERR(rq)) {
-			err = PTR_ERR(rq);
-			break;
-		}
+		rq = igt_request_alloc(ctx, engine);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
 
 		i915_request_add(rq);
 	}
 
-	intel_runtime_pm_put(i915, wakeref);
-
-	return err;
+	return 0;
 }
 
 static void trash_stolen(struct drm_i915_private *i915)
@@ -68,7 +63,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/*
 	 * As a final sting in the tail, invalidate stolen. Under a real S4,
@@ -79,7 +74,7 @@ static void simulate_hibernate(struct drm_i915_private *i915)
 	 */
 	trash_stolen(i915);
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
 static int pm_prepare(struct drm_i915_private *i915)
@@ -93,7 +88,7 @@ static void pm_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	with_intel_runtime_pm(i915, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		i915_gem_suspend_gtt_mappings(i915);
 		i915_gem_suspend_late(i915);
 	}
@@ -103,7 +98,7 @@ static void pm_hibernate(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
 
-	with_intel_runtime_pm(i915, wakeref) {
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
 		i915_gem_suspend_gtt_mappings(i915);
 
 		i915_gem_freeze(i915);
@@ -119,8 +114,8 @@ static void pm_resume(struct drm_i915_private *i915)
 	 * Both suspend and hibernate follow the same wakeup path and assume
 	 * that runtime-pm just works.
 	 */
-	with_intel_runtime_pm(i915, wakeref) {
-		intel_engines_sanitize(i915, false);
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+		intel_gt_sanitize(i915, false);
 		i915_gem_sanitize(i915);
 		i915_gem_resume(i915);
 	}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 89766688e420..a3cb0aade6f1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -22,10 +22,14 @@
  *
  */
 
-#include "../i915_selftest.h"
+#include "gem/i915_gem_pm.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
 
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
 #include "lib_sw_fence.h"
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
@@ -64,20 +68,24 @@ static int populate_ggtt(struct drm_i915_private *i915,
 		count++;
 	}
 
+	bound = 0;
 	unbound = 0;
-	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
-		if (obj->mm.quirked)
+	list_for_each_entry(obj, objects, st_link) {
+		GEM_BUG_ON(!obj->mm.quirked);
+
+		if (atomic_read(&obj->bind_count))
+			bound++;
+		else
 			unbound++;
+	}
+	GEM_BUG_ON(bound + unbound != count);
+
 	if (unbound) {
 		pr_err("%s: Found %lu objects unbound, expected %u!\n",
 		       __func__, unbound, 0);
 		return -EINVAL;
 	}
 
-	bound = 0;
-	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
-		if (obj->mm.quirked)
-			bound++;
 	if (bound != count) {
 		pr_err("%s: Found %lu objects bound, expected %lu!\n",
 		       __func__, bound, count);
@@ -397,7 +405,7 @@ static int igt_evict_contexts(void *arg)
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
@@ -460,7 +468,7 @@ static int igt_evict_contexts(void *arg)
 
 			/* We will need some GGTT space for the rq's context */
 			igt_evict_ctl.fail_if_busy = true;
-			rq = i915_request_alloc(engine, ctx);
+			rq = igt_request_alloc(ctx, engine);
 			igt_evict_ctl.fail_if_busy = false;
 
 			if (IS_ERR(rq)) {
@@ -498,6 +506,8 @@ static int igt_evict_contexts(void *arg)
 
 	mutex_lock(&i915->drm.struct_mutex);
 out_locked:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
 	while (reserved) {
 		struct reserved *next = reserved->next;
 
@@ -508,7 +518,7 @@ out_locked:
 	}
 	if (drm_mm_node_allocated(&hole))
 		drm_mm_remove_node(&hole);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -532,7 +542,7 @@ int i915_gem_evict_mock_selftests(void)
 		return -ENOMEM;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);
 
 	mutex_unlock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 9cca66e4420a..1a60b9fe8221 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -25,10 +25,11 @@
 #include <linux/list_sort.h>
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
+#include "gem/selftests/mock_context.h"
+
 #include "i915_random.h"
+#include "i915_selftest.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
@@ -147,7 +148,7 @@ err:
 static int igt_ppgtt_alloc(void *arg)
 {
 	struct drm_i915_private *dev_priv = arg;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	u64 size, last, limit;
 	int err = 0;
 
@@ -156,7 +157,7 @@ static int igt_ppgtt_alloc(void *arg)
 	if (!HAS_PPGTT(dev_priv))
 		return 0;
 
-	ppgtt = __hw_ppgtt_create(dev_priv);
+	ppgtt = __ppgtt_create(dev_priv);
 	if (IS_ERR(ppgtt))
 		return PTR_ERR(ppgtt);
 
@@ -208,7 +209,7 @@ static int igt_ppgtt_alloc(void *arg)
 
 err_ppgtt_cleanup:
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -294,9 +295,9 @@ static int lowlevel_hole(struct drm_i915_private *i915,
 			mock_vma.node.size = BIT_ULL(size);
 			mock_vma.node.start = addr;
 
-			wakeref = intel_runtime_pm_get(i915);
+			wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 			vm->insert_entries(vm, &mock_vma, I915_CACHE_NONE, 0);
-			intel_runtime_pm_put(i915, wakeref);
+			intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 		}
 		count = n;
 
@@ -998,7 +999,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 				      unsigned long end_time))
 {
 	struct drm_file *file;
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 	IGT_TIMEOUT(end_time);
 	int err;
 
@@ -1020,7 +1021,7 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_put(ppgtt);
+	i915_vm_put(&ppgtt->vm);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
@@ -1170,7 +1171,7 @@ static int igt_ggtt_page(void *arg)
 	if (err)
 		goto out_unpin;
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for (n = 0; n < count; n++) {
 		u64 offset = tmp.start + n * PAGE_SIZE;
@@ -1217,7 +1218,7 @@ static int igt_ggtt_page(void *arg)
 	kfree(order);
 out_remove:
 	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	drm_mm_remove_node(&tmp);
 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -1232,7 +1233,7 @@ static void track_vma_bind(struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	obj->bind_count++; /* track for eviction later */
+	atomic_inc(&obj->bind_count); /* track for eviction later */
 	__i915_gem_object_pin_pages(obj);
 
 	vma->pages = obj->mm.pages;
@@ -1250,7 +1251,6 @@ static int exercise_mock(struct drm_i915_private *i915,
 {
 	const u64 limit = totalram_pages() << PAGE_SHIFT;
 	struct i915_gem_context *ctx;
-	struct i915_hw_ppgtt *ppgtt;
 	IGT_TIMEOUT(end_time);
 	int err;
 
@@ -1258,10 +1258,7 @@ static int exercise_mock(struct drm_i915_private *i915,
 	if (!ctx)
 		return -ENOMEM;
 
-	ppgtt = ctx->ppgtt;
-	GEM_BUG_ON(!ppgtt);
-
-	err = func(i915, &ppgtt->vm, 0, min(ppgtt->vm.total, limit), end_time);
+	err = func(i915, ctx->vm, 0, min(ctx->vm->total, limit), end_time);
 
 	mock_context_close(ctx);
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 6d766925ad04..d5dc4427d664 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -16,13 +16,18 @@ selftest(timelines, i915_timeline_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
+selftest(mman, i915_gem_mman_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
+selftest(vma, i915_vma_live_selftests)
 selftest(coherency, i915_gem_coherency_live_selftests)
 selftest(gtt, i915_gem_gtt_live_selftests)
 selftest(gem, i915_gem_live_selftests)
 selftest(evict, i915_gem_evict_live_selftests)
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(contexts, i915_gem_context_live_selftests)
+selftest(blt, i915_gem_object_blt_live_selftests)
+selftest(client, i915_gem_client_blt_live_selftests)
+selftest(reset, intel_reset_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 88e5ab586337..510eb176bb2c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -18,6 +18,7 @@ selftest(engine, intel_engine_cs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
+selftest(phys, i915_gem_phys_mock_selftests)
 selftest(dmabuf, i915_gem_dmabuf_mock_selftests)
 selftest(vma, i915_vma_mock_selftests)
 selftest(evict, i915_gem_evict_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index e6ffe2240126..298bb7116c51 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -24,12 +24,14 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
+#include "gem/i915_gem_pm.h"
+#include "gem/selftests/mock_context.h"
+
 #include "i915_random.h"
+#include "i915_selftest.h"
 #include "igt_live_test.h"
 #include "lib_sw_fence.h"
 
-#include "mock_context.h"
 #include "mock_drm.h"
 #include "mock_gem_device.h"
 
@@ -72,12 +74,12 @@ static int igt_wait_request(void *arg)
 		goto out_unlock;
 	}
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
+	if (i915_request_wait(request, 0, 0) != -ETIME) {
 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
 		goto out_unlock;
 	}
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
+	if (i915_request_wait(request, 0, T) != -ETIME) {
 		pr_err("request wait succeeded (expected timeout before submit!)\n");
 		goto out_unlock;
 	}
@@ -89,7 +91,7 @@ static int igt_wait_request(void *arg)
 
 	i915_request_add(request);
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
+	if (i915_request_wait(request, 0, 0) != -ETIME) {
 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
 		goto out_unlock;
 	}
@@ -99,12 +101,12 @@ static int igt_wait_request(void *arg)
 		goto out_unlock;
 	}
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
+	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
 		pr_err("request wait succeeded (expected timeout!)\n");
 		goto out_unlock;
 	}
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
+	if (i915_request_wait(request, 0, T) == -ETIME) {
 		pr_err("request wait timed out!\n");
 		goto out_unlock;
 	}
@@ -114,7 +116,7 @@ static int igt_wait_request(void *arg)
 		goto out_unlock;
 	}
 
-	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
+	if (i915_request_wait(request, 0, T) == -ETIME) {
 		pr_err("request wait timed out when already complete!\n");
 		goto out_unlock;
 	}
@@ -267,7 +269,7 @@ static struct i915_request *
 __live_request_alloc(struct i915_gem_context *ctx,
 		     struct intel_engine_cs *engine)
 {
-	return i915_request_alloc(engine, ctx);
+	return igt_request_alloc(ctx, engine);
 }
 
 static int __igt_breadcrumbs_smoketest(void *arg)
@@ -512,7 +514,7 @@ int i915_request_mock_selftests(void)
 	if (!i915)
 		return -ENOMEM;
 
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);
 
 	drm_dev_put(&i915->drm);
@@ -535,7 +537,7 @@ static int live_nop_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for_each_engine(engine, i915, id) {
 		struct i915_request *request = NULL;
@@ -551,8 +553,7 @@ static int live_nop_request(void *arg)
 			times[1] = ktime_get_raw();
 
 			for (n = 0; n < prime; n++) {
-				request = i915_request_alloc(engine,
-							     i915->kernel_context);
+				request = i915_request_create(engine->kernel_context);
 				if (IS_ERR(request)) {
 					err = PTR_ERR(request);
 					goto out_unlock;
@@ -573,9 +574,7 @@ static int live_nop_request(void *arg)
 
 				i915_request_add(request);
 			}
-			i915_request_wait(request,
-					  I915_WAIT_LOCKED,
-					  MAX_SCHEDULE_TIMEOUT);
+			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -596,7 +595,7 @@ static int live_nop_request(void *arg)
 	}
 
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -649,7 +648,7 @@ empty_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 	int err;
 
-	request = i915_request_alloc(engine, engine->i915->kernel_context);
+	request = i915_request_create(engine->kernel_context);
 	if (IS_ERR(request))
 		return request;
 
@@ -681,7 +680,7 @@ static int live_empty_request(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	batch = empty_batch(i915);
 	if (IS_ERR(batch)) {
@@ -705,9 +704,7 @@ static int live_empty_request(void *arg)
 			err = PTR_ERR(request);
 			goto out_batch;
 		}
-		i915_request_wait(request,
-				  I915_WAIT_LOCKED,
-				  MAX_SCHEDULE_TIMEOUT);
+		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 
 		for_each_prime_number_from(prime, 1, 8192) {
 			times[1] = ktime_get_raw();
@@ -719,9 +716,7 @@ static int live_empty_request(void *arg)
 					goto out_batch;
 				}
 			}
-			i915_request_wait(request,
-					  I915_WAIT_LOCKED,
-					  MAX_SCHEDULE_TIMEOUT);
+			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
 
 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
 			if (prime == 1)
@@ -745,7 +740,7 @@ out_batch:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -753,8 +748,7 @@ out_unlock:
 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
 {
 	struct i915_gem_context *ctx = i915->kernel_context;
-	struct i915_address_space *vm =
-		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
 	struct drm_i915_gem_object *obj;
 	const int gen = INTEL_GEN(i915);
 	struct i915_vma *vma;
@@ -839,7 +833,7 @@ static int live_all_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
@@ -853,7 +847,7 @@ static int live_all_engines(void *arg)
 	}
 
 	for_each_engine(engine, i915, id) {
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed with err=%d\n",
@@ -868,12 +862,9 @@ static int live_all_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
-		if (!i915_gem_object_has_active_reference(batch->obj)) {
-			i915_gem_object_get(batch->obj);
-			i915_gem_object_set_active_reference(batch->obj);
-		}
-
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
 		i915_request_get(request[id]);
@@ -898,8 +889,7 @@ static int live_all_engines(void *arg)
 	for_each_engine(engine, i915, id) {
 		long timeout;
 
-		timeout = i915_request_wait(request[id],
-					    I915_WAIT_LOCKED,
+		timeout = i915_request_wait(request[id], 0,
 					    MAX_SCHEDULE_TIMEOUT);
 		if (timeout < 0) {
 			err = timeout;
@@ -922,7 +912,7 @@ out_request:
 	i915_vma_unpin(batch);
 	i915_vma_put(batch);
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -945,7 +935,7 @@ static int live_sequential_engines(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
@@ -962,7 +952,7 @@ static int live_sequential_engines(void *arg)
 			goto out_unlock;
 		}
 
-		request[id] = i915_request_alloc(engine, i915->kernel_context);
+		request[id] = i915_request_create(engine->kernel_context);
 		if (IS_ERR(request[id])) {
 			err = PTR_ERR(request[id]);
 			pr_err("%s: Request allocation failed for %s with err=%d\n",
@@ -988,12 +978,11 @@ static int live_sequential_engines(void *arg)
 		GEM_BUG_ON(err);
 		request[id]->batch = batch;
 
+		i915_vma_lock(batch);
 		err = i915_vma_move_to_active(batch, request[id], 0);
+		i915_vma_unlock(batch);
 		GEM_BUG_ON(err);
 
-		i915_gem_object_set_active_reference(batch->obj);
-		i915_vma_get(batch);
-
 		i915_request_get(request[id]);
 		i915_request_add(request[id]);
 
@@ -1017,8 +1006,7 @@ static int live_sequential_engines(void *arg)
 			goto out_request;
 		}
 
-		timeout = i915_request_wait(request[id],
-					    I915_WAIT_LOCKED,
+		timeout = i915_request_wait(request[id], 0,
 					    MAX_SCHEDULE_TIMEOUT);
 		if (timeout < 0) {
 			err = timeout;
@@ -1052,7 +1040,7 @@ out_request:
 		i915_request_put(request[id]);
 	}
 out_unlock:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -1075,7 +1063,7 @@ max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	if (HAS_EXECLISTS(ctx->i915))
 		return INT_MAX;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		ret = PTR_ERR(rq);
 	} else {
@@ -1117,7 +1105,7 @@ static int live_breadcrumbs_smoketest(void *arg)
 	 * On real hardware this time.
 	 */
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	file = mock_file(i915);
 	if (IS_ERR(file)) {
@@ -1224,7 +1212,7 @@ out_threads:
 out_file:
 	mock_file_free(i915, file);
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
index bd96afcadfe7..76d3977f1d4b 100644
--- a/drivers/gpu/drm/i915/selftests/i915_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -6,8 +6,10 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
+#include "gem/i915_gem_pm.h"
+
 #include "i915_random.h"
+#include "i915_selftest.h"
 
 #include "igt_flush_test.h"
 #include "mock_gem_device.h"
@@ -454,7 +456,7 @@ tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value)
 		goto out;
 	}
 
-	rq = i915_request_alloc(engine, engine->i915->kernel_context);
+	rq = i915_request_create(engine->kernel_context);
 	if (IS_ERR(rq))
 		goto out_unpin;
 
@@ -513,7 +515,7 @@ static int live_hwsp_engine(void *arg)
 		return -ENOMEM;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
 	for_each_engine(engine, i915, id) {
@@ -556,7 +558,7 @@ out:
 		i915_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kvfree(timelines);
@@ -589,7 +591,7 @@ static int live_hwsp_alternate(void *arg)
 		return -ENOMEM;
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
 	for (n = 0; n < NUM_TIMELINES; n++) {
@@ -632,7 +634,7 @@ out:
 		i915_timeline_put(tl);
 	}
 
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	kvfree(timelines);
@@ -656,7 +658,7 @@ static int live_hwsp_wrap(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	tl = i915_timeline_create(i915, NULL);
 	if (IS_ERR(tl)) {
@@ -678,7 +680,7 @@ static int live_hwsp_wrap(void *arg)
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		rq = i915_request_alloc(engine, i915->kernel_context);
+		rq = i915_request_create(engine->kernel_context);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto out;
@@ -722,7 +724,7 @@ static int live_hwsp_wrap(void *arg)
 
 		i915_request_add(rq);
 
-		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("Wait for timeline writes timed out!\n");
 			err = -EIO;
 			goto out;
@@ -747,7 +749,7 @@ out:
 out_free:
 	i915_timeline_put(tl);
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
@@ -769,7 +771,7 @@ static int live_hwsp_recycle(void *arg)
 	 */
 
 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	count = 0;
 	for_each_engine(engine, i915, id) {
@@ -795,9 +797,7 @@ static int live_hwsp_recycle(void *arg)
 				goto out;
 			}
 
-			if (i915_request_wait(rq,
-					      I915_WAIT_LOCKED,
-					      HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				pr_err("Wait for timeline writes timed out!\n");
 				i915_timeline_put(tl);
 				err = -EIO;
@@ -823,7 +823,7 @@ static int live_hwsp_recycle(void *arg)
 out:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index fc594b030f5a..fbc79b14823a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -24,10 +24,12 @@
 
 #include <linux/prime_numbers.h>
 
-#include "../i915_selftest.h"
+#include "gem/selftests/mock_context.h"
+
+#include "i915_scatterlist.h"
+#include "i915_selftest.h"
 
 #include "mock_gem_device.h"
-#include "mock_context.h"
 #include "mock_gtt.h"
 
 static bool assert_vma(struct i915_vma *vma,
@@ -36,7 +38,7 @@ static bool assert_vma(struct i915_vma *vma,
 {
 	bool ok = true;
 
-	if (vma->vm != &ctx->ppgtt->vm) {
+	if (vma->vm != ctx->vm) {
 		pr_err("VMA created with wrong VM\n");
 		ok = false;
 	}
@@ -59,7 +61,7 @@ static bool assert_vma(struct i915_vma *vma,
 static struct i915_vma *
 checked_vma_instance(struct drm_i915_gem_object *obj,
 		     struct i915_address_space *vm,
-		     struct i915_ggtt_view *view)
+		     const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
 	bool ok = true;
@@ -111,7 +113,7 @@ static int create_vmas(struct drm_i915_private *i915,
 	list_for_each_entry(obj, objects, st_link) {
 		for (pinned = 0; pinned <= 1; pinned++) {
 			list_for_each_entry(ctx, contexts, link) {
-				struct i915_address_space *vm = &ctx->ppgtt->vm;
+				struct i915_address_space *vm = ctx->vm;
 				struct i915_vma *vma;
 				int err;
 
@@ -397,18 +399,79 @@ assert_rotated(struct drm_i915_gem_object *obj,
 	return sg;
 }
 
-static unsigned int rotated_size(const struct intel_rotation_plane_info *a,
-				 const struct intel_rotation_plane_info *b)
+static unsigned long remapped_index(const struct intel_remapped_info *r,
+				    unsigned int n,
+				    unsigned int x,
+				    unsigned int y)
+{
+	return (r->plane[n].stride * y +
+		r->plane[n].offset + x);
+}
+
+static struct scatterlist *
+assert_remapped(struct drm_i915_gem_object *obj,
+		const struct intel_remapped_info *r, unsigned int n,
+		struct scatterlist *sg)
+{
+	unsigned int x, y;
+	unsigned int left = 0;
+	unsigned int offset;
+
+	for (y = 0; y < r->plane[n].height; y++) {
+		for (x = 0; x < r->plane[n].width; x++) {
+			unsigned long src_idx;
+			dma_addr_t src;
+
+			if (!sg) {
+				pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n",
+				       n, x, y);
+				return ERR_PTR(-EINVAL);
+			}
+			if (!left) {
+				offset = 0;
+				left = sg_dma_len(sg);
+			}
+
+			src_idx = remapped_index(r, n, x, y);
+			src = i915_gem_object_get_dma_address(obj, src_idx);
+
+			if (left < PAGE_SIZE || left & (PAGE_SIZE-1)) {
+				pr_err("Invalid sg.length, found %d, expected %lu for remapped page (%d, %d) [src index %lu]\n",
+				       sg_dma_len(sg), PAGE_SIZE,
+				       x, y, src_idx);
+				return ERR_PTR(-EINVAL);
+			}
+
+			if (sg_dma_address(sg) + offset != src) {
+				pr_err("Invalid address for remapped page (%d, %d) [src index %lu]\n",
+				       x, y, src_idx);
+				return ERR_PTR(-EINVAL);
+			}
+
+			left -= PAGE_SIZE;
+			offset += PAGE_SIZE;
+
+
+			if (!left)
+				sg = sg_next(sg);
+		}
+	}
+
+	return sg;
+}
+
+static unsigned int rotated_size(const struct intel_remapped_plane_info *a,
+				 const struct intel_remapped_plane_info *b)
 {
 	return a->width * a->height + b->width * b->height;
 }
 
-static int igt_vma_rotate(void *arg)
+static int igt_vma_rotate_remap(void *arg)
 {
 	struct i915_ggtt *ggtt = arg;
 	struct i915_address_space *vm = &ggtt->vm;
 	struct drm_i915_gem_object *obj;
-	const struct intel_rotation_plane_info planes[] = {
+	const struct intel_remapped_plane_info planes[] = {
 		{ .width = 1, .height = 1, .stride = 1 },
 		{ .width = 2, .height = 2, .stride = 2 },
 		{ .width = 4, .height = 4, .stride = 4 },
@@ -426,6 +489,11 @@ static int igt_vma_rotate(void *arg)
 		{ .width = 6, .height = 4, .stride = 6 },
 		{ }
 	}, *a, *b;
+	enum i915_ggtt_view_type types[] = {
+		I915_GGTT_VIEW_ROTATED,
+		I915_GGTT_VIEW_REMAPPED,
+		0,
+	}, *t;
 	const unsigned int max_pages = 64;
 	int err = -ENOMEM;
 
@@ -437,6 +505,7 @@ static int igt_vma_rotate(void *arg)
 	if (IS_ERR(obj))
 		goto out;
 
+	for (t = types; *t; t++) {
 	for (a = planes; a->width; a++) {
 		for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) {
 			struct i915_ggtt_view view;
@@ -447,7 +516,7 @@ static int igt_vma_rotate(void *arg)
 			GEM_BUG_ON(max_offset > max_pages);
 			max_offset = max_pages - max_offset;
 
-			view.type = I915_GGTT_VIEW_ROTATED;
+			view.type = *t;
 			view.rotated.plane[0] = *a;
 			view.rotated.plane[1] = *b;
 
@@ -468,14 +537,23 @@ static int igt_vma_rotate(void *arg)
 						goto out_object;
 					}
 
-					if (vma->size != rotated_size(a, b) * PAGE_SIZE) {
+					if (view.type == I915_GGTT_VIEW_ROTATED &&
+					    vma->size != rotated_size(a, b) * PAGE_SIZE) {
+						pr_err("VMA is wrong size, expected %lu, found %llu\n",
+						       PAGE_SIZE * rotated_size(a, b), vma->size);
+						err = -EINVAL;
+						goto out_object;
+					}
+
+					if (view.type == I915_GGTT_VIEW_REMAPPED &&
+					    vma->size > rotated_size(a, b) * PAGE_SIZE) {
 						pr_err("VMA is wrong size, expected %lu, found %llu\n",
 						       PAGE_SIZE * rotated_size(a, b), vma->size);
 						err = -EINVAL;
 						goto out_object;
 					}
 
-					if (vma->pages->nents != rotated_size(a, b)) {
+					if (vma->pages->nents > rotated_size(a, b)) {
 						pr_err("sg table is wrong sizeo, expected %u, found %u nents\n",
 						       rotated_size(a, b), vma->pages->nents);
 						err = -EINVAL;
@@ -497,9 +575,14 @@ static int igt_vma_rotate(void *arg)
 
 					sg = vma->pages->sgl;
 					for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) {
-						sg = assert_rotated(obj, &view.rotated, n, sg);
+						if (view.type == I915_GGTT_VIEW_ROTATED)
+							sg = assert_rotated(obj, &view.rotated, n, sg);
+						else
+							sg = assert_remapped(obj, &view.remapped, n, sg);
 						if (IS_ERR(sg)) {
-							pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n,
+							pr_err("Inconsistent %s VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n",
+							       view.type == I915_GGTT_VIEW_ROTATED ?
+							       "rotated" : "remapped", n,
 							       view.rotated.plane[0].width,
 							       view.rotated.plane[0].height,
 							       view.rotated.plane[0].stride,
@@ -518,6 +601,7 @@ static int igt_vma_rotate(void *arg)
 			}
 		}
 	}
+	}
 
 out_object:
 	i915_gem_object_put(obj);
@@ -721,7 +805,7 @@ int i915_vma_mock_selftests(void)
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_vma_create),
 		SUBTEST(igt_vma_pin1),
-		SUBTEST(igt_vma_rotate),
+		SUBTEST(igt_vma_rotate_remap),
 		SUBTEST(igt_vma_partial),
 	};
 	struct drm_i915_private *i915;
@@ -752,3 +836,147 @@ out_put:
 	drm_dev_put(&i915->drm);
 	return err;
 }
+
+static int igt_vma_remapped_gtt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	const struct intel_remapped_plane_info planes[] = {
+		{ .width = 1, .height = 1, .stride = 1 },
+		{ .width = 2, .height = 2, .stride = 2 },
+		{ .width = 4, .height = 4, .stride = 4 },
+		{ .width = 8, .height = 8, .stride = 8 },
+
+		{ .width = 3, .height = 5, .stride = 3 },
+		{ .width = 3, .height = 5, .stride = 4 },
+		{ .width = 3, .height = 5, .stride = 5 },
+
+		{ .width = 5, .height = 3, .stride = 5 },
+		{ .width = 5, .height = 3, .stride = 7 },
+		{ .width = 5, .height = 3, .stride = 9 },
+
+		{ .width = 4, .height = 6, .stride = 6 },
+		{ .width = 6, .height = 4, .stride = 6 },
+		{ }
+	}, *p;
+	enum i915_ggtt_view_type types[] = {
+		I915_GGTT_VIEW_ROTATED,
+		I915_GGTT_VIEW_REMAPPED,
+		0,
+	}, *t;
+	struct drm_i915_gem_object *obj;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+	for (t = types; *t; t++) {
+		for (p = planes; p->width; p++) {
+			struct i915_ggtt_view view = {
+				.type = *t,
+				.rotated.plane[0] = *p,
+			};
+			struct i915_vma *vma;
+			u32 __iomem *map;
+			unsigned int x, y;
+			int err;
+
+			i915_gem_object_lock(obj);
+			err = i915_gem_object_set_to_gtt_domain(obj, true);
+			i915_gem_object_unlock(obj);
+			if (err)
+				goto out;
+
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto out;
+			}
+
+			GEM_BUG_ON(vma->ggtt_view.type != *t);
+
+			map = i915_vma_pin_iomap(vma);
+			i915_vma_unpin(vma);
+			if (IS_ERR(map)) {
+				err = PTR_ERR(map);
+				goto out;
+			}
+
+			for (y = 0 ; y < p->height; y++) {
+				for (x = 0 ; x < p->width; x++) {
+					unsigned int offset;
+					u32 val = y << 16 | x;
+
+					if (*t == I915_GGTT_VIEW_ROTATED)
+						offset = (x * p->height + y) * PAGE_SIZE;
+					else
+						offset = (y * p->width + x) * PAGE_SIZE;
+
+					iowrite32(val, &map[offset / sizeof(*map)]);
+				}
+			}
+
+			i915_vma_unpin_iomap(vma);
+
+			vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+			if (IS_ERR(vma)) {
+				err = PTR_ERR(vma);
+				goto out;
+			}
+
+			GEM_BUG_ON(vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL);
+
+			map = i915_vma_pin_iomap(vma);
+			i915_vma_unpin(vma);
+			if (IS_ERR(map)) {
+				err = PTR_ERR(map);
+				goto out;
+			}
+
+			for (y = 0 ; y < p->height; y++) {
+				for (x = 0 ; x < p->width; x++) {
+					unsigned int offset, src_idx;
+					u32 exp = y << 16 | x;
+					u32 val;
+
+					if (*t == I915_GGTT_VIEW_ROTATED)
+						src_idx = rotated_index(&view.rotated, 0, x, y);
+					else
+						src_idx = remapped_index(&view.remapped, 0, x, y);
+					offset = src_idx * PAGE_SIZE;
+
+					val = ioread32(&map[offset / sizeof(*map)]);
+					if (val != exp) {
+						pr_err("%s VMA write test failed, expected 0x%x, found 0x%x\n",
+						       *t == I915_GGTT_VIEW_ROTATED ? "Rotated" : "Remapped",
+						       val, exp);
+						i915_vma_unpin_iomap(vma);
+						goto out;
+					}
+				}
+			}
+			i915_vma_unpin_iomap(vma);
+		}
+	}
+
+out:
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+	mutex_unlock(&i915->drm.struct_mutex);
+	i915_gem_object_put(obj);
+
+	return err;
+}
+
+int i915_vma_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_vma_remapped_gtt),
+	};
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_atomic.h b/drivers/gpu/drm/i915/selftests/igt_atomic.h
new file mode 100644
index 000000000000..93ec89f487ec
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_atomic.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef IGT_ATOMIC_H
+#define IGT_ATOMIC_H
+
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
+
+static void __preempt_begin(void)
+{
+	preempt_disable();
+}
+
+static void __preempt_end(void)
+{
+	preempt_enable();
+}
+
+static void __softirq_begin(void)
+{
+	local_bh_disable();
+}
+
+static void __softirq_end(void)
+{
+	local_bh_enable();
+}
+
+static void __hardirq_begin(void)
+{
+	local_irq_disable();
+}
+
+static void __hardirq_end(void)
+{
+	local_irq_enable();
+}
+
+struct igt_atomic_section {
+	const char *name;
+	void (*critical_section_begin)(void);
+	void (*critical_section_end)(void);
+};
+
+static const struct igt_atomic_section igt_atomic_phases[] = {
+	{ "preempt", __preempt_begin, __preempt_end },
+	{ "softirq", __softirq_begin, __softirq_end },
+	{ "hardirq", __hardirq_begin, __hardirq_end },
+	{ }
+};
+
+#endif /* IGT_ATOMIC_H */
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index 94aee4071a66..5bfd1b2626a2 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -4,30 +4,38 @@
  * Copyright © 2018 Intel Corporation
  */
 
-#include "../i915_drv.h"
+#include "gem/i915_gem_context.h"
+
+#include "i915_drv.h"
+#include "i915_selftest.h"
 
-#include "../i915_selftest.h"
 #include "igt_flush_test.h"
 
 int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
 {
+	int ret = i915_terminally_wedged(i915) ? -EIO : 0;
+	int repeat = !!(flags & I915_WAIT_LOCKED);
+
 	cond_resched();
 
-	if (flags & I915_WAIT_LOCKED &&
-	    i915_gem_switch_to_kernel_context(i915, i915->gt.active_engines)) {
-		pr_err("Failed to switch back to kernel context; declaring wedged\n");
-		i915_gem_set_wedged(i915);
-	}
+	do {
+		if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
+			pr_err("%pS timed out, cancelling all further testing.\n",
+			       __builtin_return_address(0));
 
-	if (i915_gem_wait_for_idle(i915, flags, HZ / 5) == -ETIME) {
-		pr_err("%pS timed out, cancelling all further testing.\n",
-		       __builtin_return_address(0));
+			GEM_TRACE("%pS timed out.\n",
+				  __builtin_return_address(0));
+			GEM_TRACE_DUMP();
 
-		GEM_TRACE("%pS timed out.\n", __builtin_return_address(0));
-		GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			repeat = 0;
+			ret = -EIO;
+		}
 
-		i915_gem_set_wedged(i915);
-	}
+		/* Ensure we also flush after wedging. */
+		if (flags & I915_WAIT_LOCKED)
+			i915_retire_requests(i915);
+	} while (repeat--);
 
-	return i915_terminally_wedged(i915);
+	return ret;
 }
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.c b/drivers/gpu/drm/i915/selftests/igt_reset.c
index 208a966da8ca..587df6fd4ffe 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.c
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.c
@@ -6,8 +6,9 @@
 
 #include "igt_reset.h"
 
+#include "gt/intel_engine.h"
+
 #include "../i915_drv.h"
-#include "../intel_ringbuffer.h"
 
 void igt_global_reset_lock(struct drm_i915_private *i915)
 {
@@ -42,3 +43,11 @@ void igt_global_reset_unlock(struct drm_i915_private *i915)
 	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
+
+bool igt_force_reset(struct drm_i915_private *i915)
+{
+	i915_gem_set_wedged(i915);
+	i915_reset(i915, 0, NULL);
+
+	return !i915_reset_failed(i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_reset.h b/drivers/gpu/drm/i915/selftests/igt_reset.h
index 5f0234d045d5..363bd853e50f 100644
--- a/drivers/gpu/drm/i915/selftests/igt_reset.h
+++ b/drivers/gpu/drm/i915/selftests/igt_reset.h
@@ -11,5 +11,6 @@
 
 void igt_global_reset_lock(struct drm_i915_private *i915);
 void igt_global_reset_unlock(struct drm_i915_private *i915);
+bool igt_force_reset(struct drm_i915_private *i915);
 
 #endif
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 16890dfe74c0..1e59b543cf27 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -4,6 +4,8 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "gem/selftests/igt_gem_utils.h"
+
 #include "igt_spinner.h"
 
 int igt_spinner_init(struct igt_spinner *spin, struct drm_i915_private *i915)
@@ -74,16 +76,11 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;
 
+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
-	if (err)
-		return err;
-
-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
+	i915_vma_unlock(vma);
 
-	return 0;
+	return err;
 }
 
 struct i915_request *
@@ -92,17 +89,16 @@ igt_spinner_create_request(struct igt_spinner *spin,
 			   struct intel_engine_cs *engine,
 			   u32 arbitration_command)
 {
-	struct i915_address_space *vm = &ctx->ppgtt->vm;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
 	u32 *batch;
 	int err;
 
-	vma = i915_vma_instance(spin->obj, vm, NULL);
+	vma = i915_vma_instance(spin->obj, ctx->vm, NULL);
 	if (IS_ERR(vma))
 		return ERR_CAST(vma);
 
-	hws = i915_vma_instance(spin->hws, vm, NULL);
+	hws = i915_vma_instance(spin->hws, ctx->vm, NULL);
 	if (IS_ERR(hws))
 		return ERR_CAST(hws);
 
@@ -114,7 +110,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
 	if (err)
 		goto unpin_vma;
 
-	rq = i915_request_alloc(engine, ctx);
+	rq = igt_request_alloc(ctx, engine);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto unpin_hws;
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.h b/drivers/gpu/drm/i915/selftests/igt_spinner.h
index 391777c76dc7..34a88ac9b47a 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.h
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.h
@@ -7,12 +7,12 @@
 #ifndef __I915_SELFTESTS_IGT_SPINNER_H__
 #define __I915_SELFTESTS_IGT_SPINNER_H__
 
-#include "../i915_selftest.h"
+#include "gem/i915_gem_context.h"
+#include "gt/intel_engine.h"
 
-#include "../i915_drv.h"
-#include "../i915_request.h"
-#include "../intel_ringbuffer.h"
-#include "../i915_gem_context.h"
+#include "i915_drv.h"
+#include "i915_request.h"
+#include "i915_selftest.h"
 
 struct igt_spinner {
 	struct drm_i915_private *i915;
diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c
index b05a21eaa8f4..6ca8584cd64c 100644
--- a/drivers/gpu/drm/i915/selftests/intel_guc.c
+++ b/drivers/gpu/drm/i915/selftests/intel_guc.c
@@ -22,7 +22,8 @@
  *
  */
 
-#include "../i915_selftest.h"
+#include "i915_selftest.h"
+#include "gem/i915_gem_pm.h"
 
 /* max doorbell number + negative test for each client type */
 #define ATTEMPTS (GUC_NUM_DOORBELLS + GUC_CLIENT_PRIORITY_NUM)
@@ -143,7 +144,7 @@ static int igt_guc_clients(void *args)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -226,7 +227,7 @@ out:
 	guc_clients_create(guc);
 	guc_clients_enable(guc);
 unlock:
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
@@ -246,7 +247,7 @@ static int igt_guc_doorbells(void *arg)
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(dev_priv);
+	wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
 
 	guc = &dev_priv->guc;
 	if (!guc) {
@@ -339,7 +340,7 @@ out:
 			guc_client_free(clients[i]);
 		}
 unlock:
-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index e0d7ebecb215..86815c6072a1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -176,7 +176,7 @@ static int live_forcewake_ops(void *arg)
 		return 0;
 	}
 
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 
 	for_each_fw_domain(domain, uncore, tmp) {
 		smp_store_mb(domain->active, false);
@@ -247,7 +247,7 @@ static int live_forcewake_ops(void *arg)
 	}
 
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
index 2bfa72c1654b..b976c12817c5 100644
--- a/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/lib_sw_fence.c
@@ -45,6 +45,9 @@ void __onstack_fence_init(struct i915_sw_fence *fence,
 
 void onstack_fence_fini(struct i915_sw_fence *fence)
 {
+	if (!fence->flags)
+		return;
+
 	i915_sw_fence_commit(fence);
 	i915_sw_fence_fini(fence);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h
deleted file mode 100644
index 29b9d60a158b..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_context.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_CONTEXT_H
-#define __MOCK_CONTEXT_H
-
-void mock_init_contexts(struct drm_i915_private *i915);
-
-struct i915_gem_context *
-mock_context(struct drm_i915_private *i915,
-	     const char *name);
-
-void mock_context_close(struct i915_gem_context *ctx);
-
-struct i915_gem_context *
-live_context(struct drm_i915_private *i915, struct drm_file *file);
-
-struct i915_gem_context *kernel_context(struct drm_i915_private *i915);
-void kernel_context_close(struct i915_gem_context *ctx);
-
-#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
deleted file mode 100644
index ec80613159b9..000000000000
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef __MOCK_DMABUF_H__
-#define __MOCK_DMABUF_H__
-
-#include <linux/dma-buf.h>
-
-struct mock_dmabuf {
-	int npages;
-	struct page *pages[];
-};
-
-static struct mock_dmabuf *to_mock(struct dma_buf *buf)
-{
-	return buf->priv;
-}
-
-#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 60bbf8b4df40..64bc51400ae7 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -25,14 +25,16 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 
-#include "mock_engine.h"
-#include "mock_context.h"
+#include "gt/mock_engine.h"
+
 #include "mock_request.h"
 #include "mock_gem_device.h"
-#include "mock_gem_object.h"
 #include "mock_gtt.h"
 #include "mock_uncore.h"
 
+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/mock_gem_object.h"
+
 void mock_device_flush(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -40,11 +42,10 @@ void mock_device_flush(struct drm_i915_private *i915)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	for_each_engine(engine, i915, id)
-		mock_engine_flush(engine);
-
-	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
+	do {
+		for_each_engine(engine, i915, id)
+			mock_engine_flush(engine);
+	} while (i915_retire_requests(i915));
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -55,11 +56,9 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_device_flush(i915);
-	i915_gem_contexts_lost(i915);
 	mutex_unlock(&i915->drm.struct_mutex);
 
-	drain_delayed_work(&i915->gt.retire_work);
-	drain_delayed_work(&i915->gt.idle_work);
+	flush_work(&i915->gem.idle_work);
 	i915_gem_drain_workqueue(i915);
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -109,10 +108,6 @@ static void mock_retire_work_handler(struct work_struct *work)
 
 static void mock_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *i915 =
-		container_of(work, typeof(*i915), gt.idle_work.work);
-
-	i915->gt.active_engines = 0;
 }
 
 static int pm_domain_resume(struct device *dev)
@@ -156,8 +151,6 @@ struct drm_i915_private *mock_gem_device(void)
 	i915 = (struct drm_i915_private *)(pdev + 1);
 	pci_set_drvdata(pdev, i915);
 
-	intel_runtime_pm_init_early(i915);
-
 	dev_pm_domain_set(&pdev->dev, &pm_domain);
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
@@ -172,6 +165,8 @@ struct drm_i915_private *mock_gem_device(void)
 	i915->drm.pdev = pdev;
 	i915->drm.dev_private = i915;
 
+	intel_runtime_pm_init_early(&i915->runtime_pm);
+
 	/* Using the global GTT may ask questions about KMS users, so prepare */
 	drm_mode_config_init(&i915->drm);
 
@@ -184,6 +179,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_uncore_init(&i915->uncore);
 	i915_gem_init__mm(i915);
+	intel_gt_pm_init(i915);
+	atomic_inc(&i915->gt.wakeref.count); /* disable; no hw support */
 
 	init_waitqueue_head(&i915->gpu_error.wait_queue);
 	init_waitqueue_head(&i915->gpu_error.reset_queue);
@@ -196,8 +193,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler);
-	INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler);
+	INIT_DELAYED_WORK(&i915->gem.retire_work, mock_retire_work_handler);
+	INIT_WORK(&i915->gem.idle_work, mock_idle_work_handler);
 
 	i915->gt.awake = true;
 
@@ -205,18 +202,23 @@ struct drm_i915_private *mock_gem_device(void)
 
 	INIT_LIST_HEAD(&i915->gt.active_rings);
 	INIT_LIST_HEAD(&i915->gt.closed_vma);
+	spin_lock_init(&i915->gt.closed_lock);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
 	mock_init_ggtt(i915, &i915->ggtt);
 
 	mkwrite_device_info(i915)->engine_mask = BIT(0);
-	i915->kernel_context = mock_context(i915, NULL);
-	if (!i915->kernel_context)
-		goto err_unlock;
 
 	i915->engine[RCS0] = mock_engine(i915, "mock", RCS0);
 	if (!i915->engine[RCS0])
+		goto err_unlock;
+
+	i915->kernel_context = mock_context(i915, NULL);
+	if (!i915->kernel_context)
+		goto err_engine;
+
+	if (mock_engine_init(i915->engine[RCS0]))
 		goto err_context;
 
 	mutex_unlock(&i915->drm.struct_mutex);
@@ -227,6 +229,8 @@ struct drm_i915_private *mock_gem_device(void)
 
 err_context:
 	i915_gem_contexts_fini(i915);
+err_engine:
+	mock_engine_free(i915->engine[RCS0]);
 err_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_timelines_fini(i915);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index cd83929fde8e..f625c307a406 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -55,17 +55,14 @@ static void mock_cleanup(struct i915_address_space *vm)
 {
 }
 
-struct i915_hw_ppgtt *
-mock_ppgtt(struct drm_i915_private *i915,
-	   const char *name)
+struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
 {
-	struct i915_hw_ppgtt *ppgtt;
+	struct i915_ppgtt *ppgtt;
 
 	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
 	if (!ppgtt)
 		return NULL;
 
-	kref_init(&ppgtt->ref);
 	ppgtt->vm.i915 = i915;
 	ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE);
 	ppgtt->vm.file = ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h
index 40d544bde1d5..3387393286de 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.h
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h
@@ -28,8 +28,6 @@
 void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt);
 void mock_fini_ggtt(struct i915_ggtt *ggtt);
 
-struct i915_hw_ppgtt *
-mock_ppgtt(struct drm_i915_private *i915,
-	   const char *name);
+struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name);
 
 #endif /* !__MOCK_GTT_H */
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index d1a7c9608712..9390fc09984b 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -22,7 +22,9 @@
  *
  */
 
-#include "mock_engine.h"
+#include "gem/selftests/igt_gem_utils.h"
+#include "gt/mock_engine.h"
+
 #include "mock_request.h"
 
 struct i915_request *
@@ -33,7 +35,7 @@ mock_request(struct intel_engine_cs *engine,
 	struct i915_request *request;
 
 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
-	request = i915_request_alloc(engine, context);
+	request = igt_request_alloc(context, engine);
 	if (IS_ERR(request))
 		return NULL;
 
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index e084476469ef..65b52be23d42 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -13,7 +13,6 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 	timeline->i915 = NULL;
 	timeline->fence_context = context;
 
-	spin_lock_init(&timeline->lock);
 	mutex_init(&timeline->mutex);
 
 	INIT_ACTIVE_REQUEST(&timeline->last_request);
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index cd6d2a16071f..d599186d5b71 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -24,7 +24,8 @@
 #include <linux/prime_numbers.h>
 #include <linux/random.h>
 
-#include "../i915_selftest.h"
+#include "i915_selftest.h"
+#include "i915_utils.h"
 
 #define PFN_BIAS (1 << 10)
 
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 91edfe2498a6..2c19054ed570 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -115,8 +115,8 @@ drm_plane_state_to_ubo(struct drm_plane_state *state)
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 1);
 	BUG_ON(!cma_obj);
 
-	x /= drm_format_horz_chroma_subsampling(fb->format->format);
-	y /= drm_format_vert_chroma_subsampling(fb->format->format);
+	x /= fb->format->hsub;
+	y /= fb->format->vsub;
 
 	return cma_obj->paddr + fb->offsets[1] + fb->pitches[1] * y +
 	       fb->format->cpp[1] * x - eba;
@@ -134,8 +134,8 @@ drm_plane_state_to_vbo(struct drm_plane_state *state)
 	cma_obj = drm_fb_cma_get_gem_obj(fb, 2);
 	BUG_ON(!cma_obj);
 
-	x /= drm_format_horz_chroma_subsampling(fb->format->format);
-	y /= drm_format_vert_chroma_subsampling(fb->format->format);
+	x /= fb->format->hsub;
+	y /= fb->format->vsub;
 
 	return cma_obj->paddr + fb->offsets[2] + fb->pitches[2] * y +
 	       fb->format->cpp[2] * x - eba;
@@ -352,7 +352,6 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
 	struct drm_framebuffer *old_fb = old_state->fb;
 	unsigned long eba, ubo, vbo, old_ubo, old_vbo, alpha_eba;
 	bool can_position = (plane->type == DRM_PLANE_TYPE_OVERLAY);
-	int hsub, vsub;
 	int ret;
 
 	/* Ok to disable */
@@ -471,10 +470,8 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
 		 * The x/y offsets must be even in case of horizontal/vertical
 		 * chroma subsampling.
 		 */
-		hsub = drm_format_horz_chroma_subsampling(fb->format->format);
-		vsub = drm_format_vert_chroma_subsampling(fb->format->format);
-		if (((state->src.x1 >> 16) & (hsub - 1)) ||
-		    ((state->src.y1 >> 16) & (vsub - 1)))
+		if (((state->src.x1 >> 16) & (fb->format->hsub - 1)) ||
+		    ((state->src.y1 >> 16) & (fb->format->vsub - 1)))
 			return -EINVAL;
 		break;
 	case DRM_FORMAT_RGB565_A8:
diff --git a/drivers/gpu/drm/ingenic/Kconfig b/drivers/gpu/drm/ingenic/Kconfig
new file mode 100644
index 000000000000..d82c3d37ec9c
--- /dev/null
+++ b/drivers/gpu/drm/ingenic/Kconfig
@@ -0,0 +1,16 @@
+config DRM_INGENIC
+	tristate "DRM Support for Ingenic SoCs"
+	depends on MIPS || COMPILE_TEST
+	depends on DRM
+	depends on CMA
+	depends on OF
+	select DRM_BRIDGE
+	select DRM_PANEL_BRIDGE
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select DRM_GEM_CMA_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the Ingenic SoCs.
+
+	  If M is selected the module will be called ingenic-drm.
diff --git a/drivers/gpu/drm/ingenic/Makefile b/drivers/gpu/drm/ingenic/Makefile
new file mode 100644
index 000000000000..11cac42ce0bb
--- /dev/null
+++ b/drivers/gpu/drm/ingenic/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DRM_INGENIC) += ingenic-drm.o
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c
new file mode 100644
index 000000000000..e9f9e9fb9b17
--- /dev/null
+++ b/drivers/gpu/drm/ingenic/ingenic-drm.c
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Ingenic JZ47xx KMS driver
+//
+// Copyright (C) 2019, Paul Cercueil <paul@crapouillou.net>
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_plane.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+
+#define JZ_REG_LCD_CFG				0x00
+#define JZ_REG_LCD_VSYNC			0x04
+#define JZ_REG_LCD_HSYNC			0x08
+#define JZ_REG_LCD_VAT				0x0C
+#define JZ_REG_LCD_DAH				0x10
+#define JZ_REG_LCD_DAV				0x14
+#define JZ_REG_LCD_PS				0x18
+#define JZ_REG_LCD_CLS				0x1C
+#define JZ_REG_LCD_SPL				0x20
+#define JZ_REG_LCD_REV				0x24
+#define JZ_REG_LCD_CTRL				0x30
+#define JZ_REG_LCD_STATE			0x34
+#define JZ_REG_LCD_IID				0x38
+#define JZ_REG_LCD_DA0				0x40
+#define JZ_REG_LCD_SA0				0x44
+#define JZ_REG_LCD_FID0				0x48
+#define JZ_REG_LCD_CMD0				0x4C
+#define JZ_REG_LCD_DA1				0x50
+#define JZ_REG_LCD_SA1				0x54
+#define JZ_REG_LCD_FID1				0x58
+#define JZ_REG_LCD_CMD1				0x5C
+
+#define JZ_LCD_CFG_SLCD				BIT(31)
+#define JZ_LCD_CFG_PS_DISABLE			BIT(23)
+#define JZ_LCD_CFG_CLS_DISABLE			BIT(22)
+#define JZ_LCD_CFG_SPL_DISABLE			BIT(21)
+#define JZ_LCD_CFG_REV_DISABLE			BIT(20)
+#define JZ_LCD_CFG_HSYNCM			BIT(19)
+#define JZ_LCD_CFG_PCLKM			BIT(18)
+#define JZ_LCD_CFG_INV				BIT(17)
+#define JZ_LCD_CFG_SYNC_DIR			BIT(16)
+#define JZ_LCD_CFG_PS_POLARITY			BIT(15)
+#define JZ_LCD_CFG_CLS_POLARITY			BIT(14)
+#define JZ_LCD_CFG_SPL_POLARITY			BIT(13)
+#define JZ_LCD_CFG_REV_POLARITY			BIT(12)
+#define JZ_LCD_CFG_HSYNC_ACTIVE_LOW		BIT(11)
+#define JZ_LCD_CFG_PCLK_FALLING_EDGE		BIT(10)
+#define JZ_LCD_CFG_DE_ACTIVE_LOW		BIT(9)
+#define JZ_LCD_CFG_VSYNC_ACTIVE_LOW		BIT(8)
+#define JZ_LCD_CFG_18_BIT			BIT(7)
+#define JZ_LCD_CFG_PDW				(BIT(5) | BIT(4))
+
+#define JZ_LCD_CFG_MODE_GENERIC_16BIT		0
+#define JZ_LCD_CFG_MODE_GENERIC_18BIT		BIT(7)
+#define JZ_LCD_CFG_MODE_GENERIC_24BIT		BIT(6)
+
+#define JZ_LCD_CFG_MODE_SPECIAL_TFT_1		1
+#define JZ_LCD_CFG_MODE_SPECIAL_TFT_2		2
+#define JZ_LCD_CFG_MODE_SPECIAL_TFT_3		3
+
+#define JZ_LCD_CFG_MODE_TV_OUT_P		4
+#define JZ_LCD_CFG_MODE_TV_OUT_I		6
+
+#define JZ_LCD_CFG_MODE_SINGLE_COLOR_STN	8
+#define JZ_LCD_CFG_MODE_SINGLE_MONOCHROME_STN	9
+#define JZ_LCD_CFG_MODE_DUAL_COLOR_STN		10
+#define JZ_LCD_CFG_MODE_DUAL_MONOCHROME_STN	11
+
+#define JZ_LCD_CFG_MODE_8BIT_SERIAL		12
+#define JZ_LCD_CFG_MODE_LCM			13
+
+#define JZ_LCD_VSYNC_VPS_OFFSET			16
+#define JZ_LCD_VSYNC_VPE_OFFSET			0
+
+#define JZ_LCD_HSYNC_HPS_OFFSET			16
+#define JZ_LCD_HSYNC_HPE_OFFSET			0
+
+#define JZ_LCD_VAT_HT_OFFSET			16
+#define JZ_LCD_VAT_VT_OFFSET			0
+
+#define JZ_LCD_DAH_HDS_OFFSET			16
+#define JZ_LCD_DAH_HDE_OFFSET			0
+
+#define JZ_LCD_DAV_VDS_OFFSET			16
+#define JZ_LCD_DAV_VDE_OFFSET			0
+
+#define JZ_LCD_CTRL_BURST_4			(0x0 << 28)
+#define JZ_LCD_CTRL_BURST_8			(0x1 << 28)
+#define JZ_LCD_CTRL_BURST_16			(0x2 << 28)
+#define JZ_LCD_CTRL_RGB555			BIT(27)
+#define JZ_LCD_CTRL_OFUP			BIT(26)
+#define JZ_LCD_CTRL_FRC_GRAYSCALE_16		(0x0 << 24)
+#define JZ_LCD_CTRL_FRC_GRAYSCALE_4		(0x1 << 24)
+#define JZ_LCD_CTRL_FRC_GRAYSCALE_2		(0x2 << 24)
+#define JZ_LCD_CTRL_PDD_MASK			(0xff << 16)
+#define JZ_LCD_CTRL_EOF_IRQ			BIT(13)
+#define JZ_LCD_CTRL_SOF_IRQ			BIT(12)
+#define JZ_LCD_CTRL_OFU_IRQ			BIT(11)
+#define JZ_LCD_CTRL_IFU0_IRQ			BIT(10)
+#define JZ_LCD_CTRL_IFU1_IRQ			BIT(9)
+#define JZ_LCD_CTRL_DD_IRQ			BIT(8)
+#define JZ_LCD_CTRL_QDD_IRQ			BIT(7)
+#define JZ_LCD_CTRL_REVERSE_ENDIAN		BIT(6)
+#define JZ_LCD_CTRL_LSB_FISRT			BIT(5)
+#define JZ_LCD_CTRL_DISABLE			BIT(4)
+#define JZ_LCD_CTRL_ENABLE			BIT(3)
+#define JZ_LCD_CTRL_BPP_1			0x0
+#define JZ_LCD_CTRL_BPP_2			0x1
+#define JZ_LCD_CTRL_BPP_4			0x2
+#define JZ_LCD_CTRL_BPP_8			0x3
+#define JZ_LCD_CTRL_BPP_15_16			0x4
+#define JZ_LCD_CTRL_BPP_18_24			0x5
+#define JZ_LCD_CTRL_BPP_MASK			(JZ_LCD_CTRL_RGB555 | (0x7 << 0))
+
+#define JZ_LCD_CMD_SOF_IRQ			BIT(31)
+#define JZ_LCD_CMD_EOF_IRQ			BIT(30)
+#define JZ_LCD_CMD_ENABLE_PAL			BIT(28)
+
+#define JZ_LCD_SYNC_MASK			0x3ff
+
+#define JZ_LCD_STATE_EOF_IRQ			BIT(5)
+#define JZ_LCD_STATE_SOF_IRQ			BIT(4)
+#define JZ_LCD_STATE_DISABLED			BIT(0)
+
+struct ingenic_dma_hwdesc {
+	u32 next;
+	u32 addr;
+	u32 id;
+	u32 cmd;
+} __packed;
+
+struct jz_soc_info {
+	bool needs_dev_clk;
+};
+
+struct ingenic_drm {
+	struct drm_device drm;
+	struct drm_plane primary;
+	struct drm_crtc crtc;
+	struct drm_encoder encoder;
+
+	struct device *dev;
+	struct regmap *map;
+	struct clk *lcd_clk, *pix_clk;
+
+	struct ingenic_dma_hwdesc *dma_hwdesc;
+	dma_addr_t dma_hwdesc_phys;
+};
+
+static const u32 ingenic_drm_primary_formats[] = {
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+};
+
+static bool ingenic_drm_writeable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case JZ_REG_LCD_IID:
+	case JZ_REG_LCD_SA0:
+	case JZ_REG_LCD_FID0:
+	case JZ_REG_LCD_CMD0:
+	case JZ_REG_LCD_SA1:
+	case JZ_REG_LCD_FID1:
+	case JZ_REG_LCD_CMD1:
+		return false;
+	default:
+		return true;
+	}
+}
+
+static const struct regmap_config ingenic_drm_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+
+	.max_register = JZ_REG_LCD_CMD1,
+	.writeable_reg = ingenic_drm_writeable_reg,
+};
+
+static inline struct ingenic_drm *drm_device_get_priv(struct drm_device *drm)
+{
+	return container_of(drm, struct ingenic_drm, drm);
+}
+
+static inline struct ingenic_drm *drm_crtc_get_priv(struct drm_crtc *crtc)
+{
+	return container_of(crtc, struct ingenic_drm, crtc);
+}
+
+static inline struct ingenic_drm *
+drm_encoder_get_priv(struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct ingenic_drm, encoder);
+}
+
+static inline struct ingenic_drm *drm_plane_get_priv(struct drm_plane *plane)
+{
+	return container_of(plane, struct ingenic_drm, primary);
+}
+
+static void ingenic_drm_crtc_atomic_enable(struct drm_crtc *crtc,
+					   struct drm_crtc_state *state)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+
+	regmap_write(priv->map, JZ_REG_LCD_STATE, 0);
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL,
+			   JZ_LCD_CTRL_ENABLE | JZ_LCD_CTRL_DISABLE,
+			   JZ_LCD_CTRL_ENABLE);
+
+	drm_crtc_vblank_on(crtc);
+}
+
+static void ingenic_drm_crtc_atomic_disable(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+	unsigned int var;
+
+	drm_crtc_vblank_off(crtc);
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL,
+			   JZ_LCD_CTRL_DISABLE, JZ_LCD_CTRL_DISABLE);
+
+	regmap_read_poll_timeout(priv->map, JZ_REG_LCD_STATE, var,
+				 var & JZ_LCD_STATE_DISABLED,
+				 1000, 0);
+}
+
+static void ingenic_drm_crtc_update_timings(struct ingenic_drm *priv,
+					    struct drm_display_mode *mode)
+{
+	unsigned int vpe, vds, vde, vt, hpe, hds, hde, ht;
+
+	vpe = mode->vsync_end - mode->vsync_start;
+	vds = mode->vtotal - mode->vsync_start;
+	vde = vds + mode->vdisplay;
+	vt = vde + mode->vsync_start - mode->vdisplay;
+
+	hpe = mode->hsync_end - mode->hsync_start;
+	hds = mode->htotal - mode->hsync_start;
+	hde = hds + mode->hdisplay;
+	ht = hde + mode->hsync_start - mode->hdisplay;
+
+	regmap_write(priv->map, JZ_REG_LCD_VSYNC,
+		     0 << JZ_LCD_VSYNC_VPS_OFFSET |
+		     vpe << JZ_LCD_VSYNC_VPE_OFFSET);
+
+	regmap_write(priv->map, JZ_REG_LCD_HSYNC,
+		     0 << JZ_LCD_HSYNC_HPS_OFFSET |
+		     hpe << JZ_LCD_HSYNC_HPE_OFFSET);
+
+	regmap_write(priv->map, JZ_REG_LCD_VAT,
+		     ht << JZ_LCD_VAT_HT_OFFSET |
+		     vt << JZ_LCD_VAT_VT_OFFSET);
+
+	regmap_write(priv->map, JZ_REG_LCD_DAH,
+		     hds << JZ_LCD_DAH_HDS_OFFSET |
+		     hde << JZ_LCD_DAH_HDE_OFFSET);
+	regmap_write(priv->map, JZ_REG_LCD_DAV,
+		     vds << JZ_LCD_DAV_VDS_OFFSET |
+		     vde << JZ_LCD_DAV_VDE_OFFSET);
+}
+
+static void ingenic_drm_crtc_update_ctrl(struct ingenic_drm *priv,
+					 const struct drm_format_info *finfo)
+{
+	unsigned int ctrl = JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_16;
+
+	switch (finfo->format) {
+	case DRM_FORMAT_XRGB1555:
+		ctrl |= JZ_LCD_CTRL_RGB555;
+		/* fall-through */
+	case DRM_FORMAT_RGB565:
+		ctrl |= JZ_LCD_CTRL_BPP_15_16;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		ctrl |= JZ_LCD_CTRL_BPP_18_24;
+		break;
+	}
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL,
+			   JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_16 |
+			   JZ_LCD_CTRL_BPP_MASK, ctrl);
+}
+
+static int ingenic_drm_crtc_atomic_check(struct drm_crtc *crtc,
+					 struct drm_crtc_state *state)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+	long rate;
+
+	if (!drm_atomic_crtc_needs_modeset(state))
+		return 0;
+
+	rate = clk_round_rate(priv->pix_clk,
+			      state->adjusted_mode.clock * 1000);
+	if (rate < 0)
+		return rate;
+
+	return 0;
+}
+
+static void ingenic_drm_crtc_atomic_flush(struct drm_crtc *crtc,
+					  struct drm_crtc_state *oldstate)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+	struct drm_crtc_state *state = crtc->state;
+	struct drm_pending_vblank_event *event = state->event;
+	struct drm_framebuffer *drm_fb = crtc->primary->state->fb;
+	const struct drm_format_info *finfo;
+
+	if (drm_atomic_crtc_needs_modeset(state)) {
+		finfo = drm_format_info(drm_fb->format->format);
+
+		ingenic_drm_crtc_update_timings(priv, &state->mode);
+		ingenic_drm_crtc_update_ctrl(priv, finfo);
+
+		clk_set_rate(priv->pix_clk, state->adjusted_mode.clock * 1000);
+
+		regmap_write(priv->map, JZ_REG_LCD_DA0, priv->dma_hwdesc->next);
+	}
+
+	if (event) {
+		state->event = NULL;
+
+		spin_lock_irq(&crtc->dev->event_lock);
+		if (drm_crtc_vblank_get(crtc) == 0)
+			drm_crtc_arm_vblank_event(crtc, event);
+		else
+			drm_crtc_send_vblank_event(crtc, event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+	}
+}
+
+static void ingenic_drm_plane_atomic_update(struct drm_plane *plane,
+					    struct drm_plane_state *oldstate)
+{
+	struct ingenic_drm *priv = drm_plane_get_priv(plane);
+	struct drm_plane_state *state = plane->state;
+	unsigned int width, height, cpp;
+
+	width = state->crtc->state->adjusted_mode.hdisplay;
+	height = state->crtc->state->adjusted_mode.vdisplay;
+	cpp = state->fb->format->cpp[plane->index];
+
+	priv->dma_hwdesc->addr = drm_fb_cma_get_gem_addr(state->fb, state, 0);
+	priv->dma_hwdesc->cmd = width * height * cpp / 4;
+	priv->dma_hwdesc->cmd |= JZ_LCD_CMD_EOF_IRQ;
+}
+
+static void ingenic_drm_encoder_atomic_mode_set(struct drm_encoder *encoder,
+						struct drm_crtc_state *crtc_state,
+						struct drm_connector_state *conn_state)
+{
+	struct ingenic_drm *priv = drm_encoder_get_priv(encoder);
+	struct drm_display_mode *mode = &crtc_state->adjusted_mode;
+	struct drm_display_info *info = &conn_state->connector->display_info;
+	unsigned int cfg = JZ_LCD_CFG_PS_DISABLE
+			 | JZ_LCD_CFG_CLS_DISABLE
+			 | JZ_LCD_CFG_SPL_DISABLE
+			 | JZ_LCD_CFG_REV_DISABLE;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		cfg |= JZ_LCD_CFG_HSYNC_ACTIVE_LOW;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		cfg |= JZ_LCD_CFG_VSYNC_ACTIVE_LOW;
+	if (info->bus_flags & DRM_BUS_FLAG_DE_LOW)
+		cfg |= JZ_LCD_CFG_DE_ACTIVE_LOW;
+	if (info->bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+		cfg |= JZ_LCD_CFG_PCLK_FALLING_EDGE;
+
+	if (conn_state->connector->connector_type == DRM_MODE_CONNECTOR_TV) {
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			cfg |= JZ_LCD_CFG_MODE_TV_OUT_I;
+		else
+			cfg |= JZ_LCD_CFG_MODE_TV_OUT_P;
+	} else {
+		switch (*info->bus_formats) {
+		case MEDIA_BUS_FMT_RGB565_1X16:
+			cfg |= JZ_LCD_CFG_MODE_GENERIC_16BIT;
+			break;
+		case MEDIA_BUS_FMT_RGB666_1X18:
+			cfg |= JZ_LCD_CFG_MODE_GENERIC_18BIT;
+			break;
+		case MEDIA_BUS_FMT_RGB888_1X24:
+			cfg |= JZ_LCD_CFG_MODE_GENERIC_24BIT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	regmap_write(priv->map, JZ_REG_LCD_CFG, cfg);
+}
+
+static int ingenic_drm_encoder_atomic_check(struct drm_encoder *encoder,
+					    struct drm_crtc_state *crtc_state,
+					    struct drm_connector_state *conn_state)
+{
+	struct drm_display_info *info = &conn_state->connector->display_info;
+
+	if (info->num_bus_formats != 1)
+		return -EINVAL;
+
+	if (conn_state->connector->connector_type == DRM_MODE_CONNECTOR_TV)
+		return 0;
+
+	switch (*info->bus_formats) {
+	case MEDIA_BUS_FMT_RGB565_1X16:
+	case MEDIA_BUS_FMT_RGB666_1X18:
+	case MEDIA_BUS_FMT_RGB888_1X24:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static irqreturn_t ingenic_drm_irq_handler(int irq, void *arg)
+{
+	struct ingenic_drm *priv = arg;
+	unsigned int state;
+
+	regmap_read(priv->map, JZ_REG_LCD_STATE, &state);
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_STATE,
+			   JZ_LCD_STATE_EOF_IRQ, 0);
+
+	if (state & JZ_LCD_STATE_EOF_IRQ)
+		drm_crtc_handle_vblank(&priv->crtc);
+
+	return IRQ_HANDLED;
+}
+
+static void ingenic_drm_release(struct drm_device *drm)
+{
+	struct ingenic_drm *priv = drm_device_get_priv(drm);
+
+	drm_mode_config_cleanup(drm);
+	drm_dev_fini(drm);
+	kfree(priv);
+}
+
+static int ingenic_drm_enable_vblank(struct drm_crtc *crtc)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL,
+			   JZ_LCD_CTRL_EOF_IRQ, JZ_LCD_CTRL_EOF_IRQ);
+
+	return 0;
+}
+
+static void ingenic_drm_disable_vblank(struct drm_crtc *crtc)
+{
+	struct ingenic_drm *priv = drm_crtc_get_priv(crtc);
+
+	regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, JZ_LCD_CTRL_EOF_IRQ, 0);
+}
+
+DEFINE_DRM_GEM_CMA_FOPS(ingenic_drm_fops);
+
+static struct drm_driver ingenic_drm_driver_data = {
+	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME
+				| DRIVER_ATOMIC,
+	.name			= "ingenic-drm",
+	.desc			= "DRM module for Ingenic SoCs",
+	.date			= "20190422",
+	.major			= 1,
+	.minor			= 0,
+	.patchlevel		= 0,
+
+	.fops			= &ingenic_drm_fops,
+
+	.dumb_create		= drm_gem_cma_dumb_create,
+	.gem_free_object_unlocked = drm_gem_cma_free_object,
+	.gem_vm_ops		= &drm_gem_cma_vm_ops,
+
+	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
+	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_vmap		= drm_gem_cma_prime_vmap,
+	.gem_prime_vunmap	= drm_gem_cma_prime_vunmap,
+	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
+
+	.irq_handler		= ingenic_drm_irq_handler,
+	.release		= ingenic_drm_release,
+};
+
+static const struct drm_plane_funcs ingenic_drm_primary_plane_funcs = {
+	.update_plane		= drm_atomic_helper_update_plane,
+	.disable_plane		= drm_atomic_helper_disable_plane,
+	.reset			= drm_atomic_helper_plane_reset,
+	.destroy		= drm_plane_cleanup,
+
+	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+};
+
+static const struct drm_crtc_funcs ingenic_drm_crtc_funcs = {
+	.set_config		= drm_atomic_helper_set_config,
+	.page_flip		= drm_atomic_helper_page_flip,
+	.reset			= drm_atomic_helper_crtc_reset,
+	.destroy		= drm_crtc_cleanup,
+
+	.atomic_duplicate_state	= drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_crtc_destroy_state,
+
+	.enable_vblank		= ingenic_drm_enable_vblank,
+	.disable_vblank		= ingenic_drm_disable_vblank,
+
+	.gamma_set		= drm_atomic_helper_legacy_gamma_set,
+};
+
+static const struct drm_plane_helper_funcs ingenic_drm_plane_helper_funcs = {
+	.atomic_update		= ingenic_drm_plane_atomic_update,
+	.prepare_fb		= drm_gem_fb_prepare_fb,
+};
+
+static const struct drm_crtc_helper_funcs ingenic_drm_crtc_helper_funcs = {
+	.atomic_enable		= ingenic_drm_crtc_atomic_enable,
+	.atomic_disable		= ingenic_drm_crtc_atomic_disable,
+	.atomic_flush		= ingenic_drm_crtc_atomic_flush,
+	.atomic_check		= ingenic_drm_crtc_atomic_check,
+};
+
+static const struct drm_encoder_helper_funcs ingenic_drm_encoder_helper_funcs = {
+	.atomic_mode_set	= ingenic_drm_encoder_atomic_mode_set,
+	.atomic_check		= ingenic_drm_encoder_atomic_check,
+};
+
+static const struct drm_mode_config_funcs ingenic_drm_mode_config_funcs = {
+	.fb_create		= drm_gem_fb_create,
+	.output_poll_changed	= drm_fb_helper_output_poll_changed,
+	.atomic_check		= drm_atomic_helper_check,
+	.atomic_commit		= drm_atomic_helper_commit,
+};
+
+static const struct drm_encoder_funcs ingenic_drm_encoder_funcs = {
+	.destroy		= drm_encoder_cleanup,
+};
+
+static void ingenic_drm_free_dma_hwdesc(void *d)
+{
+	struct ingenic_drm *priv = d;
+
+	dma_free_coherent(priv->dev, sizeof(*priv->dma_hwdesc),
+			  priv->dma_hwdesc, priv->dma_hwdesc_phys);
+}
+
+static int ingenic_drm_probe(struct platform_device *pdev)
+{
+	const struct jz_soc_info *soc_info;
+	struct device *dev = &pdev->dev;
+	struct ingenic_drm *priv;
+	struct clk *parent_clk;
+	struct drm_bridge *bridge;
+	struct drm_panel *panel;
+	struct drm_device *drm;
+	struct resource *mem;
+	void __iomem *base;
+	long parent_rate;
+	int ret, irq;
+
+	soc_info = of_device_get_match_data(dev);
+	if (!soc_info) {
+		dev_err(dev, "Missing platform data\n");
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+	drm = &priv->drm;
+	drm->dev_private = priv;
+
+	platform_set_drvdata(pdev, priv);
+
+	ret = devm_drm_dev_init(dev, drm, &ingenic_drm_driver_data);
+	if (ret) {
+		kfree(priv);
+		return ret;
+	}
+
+	drm_mode_config_init(drm);
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+	drm->mode_config.max_width = 800;
+	drm->mode_config.max_height = 600;
+	drm->mode_config.funcs = &ingenic_drm_mode_config_funcs;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(base)) {
+		dev_err(dev, "Failed to get memory resource");
+		return PTR_ERR(base);
+	}
+
+	priv->map = devm_regmap_init_mmio(dev, base,
+					  &ingenic_drm_regmap_config);
+	if (IS_ERR(priv->map)) {
+		dev_err(dev, "Failed to create regmap");
+		return PTR_ERR(priv->map);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get platform irq");
+		return irq;
+	}
+
+	if (soc_info->needs_dev_clk) {
+		priv->lcd_clk = devm_clk_get(dev, "lcd");
+		if (IS_ERR(priv->lcd_clk)) {
+			dev_err(dev, "Failed to get lcd clock");
+			return PTR_ERR(priv->lcd_clk);
+		}
+	}
+
+	priv->pix_clk = devm_clk_get(dev, "lcd_pclk");
+	if (IS_ERR(priv->pix_clk)) {
+		dev_err(dev, "Failed to get pixel clock");
+		return PTR_ERR(priv->pix_clk);
+	}
+
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, &panel, &bridge);
+	if (ret) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get panel handle");
+		return ret;
+	}
+
+	if (panel) {
+		bridge = devm_drm_panel_bridge_add(dev, panel,
+						   DRM_MODE_CONNECTOR_Unknown);
+	}
+
+	priv->dma_hwdesc = dma_alloc_coherent(dev, sizeof(*priv->dma_hwdesc),
+					      &priv->dma_hwdesc_phys,
+					      GFP_KERNEL);
+	if (!priv->dma_hwdesc)
+		return -ENOMEM;
+
+	ret = devm_add_action_or_reset(dev, ingenic_drm_free_dma_hwdesc, priv);
+	if (ret)
+		return ret;
+
+	priv->dma_hwdesc->next = priv->dma_hwdesc_phys;
+	priv->dma_hwdesc->id = 0xdeafbead;
+
+	drm_plane_helper_add(&priv->primary, &ingenic_drm_plane_helper_funcs);
+
+	ret = drm_universal_plane_init(drm, &priv->primary,
+				       0, &ingenic_drm_primary_plane_funcs,
+				       ingenic_drm_primary_formats,
+				       ARRAY_SIZE(ingenic_drm_primary_formats),
+				       NULL, DRM_PLANE_TYPE_PRIMARY, NULL);
+	if (ret) {
+		dev_err(dev, "Failed to register primary plane: %i", ret);
+		return ret;
+	}
+
+	drm_crtc_helper_add(&priv->crtc, &ingenic_drm_crtc_helper_funcs);
+
+	ret = drm_crtc_init_with_planes(drm, &priv->crtc, &priv->primary,
+					NULL, &ingenic_drm_crtc_funcs, NULL);
+	if (ret) {
+		dev_err(dev, "Failed to init CRTC: %i", ret);
+		return ret;
+	}
+
+	priv->encoder.possible_crtcs = 1;
+
+	drm_encoder_helper_add(&priv->encoder,
+			       &ingenic_drm_encoder_helper_funcs);
+
+	ret = drm_encoder_init(drm, &priv->encoder, &ingenic_drm_encoder_funcs,
+			       DRM_MODE_ENCODER_DPI, NULL);
+	if (ret) {
+		dev_err(dev, "Failed to init encoder: %i", ret);
+		return ret;
+	}
+
+	ret = drm_bridge_attach(&priv->encoder, bridge, NULL);
+	if (ret) {
+		dev_err(dev, "Unable to attach bridge");
+		return ret;
+	}
+
+	ret = drm_irq_install(drm, irq);
+	if (ret) {
+		dev_err(dev, "Unable to install IRQ handler");
+		return ret;
+	}
+
+	ret = drm_vblank_init(drm, 1);
+	if (ret) {
+		dev_err(dev, "Failed calling drm_vblank_init()");
+		return ret;
+	}
+
+	drm_mode_config_reset(drm);
+
+	ret = clk_prepare_enable(priv->pix_clk);
+	if (ret) {
+		dev_err(dev, "Unable to start pixel clock");
+		return ret;
+	}
+
+	if (priv->lcd_clk) {
+		parent_clk = clk_get_parent(priv->lcd_clk);
+		parent_rate = clk_get_rate(parent_clk);
+
+		/* LCD Device clock must be 3x the pixel clock for STN panels,
+		 * or 1.5x the pixel clock for TFT panels. To avoid having to
+		 * check for the LCD device clock everytime we do a mode change,
+		 * we set the LCD device clock to the highest rate possible.
+		 */
+		ret = clk_set_rate(priv->lcd_clk, parent_rate);
+		if (ret) {
+			dev_err(dev, "Unable to set LCD clock rate");
+			goto err_pixclk_disable;
+		}
+
+		ret = clk_prepare_enable(priv->lcd_clk);
+		if (ret) {
+			dev_err(dev, "Unable to start lcd clock");
+			goto err_pixclk_disable;
+		}
+	}
+
+	ret = drm_dev_register(drm, 0);
+	if (ret) {
+		dev_err(dev, "Failed to register DRM driver");
+		goto err_devclk_disable;
+	}
+
+	ret = drm_fbdev_generic_setup(drm, 32);
+	if (ret)
+		dev_warn(dev, "Unable to start fbdev emulation: %i", ret);
+
+	return 0;
+
+err_devclk_disable:
+	if (priv->lcd_clk)
+		clk_disable_unprepare(priv->lcd_clk);
+err_pixclk_disable:
+	clk_disable_unprepare(priv->pix_clk);
+	return ret;
+}
+
+static int ingenic_drm_remove(struct platform_device *pdev)
+{
+	struct ingenic_drm *priv = platform_get_drvdata(pdev);
+
+	if (priv->lcd_clk)
+		clk_disable_unprepare(priv->lcd_clk);
+	clk_disable_unprepare(priv->pix_clk);
+
+	drm_dev_unregister(&priv->drm);
+	drm_atomic_helper_shutdown(&priv->drm);
+
+	return 0;
+}
+
+static const struct jz_soc_info jz4740_soc_info = {
+	.needs_dev_clk = true,
+};
+
+static const struct jz_soc_info jz4725b_soc_info = {
+	.needs_dev_clk = false,
+};
+
+static const struct of_device_id ingenic_drm_of_match[] = {
+	{ .compatible = "ingenic,jz4740-lcd", .data = &jz4740_soc_info },
+	{ .compatible = "ingenic,jz4725b-lcd", .data = &jz4725b_soc_info },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver ingenic_drm_driver = {
+	.driver = {
+		.name = "ingenic-drm",
+		.of_match_table = of_match_ptr(ingenic_drm_of_match),
+	},
+	.probe = ingenic_drm_probe,
+	.remove = ingenic_drm_remove,
+};
+module_platform_driver(ingenic_drm_driver);
+
+MODULE_AUTHOR("Paul Cercueil <paul@crapouillou.net>");
+MODULE_DESCRIPTION("DRM driver for the Ingenic SoCs\n");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
index f9a281a62083..b29c26cd13b2 100644
--- a/drivers/gpu/drm/lima/lima_drv.c
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -17,7 +17,7 @@
 
 int lima_sched_timeout_ms;
 
-MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms (0 = no timeout (default))");
+MODULE_PARM_DESC(sched_timeout_ms, "task run timeout in ms");
 module_param_named(sched_timeout_ms, lima_sched_timeout_ms, int, 0444);
 
 static int lima_ioctl_get_param(struct drm_device *dev, void *data, struct drm_file *file)
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
index d29721e177bf..8fef224b93c8 100644
--- a/drivers/gpu/drm/lima/lima_pp.c
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -64,7 +64,13 @@ static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
 	struct lima_ip *pp_bcast = data;
 	struct lima_device *dev = pp_bcast->dev;
 	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
-	struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+	struct drm_lima_m450_pp_frame *frame;
+
+	/* for shared irq case */
+	if (!pipe->current_task)
+		return IRQ_NONE;
+
+	frame = pipe->current_task->frame;
 
 	for (i = 0; i < frame->num_pp; i++) {
 		struct lima_ip *ip = pipe->processor[i];
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index d53bd45f8d96..4127cacac454 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -258,7 +258,7 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
 static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
 					 struct lima_sched_task *task)
 {
-	drm_sched_stop(&pipe->base);
+	drm_sched_stop(&pipe->base, &task->base);
 
 	if (task)
 		drm_sched_increase_karma(&task->base);
@@ -329,19 +329,16 @@ static void lima_sched_error_work(struct work_struct *work)
 
 int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
 {
-	long timeout;
-
-	if (lima_sched_timeout_ms <= 0)
-		timeout = MAX_SCHEDULE_TIMEOUT;
-	else
-		timeout = msecs_to_jiffies(lima_sched_timeout_ms);
+	unsigned int timeout = lima_sched_timeout_ms > 0 ?
+			       lima_sched_timeout_ms : 500;
 
 	pipe->fence_context = dma_fence_context_alloc(1);
 	spin_lock_init(&pipe->fence_lock);
 
 	INIT_WORK(&pipe->error_work, lima_sched_error_work);
 
-	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
+	return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0,
+			      msecs_to_jiffies(timeout), name);
 }
 
 void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/mcde/Kconfig b/drivers/gpu/drm/mcde/Kconfig
new file mode 100644
index 000000000000..b3990126562c
--- /dev/null
+++ b/drivers/gpu/drm/mcde/Kconfig
@@ -0,0 +1,18 @@
+config DRM_MCDE
+	tristate "DRM Support for ST-Ericsson MCDE (Multichannel Display Engine)"
+	depends on DRM
+	depends on CMA
+	depends on ARM || COMPILE_TEST
+	depends on OF
+	select MFD_SYSCON
+	select DRM_MIPI_DSI
+	select DRM_BRIDGE
+	select DRM_PANEL_BRIDGE
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select DRM_GEM_CMA_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the ST-Ericsson MCDE
+	  Multi-Channel Display Engine.
+	  If M is selected the module will be called mcde_drm.
diff --git a/drivers/gpu/drm/mcde/Makefile b/drivers/gpu/drm/mcde/Makefile
new file mode 100644
index 000000000000..fe28f4e0fe46
--- /dev/null
+++ b/drivers/gpu/drm/mcde/Makefile
@@ -0,0 +1,3 @@
+mcde_drm-y +=	mcde_drv.o mcde_dsi.o mcde_display.o
+
+obj-$(CONFIG_DRM_MCDE) += mcde_drm.o
diff --git a/drivers/gpu/drm/mcde/mcde_display.c b/drivers/gpu/drm/mcde/mcde_display.c
new file mode 100644
index 000000000000..751454ae3cd1
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_display.c
@@ -0,0 +1,1142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Linus Walleij <linus.walleij@linaro.org>
+ * Parts of this file were based on the MCDE driver by Marcus Lorentzon
+ * (C) ST-Ericsson SA 2013
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-buf.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_simple_kms_helper.h>
+#include <drm/drm_vblank.h>
+#include <video/mipi_display.h>
+
+#include "mcde_drm.h"
+#include "mcde_display_regs.h"
+
+enum mcde_fifo {
+	MCDE_FIFO_A,
+	MCDE_FIFO_B,
+	/* TODO: implement FIFO C0 and FIFO C1 */
+};
+
+enum mcde_channel {
+	MCDE_CHANNEL_0 = 0,
+	MCDE_CHANNEL_1,
+	MCDE_CHANNEL_2,
+	MCDE_CHANNEL_3,
+};
+
+enum mcde_extsrc {
+	MCDE_EXTSRC_0 = 0,
+	MCDE_EXTSRC_1,
+	MCDE_EXTSRC_2,
+	MCDE_EXTSRC_3,
+	MCDE_EXTSRC_4,
+	MCDE_EXTSRC_5,
+	MCDE_EXTSRC_6,
+	MCDE_EXTSRC_7,
+	MCDE_EXTSRC_8,
+	MCDE_EXTSRC_9,
+};
+
+enum mcde_overlay {
+	MCDE_OVERLAY_0 = 0,
+	MCDE_OVERLAY_1,
+	MCDE_OVERLAY_2,
+	MCDE_OVERLAY_3,
+	MCDE_OVERLAY_4,
+	MCDE_OVERLAY_5,
+};
+
+enum mcde_dsi_formatter {
+	MCDE_DSI_FORMATTER_0 = 0,
+	MCDE_DSI_FORMATTER_1,
+	MCDE_DSI_FORMATTER_2,
+};
+
+void mcde_display_irq(struct mcde *mcde)
+{
+	u32 mispp, misovl, mischnl;
+	bool vblank = false;
+
+	/* Handle display IRQs */
+	mispp = readl(mcde->regs + MCDE_MISPP);
+	misovl = readl(mcde->regs + MCDE_MISOVL);
+	mischnl = readl(mcde->regs + MCDE_MISCHNL);
+
+	/*
+	 * Handle IRQs from the DSI link. All IRQs from the DSI links
+	 * are just latched onto the MCDE IRQ line, so we need to traverse
+	 * any active DSI masters and check if an IRQ is originating from
+	 * them.
+	 *
+	 * TODO: Currently only one DSI link is supported.
+	 */
+	if (mcde_dsi_irq(mcde->mdsi)) {
+		u32 val;
+
+		/*
+		 * In oneshot mode we do not send continuous updates
+		 * to the display, instead we only push out updates when
+		 * the update function is called, then we disable the
+		 * flow on the channel once we get the TE IRQ.
+		 */
+		if (mcde->oneshot_mode) {
+			spin_lock(&mcde->flow_lock);
+			if (--mcde->flow_active == 0) {
+				dev_dbg(mcde->dev, "TE0 IRQ\n");
+				/* Disable FIFO A flow */
+				val = readl(mcde->regs + MCDE_CRA0);
+				val &= ~MCDE_CRX0_FLOEN;
+				writel(val, mcde->regs + MCDE_CRA0);
+			}
+			spin_unlock(&mcde->flow_lock);
+		}
+	}
+
+	/* Vblank from one of the channels */
+	if (mispp & MCDE_PP_VCMPA) {
+		dev_dbg(mcde->dev, "chnl A vblank IRQ\n");
+		vblank = true;
+	}
+	if (mispp & MCDE_PP_VCMPB) {
+		dev_dbg(mcde->dev, "chnl B vblank IRQ\n");
+		vblank = true;
+	}
+	if (mispp & MCDE_PP_VCMPC0)
+		dev_dbg(mcde->dev, "chnl C0 vblank IRQ\n");
+	if (mispp & MCDE_PP_VCMPC1)
+		dev_dbg(mcde->dev, "chnl C1 vblank IRQ\n");
+	if (mispp & MCDE_PP_VSCC0)
+		dev_dbg(mcde->dev, "chnl C0 TE IRQ\n");
+	if (mispp & MCDE_PP_VSCC1)
+		dev_dbg(mcde->dev, "chnl C1 TE IRQ\n");
+	writel(mispp, mcde->regs + MCDE_RISPP);
+
+	if (vblank)
+		drm_crtc_handle_vblank(&mcde->pipe.crtc);
+
+	if (misovl)
+		dev_info(mcde->dev, "some stray overlay IRQ %08x\n", misovl);
+	writel(misovl, mcde->regs + MCDE_RISOVL);
+
+	if (mischnl)
+		dev_info(mcde->dev, "some stray channel error IRQ %08x\n",
+			 mischnl);
+	writel(mischnl, mcde->regs + MCDE_RISCHNL);
+}
+
+void mcde_display_disable_irqs(struct mcde *mcde)
+{
+	/* Disable all IRQs */
+	writel(0, mcde->regs + MCDE_IMSCPP);
+	writel(0, mcde->regs + MCDE_IMSCOVL);
+	writel(0, mcde->regs + MCDE_IMSCCHNL);
+
+	/* Clear any pending IRQs */
+	writel(0xFFFFFFFF, mcde->regs + MCDE_RISPP);
+	writel(0xFFFFFFFF, mcde->regs + MCDE_RISOVL);
+	writel(0xFFFFFFFF, mcde->regs + MCDE_RISCHNL);
+}
+
+static int mcde_display_check(struct drm_simple_display_pipe *pipe,
+			      struct drm_plane_state *pstate,
+			      struct drm_crtc_state *cstate)
+{
+	const struct drm_display_mode *mode = &cstate->mode;
+	struct drm_framebuffer *old_fb = pipe->plane.state->fb;
+	struct drm_framebuffer *fb = pstate->fb;
+
+	if (fb) {
+		u32 offset = drm_fb_cma_get_gem_addr(fb, pstate, 0);
+
+		/* FB base address must be dword aligned. */
+		if (offset & 3) {
+			DRM_DEBUG_KMS("FB not 32-bit aligned\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * There's no pitch register, the mode's hdisplay
+		 * controls this.
+		 */
+		if (fb->pitches[0] != mode->hdisplay * fb->format->cpp[0]) {
+			DRM_DEBUG_KMS("can't handle pitches\n");
+			return -EINVAL;
+		}
+
+		/*
+		 * We can't change the FB format in a flicker-free
+		 * manner (and only update it during CRTC enable).
+		 */
+		if (old_fb && old_fb->format != fb->format)
+			cstate->mode_changed = true;
+	}
+
+	return 0;
+}
+
+static int mcde_configure_extsrc(struct mcde *mcde, enum mcde_extsrc src,
+				 u32 format)
+{
+	u32 val;
+	u32 conf;
+	u32 cr;
+
+	switch (src) {
+	case MCDE_EXTSRC_0:
+		conf = MCDE_EXTSRC0CONF;
+		cr = MCDE_EXTSRC0CR;
+		break;
+	case MCDE_EXTSRC_1:
+		conf = MCDE_EXTSRC1CONF;
+		cr = MCDE_EXTSRC1CR;
+		break;
+	case MCDE_EXTSRC_2:
+		conf = MCDE_EXTSRC2CONF;
+		cr = MCDE_EXTSRC2CR;
+		break;
+	case MCDE_EXTSRC_3:
+		conf = MCDE_EXTSRC3CONF;
+		cr = MCDE_EXTSRC3CR;
+		break;
+	case MCDE_EXTSRC_4:
+		conf = MCDE_EXTSRC4CONF;
+		cr = MCDE_EXTSRC4CR;
+		break;
+	case MCDE_EXTSRC_5:
+		conf = MCDE_EXTSRC5CONF;
+		cr = MCDE_EXTSRC5CR;
+		break;
+	case MCDE_EXTSRC_6:
+		conf = MCDE_EXTSRC6CONF;
+		cr = MCDE_EXTSRC6CR;
+		break;
+	case MCDE_EXTSRC_7:
+		conf = MCDE_EXTSRC7CONF;
+		cr = MCDE_EXTSRC7CR;
+		break;
+	case MCDE_EXTSRC_8:
+		conf = MCDE_EXTSRC8CONF;
+		cr = MCDE_EXTSRC8CR;
+		break;
+	case MCDE_EXTSRC_9:
+		conf = MCDE_EXTSRC9CONF;
+		cr = MCDE_EXTSRC9CR;
+		break;
+	}
+
+	/*
+	 * Configure external source 0 one buffer (buffer 0)
+	 * primary overlay ID 0.
+	 * From mcde_hw.c ovly_update_registers() in the vendor tree
+	 */
+	val = 0 << MCDE_EXTSRCXCONF_BUF_ID_SHIFT;
+	val |= 1 << MCDE_EXTSRCXCONF_BUF_NB_SHIFT;
+	val |= 0 << MCDE_EXTSRCXCONF_PRI_OVLID_SHIFT;
+	/*
+	 * MCDE has inverse semantics from DRM on RBG/BGR which is why
+	 * all the modes are inversed here.
+	 */
+	switch (format) {
+	case DRM_FORMAT_ARGB8888:
+		val |= MCDE_EXTSRCXCONF_BPP_ARGB8888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		val |= MCDE_EXTSRCXCONF_BPP_ARGB8888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_XRGB8888:
+		val |= MCDE_EXTSRCXCONF_BPP_XRGB8888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_XBGR8888:
+		val |= MCDE_EXTSRCXCONF_BPP_XRGB8888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_RGB888:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_BGR888:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB888 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_ARGB4444:
+		val |= MCDE_EXTSRCXCONF_BPP_ARGB4444 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_ABGR4444:
+		val |= MCDE_EXTSRCXCONF_BPP_ARGB4444 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_XRGB4444:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB444 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_XBGR4444:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB444 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_XRGB1555:
+		val |= MCDE_EXTSRCXCONF_BPP_IRGB1555 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_XBGR1555:
+		val |= MCDE_EXTSRCXCONF_BPP_IRGB1555 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_RGB565:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB565 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		val |= MCDE_EXTSRCXCONF_BGR;
+		break;
+	case DRM_FORMAT_BGR565:
+		val |= MCDE_EXTSRCXCONF_BPP_RGB565 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	case DRM_FORMAT_YUV422:
+		val |= MCDE_EXTSRCXCONF_BPP_YCBCR422 <<
+			MCDE_EXTSRCXCONF_BPP_SHIFT;
+		break;
+	default:
+		dev_err(mcde->dev, "Unknown pixel format 0x%08x\n",
+			format);
+		return -EINVAL;
+	}
+	writel(val, mcde->regs + conf);
+
+	/* Software select, primary */
+	val = MCDE_EXTSRCXCR_SEL_MOD_SOFTWARE_SEL;
+	val |= MCDE_EXTSRCXCR_MULTIOVL_CTRL_PRIMARY;
+	writel(val, mcde->regs + cr);
+
+	return 0;
+}
+
+static void mcde_configure_overlay(struct mcde *mcde, enum mcde_overlay ovl,
+				   enum mcde_extsrc src,
+				   enum mcde_channel ch,
+				   const struct drm_display_mode *mode,
+				   u32 format)
+{
+	u32 val;
+	u32 conf1;
+	u32 conf2;
+	u32 crop;
+	u32 ljinc;
+	u32 cr;
+	u32 comp;
+
+	switch (ovl) {
+	case MCDE_OVERLAY_0:
+		conf1 = MCDE_OVL0CONF;
+		conf2 = MCDE_OVL0CONF2;
+		crop = MCDE_OVL0CROP;
+		ljinc = MCDE_OVL0LJINC;
+		cr = MCDE_OVL0CR;
+		comp = MCDE_OVL0COMP;
+		break;
+	case MCDE_OVERLAY_1:
+		conf1 = MCDE_OVL1CONF;
+		conf2 = MCDE_OVL1CONF2;
+		crop = MCDE_OVL1CROP;
+		ljinc = MCDE_OVL1LJINC;
+		cr = MCDE_OVL1CR;
+		comp = MCDE_OVL1COMP;
+		break;
+	case MCDE_OVERLAY_2:
+		conf1 = MCDE_OVL2CONF;
+		conf2 = MCDE_OVL2CONF2;
+		crop = MCDE_OVL2CROP;
+		ljinc = MCDE_OVL2LJINC;
+		cr = MCDE_OVL2CR;
+		comp = MCDE_OVL2COMP;
+		break;
+	case MCDE_OVERLAY_3:
+		conf1 = MCDE_OVL3CONF;
+		conf2 = MCDE_OVL3CONF2;
+		crop = MCDE_OVL3CROP;
+		ljinc = MCDE_OVL3LJINC;
+		cr = MCDE_OVL3CR;
+		comp = MCDE_OVL3COMP;
+		break;
+	case MCDE_OVERLAY_4:
+		conf1 = MCDE_OVL4CONF;
+		conf2 = MCDE_OVL4CONF2;
+		crop = MCDE_OVL4CROP;
+		ljinc = MCDE_OVL4LJINC;
+		cr = MCDE_OVL4CR;
+		comp = MCDE_OVL4COMP;
+		break;
+	case MCDE_OVERLAY_5:
+		conf1 = MCDE_OVL5CONF;
+		conf2 = MCDE_OVL5CONF2;
+		crop = MCDE_OVL5CROP;
+		ljinc = MCDE_OVL5LJINC;
+		cr = MCDE_OVL5CR;
+		comp = MCDE_OVL5COMP;
+		break;
+	}
+
+	val = mode->hdisplay << MCDE_OVLXCONF_PPL_SHIFT;
+	val |= mode->vdisplay << MCDE_OVLXCONF_LPF_SHIFT;
+	/* Use external source 0 that we just configured */
+	val |= src << MCDE_OVLXCONF_EXTSRC_ID_SHIFT;
+	writel(val, mcde->regs + conf1);
+
+	val = MCDE_OVLXCONF2_BP_PER_PIXEL_ALPHA;
+	val |= 0xff << MCDE_OVLXCONF2_ALPHAVALUE_SHIFT;
+	/* OPQ: overlay is opaque */
+	switch (format) {
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_ABGR8888:
+	case DRM_FORMAT_ARGB4444:
+	case DRM_FORMAT_ABGR4444:
+	case DRM_FORMAT_XRGB1555:
+	case DRM_FORMAT_XBGR1555:
+		/* No OPQ */
+		break;
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_BGR888:
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_BGR565:
+	case DRM_FORMAT_YUV422:
+		val |= MCDE_OVLXCONF2_OPQ;
+		break;
+	default:
+		dev_err(mcde->dev, "Unknown pixel format 0x%08x\n",
+			format);
+		break;
+	}
+	/* The default watermark level for overlay 0 is 48 */
+	val |= 48 << MCDE_OVLXCONF2_PIXELFETCHERWATERMARKLEVEL_SHIFT;
+	writel(val, mcde->regs + conf2);
+
+	/* Number of bytes to fetch per line */
+	writel(mcde->stride, mcde->regs + ljinc);
+	/* No cropping */
+	writel(0, mcde->regs + crop);
+
+	/* Set up overlay control register */
+	val = MCDE_OVLXCR_OVLEN;
+	val |= MCDE_OVLXCR_COLCCTRL_DISABLED;
+	val |= MCDE_OVLXCR_BURSTSIZE_8W <<
+		MCDE_OVLXCR_BURSTSIZE_SHIFT;
+	val |= MCDE_OVLXCR_MAXOUTSTANDING_8_REQ <<
+		MCDE_OVLXCR_MAXOUTSTANDING_SHIFT;
+	/* Not using rotation but set it up anyways */
+	val |= MCDE_OVLXCR_ROTBURSTSIZE_8W <<
+		MCDE_OVLXCR_ROTBURSTSIZE_SHIFT;
+	writel(val, mcde->regs + cr);
+
+	/*
+	 * Set up the overlay compositor to route the overlay out to
+	 * the desired channel
+	 */
+	val = ch << MCDE_OVLXCOMP_CH_ID_SHIFT;
+	writel(val, mcde->regs + comp);
+}
+
+static void mcde_configure_channel(struct mcde *mcde, enum mcde_channel ch,
+				   enum mcde_fifo fifo,
+				   const struct drm_display_mode *mode)
+{
+	u32 val;
+	u32 conf;
+	u32 sync;
+	u32 stat;
+	u32 bgcol;
+	u32 mux;
+
+	switch (ch) {
+	case MCDE_CHANNEL_0:
+		conf = MCDE_CHNL0CONF;
+		sync = MCDE_CHNL0SYNCHMOD;
+		stat = MCDE_CHNL0STAT;
+		bgcol = MCDE_CHNL0BCKGNDCOL;
+		mux = MCDE_CHNL0MUXING;
+		break;
+	case MCDE_CHANNEL_1:
+		conf = MCDE_CHNL1CONF;
+		sync = MCDE_CHNL1SYNCHMOD;
+		stat = MCDE_CHNL1STAT;
+		bgcol = MCDE_CHNL1BCKGNDCOL;
+		mux = MCDE_CHNL1MUXING;
+		break;
+	case MCDE_CHANNEL_2:
+		conf = MCDE_CHNL2CONF;
+		sync = MCDE_CHNL2SYNCHMOD;
+		stat = MCDE_CHNL2STAT;
+		bgcol = MCDE_CHNL2BCKGNDCOL;
+		mux = MCDE_CHNL2MUXING;
+		break;
+	case MCDE_CHANNEL_3:
+		conf = MCDE_CHNL3CONF;
+		sync = MCDE_CHNL3SYNCHMOD;
+		stat = MCDE_CHNL3STAT;
+		bgcol = MCDE_CHNL3BCKGNDCOL;
+		mux = MCDE_CHNL3MUXING;
+		return;
+	}
+
+	/* Set up channel 0 sync (based on chnl_update_registers()) */
+	if (mcde->te_sync) {
+		/*
+		 * Turn on hardware TE0 synchronization
+		 */
+		val = MCDE_CHNLXSYNCHMOD_SRC_SYNCH_HARDWARE
+			<< MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SHIFT;
+		val |= MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_TE0
+			<< MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_SHIFT;
+	} else {
+		/*
+		 * Set up sync source to software, out sync formatter
+		 * Code mostly from mcde_hw.c chnl_update_registers()
+		 */
+		val = MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SOFTWARE
+			<< MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SHIFT;
+		val |= MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_FORMATTER
+			<< MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_SHIFT;
+	}
+	writel(val, mcde->regs + sync);
+
+	/* Set up pixels per line and lines per frame */
+	val = (mode->hdisplay - 1) << MCDE_CHNLXCONF_PPL_SHIFT;
+	val |= (mode->vdisplay - 1) << MCDE_CHNLXCONF_LPF_SHIFT;
+	writel(val, mcde->regs + conf);
+
+	/*
+	 * Normalize color conversion:
+	 * black background, OLED conversion disable on channel
+	 */
+	val = MCDE_CHNLXSTAT_CHNLBLBCKGND_EN |
+		MCDE_CHNLXSTAT_CHNLRD;
+	writel(val, mcde->regs + stat);
+	writel(0, mcde->regs + bgcol);
+
+	/* Set up muxing: connect the channel to the desired FIFO */
+	switch (fifo) {
+	case MCDE_FIFO_A:
+		writel(MCDE_CHNLXMUXING_FIFO_ID_FIFO_A,
+		       mcde->regs + mux);
+		break;
+	case MCDE_FIFO_B:
+		writel(MCDE_CHNLXMUXING_FIFO_ID_FIFO_B,
+		       mcde->regs + mux);
+		break;
+	}
+}
+
+static void mcde_configure_fifo(struct mcde *mcde, enum mcde_fifo fifo,
+				enum mcde_dsi_formatter fmt,
+				int fifo_wtrmrk)
+{
+	u32 val;
+	u32 ctrl;
+	u32 cr0, cr1;
+
+	switch (fifo) {
+	case MCDE_FIFO_A:
+		ctrl = MCDE_CTRLA;
+		cr0 = MCDE_CRA0;
+		cr1 = MCDE_CRA1;
+		break;
+	case MCDE_FIFO_B:
+		ctrl = MCDE_CTRLB;
+		cr0 = MCDE_CRB0;
+		cr1 = MCDE_CRB1;
+		break;
+	}
+
+	val = fifo_wtrmrk << MCDE_CTRLX_FIFOWTRMRK_SHIFT;
+	/* We only support DSI formatting for now */
+	val |= MCDE_CTRLX_FORMTYPE_DSI <<
+		MCDE_CTRLX_FORMTYPE_SHIFT;
+
+	/* Select the formatter to use for this FIFO */
+	val |= fmt << MCDE_CTRLX_FORMID_SHIFT;
+	writel(val, mcde->regs + ctrl);
+
+	/* Blend source with Alpha 0xff on FIFO */
+	val = MCDE_CRX0_BLENDEN |
+		0xff << MCDE_CRX0_ALPHABLEND_SHIFT;
+	writel(val, mcde->regs + cr0);
+
+	/* Set-up from mcde_fmtr_dsi.c, fmtr_dsi_enable_video() */
+
+	/* Use the MCDE clock for this FIFO */
+	val = MCDE_CRX1_CLKSEL_MCDECLK << MCDE_CRX1_CLKSEL_SHIFT;
+
+	/* TODO: when adding DPI support add OUTBPP etc here */
+	writel(val, mcde->regs + cr1);
+};
+
+static void mcde_configure_dsi_formatter(struct mcde *mcde,
+					 enum mcde_dsi_formatter fmt,
+					 u32 formatter_frame,
+					 int pkt_size)
+{
+	u32 val;
+	u32 conf0;
+	u32 frame;
+	u32 pkt;
+	u32 sync;
+	u32 cmdw;
+	u32 delay0, delay1;
+
+	switch (fmt) {
+	case MCDE_DSI_FORMATTER_0:
+		conf0 = MCDE_DSIVID0CONF0;
+		frame = MCDE_DSIVID0FRAME;
+		pkt = MCDE_DSIVID0PKT;
+		sync = MCDE_DSIVID0SYNC;
+		cmdw = MCDE_DSIVID0CMDW;
+		delay0 = MCDE_DSIVID0DELAY0;
+		delay1 = MCDE_DSIVID0DELAY1;
+		break;
+	case MCDE_DSI_FORMATTER_1:
+		conf0 = MCDE_DSIVID1CONF0;
+		frame = MCDE_DSIVID1FRAME;
+		pkt = MCDE_DSIVID1PKT;
+		sync = MCDE_DSIVID1SYNC;
+		cmdw = MCDE_DSIVID1CMDW;
+		delay0 = MCDE_DSIVID1DELAY0;
+		delay1 = MCDE_DSIVID1DELAY1;
+		break;
+	case MCDE_DSI_FORMATTER_2:
+		conf0 = MCDE_DSIVID2CONF0;
+		frame = MCDE_DSIVID2FRAME;
+		pkt = MCDE_DSIVID2PKT;
+		sync = MCDE_DSIVID2SYNC;
+		cmdw = MCDE_DSIVID2CMDW;
+		delay0 = MCDE_DSIVID2DELAY0;
+		delay1 = MCDE_DSIVID2DELAY1;
+		break;
+	}
+
+	/*
+	 * Enable formatter
+	 * 8 bit commands and DCS commands (notgen = not generic)
+	 */
+	val = MCDE_DSICONF0_CMD8 | MCDE_DSICONF0_DCSVID_NOTGEN;
+	if (mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO)
+		val |= MCDE_DSICONF0_VID_MODE_VID;
+	switch (mcde->mdsi->format) {
+	case MIPI_DSI_FMT_RGB888:
+		val |= MCDE_DSICONF0_PACKING_RGB888 <<
+			MCDE_DSICONF0_PACKING_SHIFT;
+		break;
+	case MIPI_DSI_FMT_RGB666:
+		val |= MCDE_DSICONF0_PACKING_RGB666 <<
+			MCDE_DSICONF0_PACKING_SHIFT;
+		break;
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		val |= MCDE_DSICONF0_PACKING_RGB666_PACKED <<
+			MCDE_DSICONF0_PACKING_SHIFT;
+		break;
+	case MIPI_DSI_FMT_RGB565:
+		val |= MCDE_DSICONF0_PACKING_RGB565 <<
+			MCDE_DSICONF0_PACKING_SHIFT;
+		break;
+	default:
+		dev_err(mcde->dev, "unknown DSI format\n");
+		return;
+	}
+	writel(val, mcde->regs + conf0);
+
+	writel(formatter_frame, mcde->regs + frame);
+	writel(pkt_size, mcde->regs + pkt);
+	writel(0, mcde->regs + sync);
+	/* Define the MIPI command: we want to write into display memory */
+	val = MIPI_DCS_WRITE_MEMORY_CONTINUE <<
+		MCDE_DSIVIDXCMDW_CMDW_CONTINUE_SHIFT;
+	val |= MIPI_DCS_WRITE_MEMORY_START <<
+		MCDE_DSIVIDXCMDW_CMDW_START_SHIFT;
+	writel(val, mcde->regs + cmdw);
+
+	/*
+	 * FIXME: the vendor driver has some hack around this value in
+	 * CMD mode with autotrig.
+	 */
+	writel(0, mcde->regs + delay0);
+	writel(0, mcde->regs + delay1);
+}
+
+static void mcde_enable_fifo(struct mcde *mcde, enum mcde_fifo fifo)
+{
+	u32 val;
+	u32 cr;
+
+	switch (fifo) {
+	case MCDE_FIFO_A:
+		cr = MCDE_CRA0;
+		break;
+	case MCDE_FIFO_B:
+		cr = MCDE_CRB0;
+		break;
+	default:
+		dev_err(mcde->dev, "cannot enable FIFO %c\n",
+			'A' + fifo);
+		return;
+	}
+
+	spin_lock(&mcde->flow_lock);
+	val = readl(mcde->regs + cr);
+	val |= MCDE_CRX0_FLOEN;
+	writel(val, mcde->regs + cr);
+	mcde->flow_active++;
+	spin_unlock(&mcde->flow_lock);
+}
+
+static void mcde_disable_fifo(struct mcde *mcde, enum mcde_fifo fifo,
+			      bool wait_for_drain)
+{
+	int timeout = 100;
+	u32 val;
+	u32 cr;
+
+	switch (fifo) {
+	case MCDE_FIFO_A:
+		cr = MCDE_CRA0;
+		break;
+	case MCDE_FIFO_B:
+		cr = MCDE_CRB0;
+		break;
+	default:
+		dev_err(mcde->dev, "cannot disable FIFO %c\n",
+			'A' + fifo);
+		return;
+	}
+
+	spin_lock(&mcde->flow_lock);
+	val = readl(mcde->regs + cr);
+	val &= ~MCDE_CRX0_FLOEN;
+	writel(val, mcde->regs + cr);
+	mcde->flow_active = 0;
+	spin_unlock(&mcde->flow_lock);
+
+	if (!wait_for_drain)
+		return;
+
+	/* Check that we really drained and stopped the flow */
+	while (readl(mcde->regs + cr) & MCDE_CRX0_FLOEN) {
+		usleep_range(1000, 1500);
+		if (!--timeout) {
+			dev_err(mcde->dev,
+				"FIFO timeout while clearing FIFO %c\n",
+				'A' + fifo);
+			return;
+		}
+	}
+}
+
+/*
+ * This drains a pipe i.e. a FIFO connected to a certain channel
+ */
+static void mcde_drain_pipe(struct mcde *mcde, enum mcde_fifo fifo,
+			    enum mcde_channel ch)
+{
+	u32 val;
+	u32 ctrl;
+	u32 synsw;
+
+	switch (fifo) {
+	case MCDE_FIFO_A:
+		ctrl = MCDE_CTRLA;
+		break;
+	case MCDE_FIFO_B:
+		ctrl = MCDE_CTRLB;
+		break;
+	}
+
+	switch (ch) {
+	case MCDE_CHANNEL_0:
+		synsw = MCDE_CHNL0SYNCHSW;
+		break;
+	case MCDE_CHANNEL_1:
+		synsw = MCDE_CHNL1SYNCHSW;
+		break;
+	case MCDE_CHANNEL_2:
+		synsw = MCDE_CHNL2SYNCHSW;
+		break;
+	case MCDE_CHANNEL_3:
+		synsw = MCDE_CHNL3SYNCHSW;
+		return;
+	}
+
+	val = readl(mcde->regs + ctrl);
+	if (!(val & MCDE_CTRLX_FIFOEMPTY)) {
+		dev_err(mcde->dev, "Channel A FIFO not empty (handover)\n");
+		/* Attempt to clear the FIFO */
+		mcde_enable_fifo(mcde, fifo);
+		/* Trigger a software sync out on respective channel (0-3) */
+		writel(MCDE_CHNLXSYNCHSW_SW_TRIG, mcde->regs + synsw);
+		/* Disable FIFO A flow again */
+		mcde_disable_fifo(mcde, fifo, true);
+	}
+}
+
+static int mcde_dsi_get_pkt_div(int ppl, int fifo_size)
+{
+	/*
+	 * DSI command mode line packets should be split into an even number of
+	 * packets smaller than or equal to the fifo size.
+	 */
+	int div;
+	const int max_div = DIV_ROUND_UP(MCDE_MAX_WIDTH, fifo_size);
+
+	for (div = 1; div < max_div; div++)
+		if (ppl % div == 0 && ppl / div <= fifo_size)
+			return div;
+	return 1;
+}
+
+static void mcde_display_enable(struct drm_simple_display_pipe *pipe,
+				struct drm_crtc_state *cstate,
+				struct drm_plane_state *plane_state)
+{
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_plane *plane = &pipe->plane;
+	struct drm_device *drm = crtc->dev;
+	struct mcde *mcde = drm->dev_private;
+	const struct drm_display_mode *mode = &cstate->mode;
+	struct drm_framebuffer *fb = plane->state->fb;
+	u32 format = fb->format->format;
+	u32 formatter_ppl = mode->hdisplay; /* pixels per line */
+	u32 formatter_lpf = mode->vdisplay; /* lines per frame */
+	int pkt_size, fifo_wtrmrk;
+	int cpp = fb->format->cpp[0];
+	int formatter_cpp;
+	struct drm_format_name_buf tmp;
+	u32 formatter_frame;
+	u32 pkt_div;
+	u32 val;
+
+	dev_info(drm->dev, "enable MCDE, %d x %d format %s\n",
+		 mode->hdisplay, mode->vdisplay,
+		 drm_get_format_name(format, &tmp));
+	if (!mcde->mdsi) {
+		/* TODO: deal with this for non-DSI output */
+		dev_err(drm->dev, "no DSI master attached!\n");
+		return;
+	}
+
+	dev_info(drm->dev, "output in %s mode, format %dbpp\n",
+		 (mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) ?
+		 "VIDEO" : "CMD",
+		 mipi_dsi_pixel_format_to_bpp(mcde->mdsi->format));
+	formatter_cpp =
+		mipi_dsi_pixel_format_to_bpp(mcde->mdsi->format) / 8;
+	dev_info(drm->dev, "overlay CPP %d bytes, DSI CPP %d bytes\n",
+		 cpp,
+		 formatter_cpp);
+
+	/* Calculations from mcde_fmtr_dsi.c, fmtr_dsi_enable_video() */
+
+	/*
+	 * Set up FIFO A watermark level:
+	 * 128 for LCD 32bpp video mode
+	 * 48  for LCD 32bpp command mode
+	 * 128 for LCD 16bpp video mode
+	 * 64  for LCD 16bpp command mode
+	 * 128 for HDMI 32bpp
+	 * 192 for HDMI 16bpp
+	 */
+	fifo_wtrmrk = mode->hdisplay;
+	if (mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
+		fifo_wtrmrk = min(fifo_wtrmrk, 128);
+		pkt_div = 1;
+	} else {
+		fifo_wtrmrk = min(fifo_wtrmrk, 48);
+		/* The FIFO is 640 entries deep on this v3 hardware */
+		pkt_div = mcde_dsi_get_pkt_div(mode->hdisplay, 640);
+	}
+	dev_dbg(drm->dev, "FIFO watermark after flooring: %d bytes\n",
+		fifo_wtrmrk);
+	dev_dbg(drm->dev, "Packet divisor: %d bytes\n", pkt_div);
+
+	/* NOTE: pkt_div is 1 for video mode */
+	pkt_size = (formatter_ppl * formatter_cpp) / pkt_div;
+	/* Commands CMD8 need one extra byte */
+	if (!(mcde->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO))
+		pkt_size++;
+
+	dev_dbg(drm->dev, "DSI packet size: %d * %d bytes per line\n",
+		pkt_size, pkt_div);
+	dev_dbg(drm->dev, "Overlay frame size: %u bytes\n",
+		mode->hdisplay * mode->vdisplay * cpp);
+	mcde->stride = mode->hdisplay * cpp;
+	dev_dbg(drm->dev, "Overlay line stride: %u bytes\n",
+		mcde->stride);
+	/* NOTE: pkt_div is 1 for video mode */
+	formatter_frame = pkt_size * pkt_div * formatter_lpf;
+	dev_dbg(drm->dev, "Formatter frame size: %u bytes\n", formatter_frame);
+
+	/* Drain the FIFO A + channel 0 pipe so we have a clean slate */
+	mcde_drain_pipe(mcde, MCDE_FIFO_A, MCDE_CHANNEL_0);
+
+	/*
+	 * We set up our display pipeline:
+	 * EXTSRC 0 -> OVERLAY 0 -> CHANNEL 0 -> FIFO A -> DSI FORMATTER 0
+	 *
+	 * First configure the external source (memory) on external source 0
+	 * using the desired bitstream/bitmap format
+	 */
+	mcde_configure_extsrc(mcde, MCDE_EXTSRC_0, format);
+
+	/*
+	 * Configure overlay 0 according to format and mode and take input
+	 * from external source 0 and route the output of this overlay to
+	 * channel 0
+	 */
+	mcde_configure_overlay(mcde, MCDE_OVERLAY_0, MCDE_EXTSRC_0,
+			       MCDE_CHANNEL_0, mode, format);
+
+	/*
+	 * Configure pixel-per-line and line-per-frame for channel 0 and then
+	 * route channel 0 to FIFO A
+	 */
+	mcde_configure_channel(mcde, MCDE_CHANNEL_0, MCDE_FIFO_A, mode);
+
+	/* Configure FIFO A to use DSI formatter 0 */
+	mcde_configure_fifo(mcde, MCDE_FIFO_A, MCDE_DSI_FORMATTER_0,
+			    fifo_wtrmrk);
+
+	/* Configure the DSI formatter 0 for the DSI panel output */
+	mcde_configure_dsi_formatter(mcde, MCDE_DSI_FORMATTER_0,
+				     formatter_frame, pkt_size);
+
+	if (mcde->te_sync) {
+		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+			val = MCDE_VSCRC_VSPOL;
+		else
+			val = 0;
+		writel(val, mcde->regs + MCDE_VSCRC0);
+		/* Enable VSYNC capture on TE0 */
+		val = readl(mcde->regs + MCDE_CRC);
+		val |= MCDE_CRC_SYCEN0;
+		writel(val, mcde->regs + MCDE_CRC);
+
+		drm_crtc_vblank_on(crtc);
+	}
+
+	dev_info(drm->dev, "MCDE display is enabled\n");
+}
+
+static void mcde_display_disable(struct drm_simple_display_pipe *pipe)
+{
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_device *drm = crtc->dev;
+	struct mcde *mcde = drm->dev_private;
+
+	if (mcde->te_sync)
+		drm_crtc_vblank_off(crtc);
+
+	/* Disable FIFO A flow */
+	mcde_disable_fifo(mcde, MCDE_FIFO_A, true);
+
+	dev_info(drm->dev, "MCDE display is disabled\n");
+}
+
+static void mcde_display_send_one_frame(struct mcde *mcde)
+{
+	/* Request a TE ACK */
+	if (mcde->te_sync)
+		mcde_dsi_te_request(mcde->mdsi);
+
+	/* Enable FIFO A flow */
+	mcde_enable_fifo(mcde, MCDE_FIFO_A);
+
+	if (mcde->te_sync) {
+		/*
+		 * If oneshot mode is enabled, the flow will be disabled
+		 * when the TE0 IRQ arrives in the interrupt handler. Otherwise
+		 * updates are continuously streamed to the display after this
+		 * point.
+		 */
+		dev_dbg(mcde->dev, "sent TE0 framebuffer update\n");
+		return;
+	}
+
+	/* Trigger a software sync out on channel 0 */
+	writel(MCDE_CHNLXSYNCHSW_SW_TRIG,
+	       mcde->regs + MCDE_CHNL0SYNCHSW);
+
+	/*
+	 * Disable FIFO A flow again: since we are using TE sync we
+	 * need to wait for the FIFO to drain before we continue
+	 * so repeated calls to this function will not cause a mess
+	 * in the hardware by pushing updates will updates are going
+	 * on already.
+	 */
+	mcde_disable_fifo(mcde, MCDE_FIFO_A, true);
+
+	dev_dbg(mcde->dev, "sent SW framebuffer update\n");
+}
+
+static void mcde_set_extsrc(struct mcde *mcde, u32 buffer_address)
+{
+	/* Write bitmap base address to register */
+	writel(buffer_address, mcde->regs + MCDE_EXTSRCXA0);
+	/*
+	 * Base address for next line this is probably only used
+	 * in interlace modes.
+	 */
+	writel(buffer_address + mcde->stride, mcde->regs + MCDE_EXTSRCXA1);
+}
+
+static void mcde_display_update(struct drm_simple_display_pipe *pipe,
+				struct drm_plane_state *old_pstate)
+{
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_device *drm = crtc->dev;
+	struct mcde *mcde = drm->dev_private;
+	struct drm_pending_vblank_event *event = crtc->state->event;
+	struct drm_plane *plane = &pipe->plane;
+	struct drm_plane_state *pstate = plane->state;
+	struct drm_framebuffer *fb = pstate->fb;
+
+	/*
+	 * Handle any pending event first, we need to arm the vblank
+	 * interrupt before sending any update to the display so we don't
+	 * miss the interrupt.
+	 */
+	if (event) {
+		crtc->state->event = NULL;
+
+		spin_lock_irq(&crtc->dev->event_lock);
+		/*
+		 * Hardware must be on before we can arm any vblank event,
+		 * this is not a scanout controller where there is always
+		 * some periodic update going on, it is completely frozen
+		 * until we get an update. If MCDE output isn't yet enabled,
+		 * we just send a vblank dummy event back.
+		 */
+		if (crtc->state->active && drm_crtc_vblank_get(crtc) == 0) {
+			dev_dbg(mcde->dev, "arm vblank event\n");
+			drm_crtc_arm_vblank_event(crtc, event);
+		} else {
+			dev_dbg(mcde->dev, "insert fake vblank event\n");
+			drm_crtc_send_vblank_event(crtc, event);
+		}
+
+		spin_unlock_irq(&crtc->dev->event_lock);
+	}
+
+	/*
+	 * We do not start sending framebuffer updates before the
+	 * display is enabled. Update events will however be dispatched
+	 * from the DRM core before the display is enabled.
+	 */
+	if (fb) {
+		mcde_set_extsrc(mcde, drm_fb_cma_get_gem_addr(fb, pstate, 0));
+		/* Send a single frame using software sync */
+		mcde_display_send_one_frame(mcde);
+		dev_info_once(mcde->dev, "sent first display update\n");
+	} else {
+		/*
+		 * If an update is receieved before the MCDE is enabled
+		 * (before mcde_display_enable() is called) we can't really
+		 * do much with that buffer.
+		 */
+		dev_info(mcde->dev, "ignored a display update\n");
+	}
+}
+
+static int mcde_display_enable_vblank(struct drm_simple_display_pipe *pipe)
+{
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_device *drm = crtc->dev;
+	struct mcde *mcde = drm->dev_private;
+	u32 val;
+
+	/* Enable all VBLANK IRQs */
+	val = MCDE_PP_VCMPA |
+		MCDE_PP_VCMPB |
+		MCDE_PP_VSCC0 |
+		MCDE_PP_VSCC1 |
+		MCDE_PP_VCMPC0 |
+		MCDE_PP_VCMPC1;
+	writel(val, mcde->regs + MCDE_IMSCPP);
+
+	return 0;
+}
+
+static void mcde_display_disable_vblank(struct drm_simple_display_pipe *pipe)
+{
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_device *drm = crtc->dev;
+	struct mcde *mcde = drm->dev_private;
+
+	/* Disable all VBLANK IRQs */
+	writel(0, mcde->regs + MCDE_IMSCPP);
+	/* Clear any pending IRQs */
+	writel(0xFFFFFFFF, mcde->regs + MCDE_RISPP);
+}
+
+static struct drm_simple_display_pipe_funcs mcde_display_funcs = {
+	.check = mcde_display_check,
+	.enable = mcde_display_enable,
+	.disable = mcde_display_disable,
+	.update = mcde_display_update,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
+};
+
+int mcde_display_init(struct drm_device *drm)
+{
+	struct mcde *mcde = drm->dev_private;
+	int ret;
+	static const u32 formats[] = {
+		DRM_FORMAT_ARGB8888,
+		DRM_FORMAT_ABGR8888,
+		DRM_FORMAT_XRGB8888,
+		DRM_FORMAT_XBGR8888,
+		DRM_FORMAT_RGB888,
+		DRM_FORMAT_BGR888,
+		DRM_FORMAT_ARGB4444,
+		DRM_FORMAT_ABGR4444,
+		DRM_FORMAT_XRGB4444,
+		DRM_FORMAT_XBGR4444,
+		/* These are actually IRGB1555 so intensity bit is lost */
+		DRM_FORMAT_XRGB1555,
+		DRM_FORMAT_XBGR1555,
+		DRM_FORMAT_RGB565,
+		DRM_FORMAT_BGR565,
+		DRM_FORMAT_YUV422,
+	};
+
+	/* Provide vblank only when we have TE enabled */
+	if (mcde->te_sync) {
+		mcde_display_funcs.enable_vblank = mcde_display_enable_vblank;
+		mcde_display_funcs.disable_vblank = mcde_display_disable_vblank;
+	}
+
+	ret = drm_simple_display_pipe_init(drm, &mcde->pipe,
+					   &mcde_display_funcs,
+					   formats, ARRAY_SIZE(formats),
+					   NULL,
+					   mcde->connector);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mcde_display_init);
diff --git a/drivers/gpu/drm/mcde/mcde_display_regs.h b/drivers/gpu/drm/mcde/mcde_display_regs.h
new file mode 100644
index 000000000000..d3ac7ef5ff9a
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_display_regs.h
@@ -0,0 +1,518 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DRM_MCDE_DISPLAY_REGS
+#define __DRM_MCDE_DISPLAY_REGS
+
+/* PP (pixel processor) interrupts */
+#define MCDE_IMSCPP 0x00000104
+#define MCDE_RISPP 0x00000114
+#define MCDE_MISPP 0x00000124
+#define MCDE_SISPP 0x00000134
+
+#define MCDE_PP_VCMPA BIT(0)
+#define MCDE_PP_VCMPB BIT(1)
+#define MCDE_PP_VSCC0 BIT(2)
+#define MCDE_PP_VSCC1 BIT(3)
+#define MCDE_PP_VCMPC0 BIT(4)
+#define MCDE_PP_VCMPC1 BIT(5)
+#define MCDE_PP_ROTFD_A BIT(6)
+#define MCDE_PP_ROTFD_B BIT(7)
+
+/* Overlay interrupts */
+#define MCDE_IMSCOVL 0x00000108
+#define MCDE_RISOVL 0x00000118
+#define MCDE_MISOVL 0x00000128
+#define MCDE_SISOVL 0x00000138
+
+/* Channel interrupts */
+#define MCDE_IMSCCHNL 0x0000010C
+#define MCDE_RISCHNL 0x0000011C
+#define MCDE_MISCHNL 0x0000012C
+#define MCDE_SISCHNL 0x0000013C
+
+/* X = 0..9 */
+#define MCDE_EXTSRCXA0 0x00000200
+#define MCDE_EXTSRCXA0_GROUPOFFSET 0x20
+#define MCDE_EXTSRCXA0_BASEADDRESS0_SHIFT 3
+#define MCDE_EXTSRCXA0_BASEADDRESS0_MASK 0xFFFFFFF8
+
+#define MCDE_EXTSRCXA1 0x00000204
+#define MCDE_EXTSRCXA1_GROUPOFFSET 0x20
+#define MCDE_EXTSRCXA1_BASEADDRESS1_SHIFT 3
+#define MCDE_EXTSRCXA1_BASEADDRESS1_MASK 0xFFFFFFF8
+
+/* External sources 0..9 */
+#define MCDE_EXTSRC0CONF 0x0000020C
+#define MCDE_EXTSRC1CONF 0x0000022C
+#define MCDE_EXTSRC2CONF 0x0000024C
+#define MCDE_EXTSRC3CONF 0x0000026C
+#define MCDE_EXTSRC4CONF 0x0000028C
+#define MCDE_EXTSRC5CONF 0x000002AC
+#define MCDE_EXTSRC6CONF 0x000002CC
+#define MCDE_EXTSRC7CONF 0x000002EC
+#define MCDE_EXTSRC8CONF 0x0000030C
+#define MCDE_EXTSRC9CONF 0x0000032C
+#define MCDE_EXTSRCXCONF_GROUPOFFSET 0x20
+#define MCDE_EXTSRCXCONF_BUF_ID_SHIFT 0
+#define MCDE_EXTSRCXCONF_BUF_ID_MASK 0x00000003
+#define MCDE_EXTSRCXCONF_BUF_NB_SHIFT 2
+#define MCDE_EXTSRCXCONF_BUF_NB_MASK 0x0000000C
+#define MCDE_EXTSRCXCONF_PRI_OVLID_SHIFT 4
+#define MCDE_EXTSRCXCONF_PRI_OVLID_MASK 0x000000F0
+#define MCDE_EXTSRCXCONF_BPP_SHIFT 8
+#define MCDE_EXTSRCXCONF_BPP_MASK 0x00000F00
+#define MCDE_EXTSRCXCONF_BPP_1BPP_PAL 0
+#define MCDE_EXTSRCXCONF_BPP_2BPP_PAL 1
+#define MCDE_EXTSRCXCONF_BPP_4BPP_PAL 2
+#define MCDE_EXTSRCXCONF_BPP_8BPP_PAL 3
+#define MCDE_EXTSRCXCONF_BPP_RGB444 4
+#define MCDE_EXTSRCXCONF_BPP_ARGB4444 5
+#define MCDE_EXTSRCXCONF_BPP_IRGB1555 6
+#define MCDE_EXTSRCXCONF_BPP_RGB565 7
+#define MCDE_EXTSRCXCONF_BPP_RGB888 8
+#define MCDE_EXTSRCXCONF_BPP_XRGB8888 9
+#define MCDE_EXTSRCXCONF_BPP_ARGB8888 10
+#define MCDE_EXTSRCXCONF_BPP_YCBCR422 11
+#define MCDE_EXTSRCXCONF_BGR BIT(12)
+#define MCDE_EXTSRCXCONF_BEBO BIT(13)
+#define MCDE_EXTSRCXCONF_BEPO BIT(14)
+#define MCDE_EXTSRCXCONF_TUNNELING_BUFFER_HEIGHT_SHIFT 16
+#define MCDE_EXTSRCXCONF_TUNNELING_BUFFER_HEIGHT_MASK 0x0FFF0000
+
+/* External sources 0..9 */
+#define MCDE_EXTSRC0CR 0x00000210
+#define MCDE_EXTSRC1CR 0x00000230
+#define MCDE_EXTSRC2CR 0x00000250
+#define MCDE_EXTSRC3CR 0x00000270
+#define MCDE_EXTSRC4CR 0x00000290
+#define MCDE_EXTSRC5CR 0x000002B0
+#define MCDE_EXTSRC6CR 0x000002D0
+#define MCDE_EXTSRC7CR 0x000002F0
+#define MCDE_EXTSRC8CR 0x00000310
+#define MCDE_EXTSRC9CR 0x00000330
+#define MCDE_EXTSRCXCR_SEL_MOD_SHIFT 0
+#define MCDE_EXTSRCXCR_SEL_MOD_MASK 0x00000003
+#define MCDE_EXTSRCXCR_SEL_MOD_EXTERNAL_SEL 0
+#define MCDE_EXTSRCXCR_SEL_MOD_AUTO_TOGGLE 1
+#define MCDE_EXTSRCXCR_SEL_MOD_SOFTWARE_SEL 2
+#define MCDE_EXTSRCXCR_MULTIOVL_CTRL_PRIMARY BIT(2) /* 0 = all */
+#define MCDE_EXTSRCXCR_FS_DIV_DISABLE BIT(3)
+#define MCDE_EXTSRCXCR_FORCE_FS_DIV BIT(4)
+
+/* Only external source 6 has a second address register */
+#define MCDE_EXTSRC6A2 0x000002C8
+
+/* 6 overlays */
+#define MCDE_OVL0CR 0x00000400
+#define MCDE_OVL1CR 0x00000420
+#define MCDE_OVL2CR 0x00000440
+#define MCDE_OVL3CR 0x00000460
+#define MCDE_OVL4CR 0x00000480
+#define MCDE_OVL5CR 0x000004A0
+#define MCDE_OVLXCR_OVLEN BIT(0)
+#define MCDE_OVLXCR_COLCCTRL_DISABLED 0
+#define MCDE_OVLXCR_COLCCTRL_ENABLED_NO_SAT (1 << 1)
+#define MCDE_OVLXCR_COLCCTRL_ENABLED_SAT (2 << 1)
+#define MCDE_OVLXCR_CKEYGEN BIT(3)
+#define MCDE_OVLXCR_ALPHAPMEN BIT(4)
+#define MCDE_OVLXCR_OVLF BIT(5)
+#define MCDE_OVLXCR_OVLR BIT(6)
+#define MCDE_OVLXCR_OVLB BIT(7)
+#define MCDE_OVLXCR_FETCH_ROPC_SHIFT 8
+#define MCDE_OVLXCR_FETCH_ROPC_MASK 0x0000FF00
+#define MCDE_OVLXCR_STBPRIO_SHIFT 16
+#define MCDE_OVLXCR_STBPRIO_MASK 0x000F0000
+#define MCDE_OVLXCR_BURSTSIZE_SHIFT 20
+#define MCDE_OVLXCR_BURSTSIZE_MASK 0x00F00000
+#define MCDE_OVLXCR_BURSTSIZE_1W 0
+#define MCDE_OVLXCR_BURSTSIZE_2W 1
+#define MCDE_OVLXCR_BURSTSIZE_4W 2
+#define MCDE_OVLXCR_BURSTSIZE_8W 3
+#define MCDE_OVLXCR_BURSTSIZE_16W 4
+#define MCDE_OVLXCR_BURSTSIZE_HW_1W 8
+#define MCDE_OVLXCR_BURSTSIZE_HW_2W 9
+#define MCDE_OVLXCR_BURSTSIZE_HW_4W 10
+#define MCDE_OVLXCR_BURSTSIZE_HW_8W 11
+#define MCDE_OVLXCR_BURSTSIZE_HW_16W 12
+#define MCDE_OVLXCR_MAXOUTSTANDING_SHIFT 24
+#define MCDE_OVLXCR_MAXOUTSTANDING_MASK 0x0F000000
+#define MCDE_OVLXCR_MAXOUTSTANDING_1_REQ 0
+#define MCDE_OVLXCR_MAXOUTSTANDING_2_REQ 1
+#define MCDE_OVLXCR_MAXOUTSTANDING_4_REQ 2
+#define MCDE_OVLXCR_MAXOUTSTANDING_8_REQ 3
+#define MCDE_OVLXCR_MAXOUTSTANDING_16_REQ 4
+#define MCDE_OVLXCR_ROTBURSTSIZE_SHIFT 28
+#define MCDE_OVLXCR_ROTBURSTSIZE_MASK 0xF0000000
+#define MCDE_OVLXCR_ROTBURSTSIZE_1W 0
+#define MCDE_OVLXCR_ROTBURSTSIZE_2W 1
+#define MCDE_OVLXCR_ROTBURSTSIZE_4W 2
+#define MCDE_OVLXCR_ROTBURSTSIZE_8W 3
+#define MCDE_OVLXCR_ROTBURSTSIZE_16W 4
+#define MCDE_OVLXCR_ROTBURSTSIZE_HW_1W 8
+#define MCDE_OVLXCR_ROTBURSTSIZE_HW_2W 9
+#define MCDE_OVLXCR_ROTBURSTSIZE_HW_4W 10
+#define MCDE_OVLXCR_ROTBURSTSIZE_HW_8W 11
+#define MCDE_OVLXCR_ROTBURSTSIZE_HW_16W 12
+
+#define MCDE_OVL0CONF 0x00000404
+#define MCDE_OVL1CONF 0x00000424
+#define MCDE_OVL2CONF 0x00000444
+#define MCDE_OVL3CONF 0x00000464
+#define MCDE_OVL4CONF 0x00000484
+#define MCDE_OVL5CONF 0x000004A4
+#define MCDE_OVLXCONF_PPL_SHIFT 0
+#define MCDE_OVLXCONF_PPL_MASK 0x000007FF
+#define MCDE_OVLXCONF_EXTSRC_ID_SHIFT 11
+#define MCDE_OVLXCONF_EXTSRC_ID_MASK 0x00007800
+#define MCDE_OVLXCONF_LPF_SHIFT 16
+#define MCDE_OVLXCONF_LPF_MASK 0x07FF0000
+
+#define MCDE_OVL0CONF2 0x00000408
+#define MCDE_OVL1CONF2 0x00000428
+#define MCDE_OVL2CONF2 0x00000448
+#define MCDE_OVL3CONF2 0x00000468
+#define MCDE_OVL4CONF2 0x00000488
+#define MCDE_OVL5CONF2 0x000004A8
+#define MCDE_OVLXCONF2_BP_PER_PIXEL_ALPHA 0
+#define MCDE_OVLXCONF2_BP_CONSTANT_ALPHA BIT(0)
+#define MCDE_OVLXCONF2_ALPHAVALUE_SHIFT 1
+#define MCDE_OVLXCONF2_ALPHAVALUE_MASK 0x000001FE
+#define MCDE_OVLXCONF2_OPQ BIT(9)
+#define MCDE_OVLXCONF2_PIXOFF_SHIFT 10
+#define MCDE_OVLXCONF2_PIXOFF_MASK 0x0000FC00
+#define MCDE_OVLXCONF2_PIXELFETCHERWATERMARKLEVEL_SHIFT 16
+#define MCDE_OVLXCONF2_PIXELFETCHERWATERMARKLEVEL_MASK 0x1FFF0000
+
+#define MCDE_OVL0LJINC 0x0000040C
+#define MCDE_OVL1LJINC 0x0000042C
+#define MCDE_OVL2LJINC 0x0000044C
+#define MCDE_OVL3LJINC 0x0000046C
+#define MCDE_OVL4LJINC 0x0000048C
+#define MCDE_OVL5LJINC 0x000004AC
+
+#define MCDE_OVL0CROP 0x00000410
+#define MCDE_OVL1CROP 0x00000430
+#define MCDE_OVL2CROP 0x00000450
+#define MCDE_OVL3CROP 0x00000470
+#define MCDE_OVL4CROP 0x00000490
+#define MCDE_OVL5CROP 0x000004B0
+#define MCDE_OVLXCROP_TMRGN_SHIFT 0
+#define MCDE_OVLXCROP_TMRGN_MASK 0x003FFFFF
+#define MCDE_OVLXCROP_LMRGN_SHIFT 22
+#define MCDE_OVLXCROP_LMRGN_MASK 0xFFC00000
+
+#define MCDE_OVL0COMP 0x00000414
+#define MCDE_OVL1COMP 0x00000434
+#define MCDE_OVL2COMP 0x00000454
+#define MCDE_OVL3COMP 0x00000474
+#define MCDE_OVL4COMP 0x00000494
+#define MCDE_OVL5COMP 0x000004B4
+#define MCDE_OVLXCOMP_XPOS_SHIFT 0
+#define MCDE_OVLXCOMP_XPOS_MASK 0x000007FF
+#define MCDE_OVLXCOMP_CH_ID_SHIFT 11
+#define MCDE_OVLXCOMP_CH_ID_MASK 0x00007800
+#define MCDE_OVLXCOMP_YPOS_SHIFT 16
+#define MCDE_OVLXCOMP_YPOS_MASK 0x07FF0000
+#define MCDE_OVLXCOMP_Z_SHIFT 27
+#define MCDE_OVLXCOMP_Z_MASK 0x78000000
+
+#define MCDE_CRC 0x00000C00
+#define MCDE_CRC_C1EN BIT(2)
+#define MCDE_CRC_C2EN BIT(3)
+#define MCDE_CRC_SYCEN0 BIT(7)
+#define MCDE_CRC_SYCEN1 BIT(8)
+#define MCDE_CRC_SIZE1 BIT(9)
+#define MCDE_CRC_SIZE2 BIT(10)
+#define MCDE_CRC_YUVCONVC1EN BIT(15)
+#define MCDE_CRC_CS1EN BIT(16)
+#define MCDE_CRC_CS2EN BIT(17)
+#define MCDE_CRC_CS1POL BIT(19)
+#define MCDE_CRC_CS2POL BIT(20)
+#define MCDE_CRC_CD1POL BIT(21)
+#define MCDE_CRC_CD2POL BIT(22)
+#define MCDE_CRC_WR1POL BIT(23)
+#define MCDE_CRC_WR2POL BIT(24)
+#define MCDE_CRC_RD1POL BIT(25)
+#define MCDE_CRC_RD2POL BIT(26)
+#define MCDE_CRC_SYNCCTRL_SHIFT 29
+#define MCDE_CRC_SYNCCTRL_MASK 0x60000000
+#define MCDE_CRC_SYNCCTRL_NO_SYNC 0
+#define MCDE_CRC_SYNCCTRL_DBI0 1
+#define MCDE_CRC_SYNCCTRL_DBI1 2
+#define MCDE_CRC_SYNCCTRL_PING_PONG 3
+#define MCDE_CRC_CLAMPC1EN BIT(31)
+
+#define MCDE_VSCRC0 0x00000C5C
+#define MCDE_VSCRC1 0x00000C60
+#define MCDE_VSCRC_VSPMIN_MASK 0x00000FFF
+#define MCDE_VSCRC_VSPMAX_SHIFT 12
+#define MCDE_VSCRC_VSPMAX_MASK 0x00FFF000
+#define MCDE_VSCRC_VSPDIV_SHIFT 24
+#define MCDE_VSCRC_VSPDIV_MASK 0x07000000
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_1 0
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_2 1
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_4 2
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_8 3
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_16 4
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_32 5
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_64 6
+#define MCDE_VSCRC_VSPDIV_MCDECLK_DIV_128 7
+#define MCDE_VSCRC_VSPOL BIT(27) /* 0 active high, 1 active low */
+#define MCDE_VSCRC_VSSEL BIT(28) /* 0 VSYNC0, 1 VSYNC1 */
+#define MCDE_VSCRC_VSDBL BIT(29)
+
+/* Channel config 0..3 */
+#define MCDE_CHNL0CONF 0x00000600
+#define MCDE_CHNL1CONF 0x00000620
+#define MCDE_CHNL2CONF 0x00000640
+#define MCDE_CHNL3CONF 0x00000660
+#define MCDE_CHNLXCONF_PPL_SHIFT 0
+#define MCDE_CHNLXCONF_PPL_MASK 0x000007FF
+#define MCDE_CHNLXCONF_LPF_SHIFT 16
+#define MCDE_CHNLXCONF_LPF_MASK 0x07FF0000
+#define MCDE_MAX_WIDTH 2048
+
+/* Channel status 0..3 */
+#define MCDE_CHNL0STAT 0x00000604
+#define MCDE_CHNL1STAT 0x00000624
+#define MCDE_CHNL2STAT 0x00000644
+#define MCDE_CHNL3STAT 0x00000664
+#define MCDE_CHNLXSTAT_CHNLRD BIT(0)
+#define MCDE_CHNLXSTAT_CHNLA BIT(1)
+#define MCDE_CHNLXSTAT_CHNLBLBCKGND_EN BIT(16)
+#define MCDE_CHNLXSTAT_PPLX2_V422 BIT(17)
+#define MCDE_CHNLXSTAT_LPFX2_V422 BIT(18)
+
+/* Sync settings for channel 0..3 */
+#define MCDE_CHNL0SYNCHMOD 0x00000608
+#define MCDE_CHNL1SYNCHMOD 0x00000628
+#define MCDE_CHNL2SYNCHMOD 0x00000648
+#define MCDE_CHNL3SYNCHMOD 0x00000668
+
+#define MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SHIFT 0
+#define MCDE_CHNLXSYNCHMOD_SRC_SYNCH_MASK 0x00000003
+#define MCDE_CHNLXSYNCHMOD_SRC_SYNCH_HARDWARE 0
+#define MCDE_CHNLXSYNCHMOD_SRC_SYNCH_NO_SYNCH 1
+#define MCDE_CHNLXSYNCHMOD_SRC_SYNCH_SOFTWARE 2
+#define MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_SHIFT 2
+#define MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_MASK 0x0000001C
+#define MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_FORMATTER 0
+#define MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_TE0 1
+#define MCDE_CHNLXSYNCHMOD_OUT_SYNCH_SRC_TE1 2
+
+/* Software sync triggers for channel 0..3 */
+#define MCDE_CHNL0SYNCHSW 0x0000060C
+#define MCDE_CHNL1SYNCHSW 0x0000062C
+#define MCDE_CHNL2SYNCHSW 0x0000064C
+#define MCDE_CHNL3SYNCHSW 0x0000066C
+#define MCDE_CHNLXSYNCHSW_SW_TRIG BIT(0)
+
+#define MCDE_CHNL0BCKGNDCOL 0x00000610
+#define MCDE_CHNL1BCKGNDCOL 0x00000630
+#define MCDE_CHNL2BCKGNDCOL 0x00000650
+#define MCDE_CHNL3BCKGNDCOL 0x00000670
+#define MCDE_CHNLXBCKGNDCOL_B_SHIFT 0
+#define MCDE_CHNLXBCKGNDCOL_B_MASK 0x000000FF
+#define MCDE_CHNLXBCKGNDCOL_G_SHIFT 8
+#define MCDE_CHNLXBCKGNDCOL_G_MASK 0x0000FF00
+#define MCDE_CHNLXBCKGNDCOL_R_SHIFT 16
+#define MCDE_CHNLXBCKGNDCOL_R_MASK 0x00FF0000
+
+#define MCDE_CHNL0MUXING 0x00000614
+#define MCDE_CHNL1MUXING 0x00000634
+#define MCDE_CHNL2MUXING 0x00000654
+#define MCDE_CHNL3MUXING 0x00000674
+#define MCDE_CHNLXMUXING_FIFO_ID_FIFO_A 0
+#define MCDE_CHNLXMUXING_FIFO_ID_FIFO_B 1
+#define MCDE_CHNLXMUXING_FIFO_ID_FIFO_C0 2
+#define MCDE_CHNLXMUXING_FIFO_ID_FIFO_C1 3
+
+/* Pixel processing control registers for channel A B,  */
+#define MCDE_CRA0 0x00000800
+#define MCDE_CRB0 0x00000A00
+#define MCDE_CRX0_FLOEN BIT(0)
+#define MCDE_CRX0_POWEREN BIT(1)
+#define MCDE_CRX0_BLENDEN BIT(2)
+#define MCDE_CRX0_AFLICKEN BIT(3)
+#define MCDE_CRX0_PALEN BIT(4)
+#define MCDE_CRX0_DITHEN BIT(5)
+#define MCDE_CRX0_GAMEN BIT(6)
+#define MCDE_CRX0_KEYCTRL_SHIFT 7
+#define MCDE_CRX0_KEYCTRL_MASK 0x00000380
+#define MCDE_CRX0_KEYCTRL_OFF 0
+#define MCDE_CRX0_KEYCTRL_ALPHA_RGB 1
+#define MCDE_CRX0_KEYCTRL_RGB 2
+#define MCDE_CRX0_KEYCTRL_FALPHA_FRGB 4
+#define MCDE_CRX0_KEYCTRL_FRGB 5
+#define MCDE_CRX0_BLENDCTRL BIT(10)
+#define MCDE_CRX0_FLICKMODE_SHIFT 11
+#define MCDE_CRX0_FLICKMODE_MASK 0x00001800
+#define MCDE_CRX0_FLICKMODE_FORCE_FILTER_0 0
+#define MCDE_CRX0_FLICKMODE_ADAPTIVE 1
+#define MCDE_CRX0_FLICKMODE_TEST_MODE 2
+#define MCDE_CRX0_FLOCKFORMAT_RGB BIT(13) /* 0 = YCVCR */
+#define MCDE_CRX0_PALMODE_GAMMA BIT(14) /* 0 = palette */
+#define MCDE_CRX0_OLEDEN BIT(15)
+#define MCDE_CRX0_ALPHABLEND_SHIFT 16
+#define MCDE_CRX0_ALPHABLEND_MASK 0x00FF0000
+#define MCDE_CRX0_ROTEN BIT(24)
+
+#define MCDE_CRA1 0x00000804
+#define MCDE_CRB1 0x00000A04
+#define MCDE_CRX1_PCD_SHIFT 0
+#define MCDE_CRX1_PCD_MASK 0x000003FF
+#define MCDE_CRX1_CLKSEL_SHIFT 10
+#define MCDE_CRX1_CLKSEL_MASK 0x00001C00
+#define MCDE_CRX1_CLKSEL_CLKPLL72 0
+#define MCDE_CRX1_CLKSEL_CLKPLL27 2
+#define MCDE_CRX1_CLKSEL_TV1CLK 3
+#define MCDE_CRX1_CLKSEL_TV2CLK 4
+#define MCDE_CRX1_CLKSEL_MCDECLK 5
+#define MCDE_CRX1_CDWIN_SHIFT 13
+#define MCDE_CRX1_CDWIN_MASK 0x0001E000
+#define MCDE_CRX1_CDWIN_8BPP_C1 0
+#define MCDE_CRX1_CDWIN_12BPP_C1 1
+#define MCDE_CRX1_CDWIN_12BPP_C2 2
+#define MCDE_CRX1_CDWIN_16BPP_C1 3
+#define MCDE_CRX1_CDWIN_16BPP_C2 4
+#define MCDE_CRX1_CDWIN_16BPP_C3 5
+#define MCDE_CRX1_CDWIN_18BPP_C1 6
+#define MCDE_CRX1_CDWIN_18BPP_C2 7
+#define MCDE_CRX1_CDWIN_24BPP 8
+#define MCDE_CRX1_OUTBPP_SHIFT 25
+#define MCDE_CRX1_OUTBPP_MASK 0x1E000000
+#define MCDE_CRX1_OUTBPP_MONO1 0
+#define MCDE_CRX1_OUTBPP_MONO2 1
+#define MCDE_CRX1_OUTBPP_MONO4 2
+#define MCDE_CRX1_OUTBPP_MONO8 3
+#define MCDE_CRX1_OUTBPP_8BPP 4
+#define MCDE_CRX1_OUTBPP_12BPP 5
+#define MCDE_CRX1_OUTBPP_15BPP 6
+#define MCDE_CRX1_OUTBPP_16BPP 7
+#define MCDE_CRX1_OUTBPP_18BPP 8
+#define MCDE_CRX1_OUTBPP_24BPP 9
+#define MCDE_CRX1_BCD BIT(29)
+#define MCDE_CRA1_CLKTYPE_TVXCLKSEL1 BIT(30) /* 0 = TVXCLKSEL1 */
+
+#define MCDE_COLKEYA 0x00000808
+#define MCDE_COLKEYB 0x00000A08
+
+#define MCDE_FCOLKEYA 0x0000080C
+#define MCDE_FCOLKEYB 0x00000A0C
+
+#define MCDE_RGBCONV1A 0x00000810
+#define MCDE_RGBCONV1B 0x00000A10
+
+#define MCDE_RGBCONV2A 0x00000814
+#define MCDE_RGBCONV2B 0x00000A14
+
+#define MCDE_RGBCONV3A 0x00000818
+#define MCDE_RGBCONV3B 0x00000A18
+
+#define MCDE_RGBCONV4A 0x0000081C
+#define MCDE_RGBCONV4B 0x00000A1C
+
+#define MCDE_RGBCONV5A 0x00000820
+#define MCDE_RGBCONV5B 0x00000A20
+
+#define MCDE_RGBCONV6A 0x00000824
+#define MCDE_RGBCONV6B 0x00000A24
+
+/* Rotation */
+#define MCDE_ROTACONF 0x0000087C
+#define MCDE_ROTBCONF 0x00000A7C
+
+#define MCDE_SYNCHCONFA 0x00000880
+#define MCDE_SYNCHCONFB 0x00000A80
+
+/* Channel A+B control registers */
+#define MCDE_CTRLA 0x00000884
+#define MCDE_CTRLB 0x00000A84
+#define MCDE_CTRLX_FIFOWTRMRK_SHIFT 0
+#define MCDE_CTRLX_FIFOWTRMRK_MASK 0x000003FF
+#define MCDE_CTRLX_FIFOEMPTY BIT(12)
+#define MCDE_CTRLX_FIFOFULL BIT(13)
+#define MCDE_CTRLX_FORMID_SHIFT 16
+#define MCDE_CTRLX_FORMID_MASK 0x00070000
+#define MCDE_CTRLX_FORMID_DSI0VID 0
+#define MCDE_CTRLX_FORMID_DSI0CMD 1
+#define MCDE_CTRLX_FORMID_DSI1VID 2
+#define MCDE_CTRLX_FORMID_DSI1CMD 3
+#define MCDE_CTRLX_FORMID_DSI2VID 4
+#define MCDE_CTRLX_FORMID_DSI2CMD 5
+#define MCDE_CTRLX_FORMID_DPIA 0
+#define MCDE_CTRLX_FORMID_DPIB 1
+#define MCDE_CTRLX_FORMTYPE_SHIFT 20
+#define MCDE_CTRLX_FORMTYPE_MASK 0x00700000
+#define MCDE_CTRLX_FORMTYPE_DPITV 0
+#define MCDE_CTRLX_FORMTYPE_DBI 1
+#define MCDE_CTRLX_FORMTYPE_DSI 2
+
+#define MCDE_DSIVID0CONF0 0x00000E00
+#define MCDE_DSICMD0CONF0 0x00000E20
+#define MCDE_DSIVID1CONF0 0x00000E40
+#define MCDE_DSICMD1CONF0 0x00000E60
+#define MCDE_DSIVID2CONF0 0x00000E80
+#define MCDE_DSICMD2CONF0 0x00000EA0
+#define MCDE_DSICONF0_BLANKING_SHIFT 0
+#define MCDE_DSICONF0_BLANKING_MASK 0x000000FF
+#define MCDE_DSICONF0_VID_MODE_CMD 0
+#define MCDE_DSICONF0_VID_MODE_VID BIT(12)
+#define MCDE_DSICONF0_CMD8 BIT(13)
+#define MCDE_DSICONF0_BIT_SWAP BIT(16)
+#define MCDE_DSICONF0_BYTE_SWAP BIT(17)
+#define MCDE_DSICONF0_DCSVID_NOTGEN BIT(18)
+#define MCDE_DSICONF0_PACKING_SHIFT 20
+#define MCDE_DSICONF0_PACKING_MASK 0x00700000
+#define MCDE_DSICONF0_PACKING_RGB565 0
+#define MCDE_DSICONF0_PACKING_RGB666 1
+#define MCDE_DSICONF0_PACKING_RGB666_PACKED 2
+#define MCDE_DSICONF0_PACKING_RGB888 3
+#define MCDE_DSICONF0_PACKING_HDTV 4
+
+#define MCDE_DSIVID0FRAME 0x00000E04
+#define MCDE_DSICMD0FRAME 0x00000E24
+#define MCDE_DSIVID1FRAME 0x00000E44
+#define MCDE_DSICMD1FRAME 0x00000E64
+#define MCDE_DSIVID2FRAME 0x00000E84
+#define MCDE_DSICMD2FRAME 0x00000EA4
+
+#define MCDE_DSIVID0PKT 0x00000E08
+#define MCDE_DSICMD0PKT 0x00000E28
+#define MCDE_DSIVID1PKT 0x00000E48
+#define MCDE_DSICMD1PKT 0x00000E68
+#define MCDE_DSIVID2PKT 0x00000E88
+#define MCDE_DSICMD2PKT 0x00000EA8
+
+#define MCDE_DSIVID0SYNC 0x00000E0C
+#define MCDE_DSICMD0SYNC 0x00000E2C
+#define MCDE_DSIVID1SYNC 0x00000E4C
+#define MCDE_DSICMD1SYNC 0x00000E6C
+#define MCDE_DSIVID2SYNC 0x00000E8C
+#define MCDE_DSICMD2SYNC 0x00000EAC
+
+#define MCDE_DSIVID0CMDW 0x00000E10
+#define MCDE_DSICMD0CMDW 0x00000E30
+#define MCDE_DSIVID1CMDW 0x00000E50
+#define MCDE_DSICMD1CMDW 0x00000E70
+#define MCDE_DSIVID2CMDW 0x00000E90
+#define MCDE_DSICMD2CMDW 0x00000EB0
+#define MCDE_DSIVIDXCMDW_CMDW_CONTINUE_SHIFT 0
+#define MCDE_DSIVIDXCMDW_CMDW_CONTINUE_MASK 0x0000FFFF
+#define MCDE_DSIVIDXCMDW_CMDW_START_SHIFT 16
+#define MCDE_DSIVIDXCMDW_CMDW_START_MASK 0xFFFF0000
+
+#define MCDE_DSIVID0DELAY0 0x00000E14
+#define MCDE_DSICMD0DELAY0 0x00000E34
+#define MCDE_DSIVID1DELAY0 0x00000E54
+#define MCDE_DSICMD1DELAY0 0x00000E74
+#define MCDE_DSIVID2DELAY0 0x00000E94
+#define MCDE_DSICMD2DELAY0 0x00000EB4
+
+#define MCDE_DSIVID0DELAY1 0x00000E18
+#define MCDE_DSICMD0DELAY1 0x00000E38
+#define MCDE_DSIVID1DELAY1 0x00000E58
+#define MCDE_DSICMD1DELAY1 0x00000E78
+#define MCDE_DSIVID2DELAY1 0x00000E98
+#define MCDE_DSICMD2DELAY1 0x00000EB8
+
+#endif /* __DRM_MCDE_DISPLAY_REGS */
diff --git a/drivers/gpu/drm/mcde/mcde_drm.h b/drivers/gpu/drm/mcde/mcde_drm.h
new file mode 100644
index 000000000000..dab4db021231
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_drm.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Linus Walleij <linus.walleij@linaro.org>
+ * Parts of this file were based on the MCDE driver by Marcus Lorentzon
+ * (C) ST-Ericsson SA 2013
+ */
+#include <drm/drm_simple_kms_helper.h>
+
+#ifndef _MCDE_DRM_H_
+#define _MCDE_DRM_H_
+
+struct mcde {
+	struct drm_device drm;
+	struct device *dev;
+	struct drm_panel *panel;
+	struct drm_bridge *bridge;
+	struct drm_connector *connector;
+	struct drm_simple_display_pipe pipe;
+	struct mipi_dsi_device *mdsi;
+	s16 stride;
+	bool te_sync;
+	bool oneshot_mode;
+	unsigned int flow_active;
+	spinlock_t flow_lock; /* Locks the channel flow control */
+
+	void __iomem *regs;
+
+	struct clk *mcde_clk;
+	struct clk *lcd_clk;
+	struct clk *hdmi_clk;
+
+	struct regulator *epod;
+	struct regulator *vana;
+};
+
+bool mcde_dsi_irq(struct mipi_dsi_device *mdsi);
+void mcde_dsi_te_request(struct mipi_dsi_device *mdsi);
+extern struct platform_driver mcde_dsi_driver;
+
+void mcde_display_irq(struct mcde *mcde);
+void mcde_display_disable_irqs(struct mcde *mcde);
+int mcde_display_init(struct drm_device *drm);
+
+#endif /* _MCDE_DRM_H_ */
diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c
new file mode 100644
index 000000000000..baf63fb6850a
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_drv.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Linus Walleij <linus.walleij@linaro.org>
+ * Parts of this file were based on the MCDE driver by Marcus Lorentzon
+ * (C) ST-Ericsson SA 2013
+ */
+
+/**
+ * DOC: ST-Ericsson MCDE Driver
+ *
+ * The MCDE (short for multi-channel display engine) is a graphics
+ * controller found in the Ux500 chipsets, such as NovaThor U8500.
+ * It was initially conceptualized by ST Microelectronics for the
+ * successor of the Nomadik line, STn8500 but productified in the
+ * ST-Ericsson U8500 where is was used for mass-market deployments
+ * in Android phones from Samsung and Sony Ericsson.
+ *
+ * It can do 1080p30 on SDTV CCIR656, DPI-2, DBI-2 or DSI for
+ * panels with or without frame buffering and can convert most
+ * input formats including most variants of RGB and YUV.
+ *
+ * The hardware has four display pipes, and the layout is a little
+ * bit like this:
+ *
+ * Memory     -> Overlay -> Channel -> FIFO -> 5 formatters -> DSI/DPI
+ * External      0..5       0..3       A,B,    3 x DSI         bridge
+ * source 0..9                         C0,C1   2 x DPI
+ *
+ * FIFOs A and B are for LCD and HDMI while FIFO CO/C1 are for
+ * panels with embedded buffer.
+ * 3 of the formatters are for DSI.
+ * 2 of the formatters are for DPI.
+ *
+ * Behind the formatters are the DSI or DPI ports that route to
+ * the external pins of the chip. As there are 3 DSI ports and one
+ * DPI port, it is possible to configure up to 4 display pipelines
+ * (effectively using channels 0..3) for concurrent use.
+ *
+ * In the current DRM/KMS setup, we use one external source, one overlay,
+ * one FIFO and one formatter which we connect to the simple CMA framebuffer
+ * helpers. We then provide a bridge to the DSI port, and on the DSI port
+ * bridge we connect hang a panel bridge or other bridge. This may be subject
+ * to change as we exploit more of the hardware capabilities.
+ *
+ * TODO:
+ * - Enabled damaged rectangles using drm_plane_enable_fb_damage_clips()
+ *   so we can selectively just transmit the damaged area to a
+ *   command-only display.
+ * - Enable mixing of more planes, possibly at the cost of moving away
+ *   from using the simple framebuffer pipeline.
+ * - Enable output to bridges such as the AV8100 HDMI encoder from
+ *   the DSI bridge.
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/dma-buf.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_of.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_vblank.h>
+
+#include "mcde_drm.h"
+
+#define DRIVER_DESC	"DRM module for MCDE"
+
+#define MCDE_CR 0x00000000
+#define MCDE_CR_IFIFOEMPTYLINECOUNT_V422_SHIFT 0
+#define MCDE_CR_IFIFOEMPTYLINECOUNT_V422_MASK 0x0000003F
+#define MCDE_CR_IFIFOCTRLEN BIT(15)
+#define MCDE_CR_UFRECOVERY_MODE_V422 BIT(16)
+#define MCDE_CR_WRAP_MODE_V422_SHIFT BIT(17)
+#define MCDE_CR_AUTOCLKG_EN BIT(30)
+#define MCDE_CR_MCDEEN BIT(31)
+
+#define MCDE_CONF0 0x00000004
+#define MCDE_CONF0_SYNCMUX0 BIT(0)
+#define MCDE_CONF0_SYNCMUX1 BIT(1)
+#define MCDE_CONF0_SYNCMUX2 BIT(2)
+#define MCDE_CONF0_SYNCMUX3 BIT(3)
+#define MCDE_CONF0_SYNCMUX4 BIT(4)
+#define MCDE_CONF0_SYNCMUX5 BIT(5)
+#define MCDE_CONF0_SYNCMUX6 BIT(6)
+#define MCDE_CONF0_SYNCMUX7 BIT(7)
+#define MCDE_CONF0_IFIFOCTRLWTRMRKLVL_SHIFT 12
+#define MCDE_CONF0_IFIFOCTRLWTRMRKLVL_MASK 0x00007000
+#define MCDE_CONF0_OUTMUX0_SHIFT 16
+#define MCDE_CONF0_OUTMUX0_MASK 0x00070000
+#define MCDE_CONF0_OUTMUX1_SHIFT 19
+#define MCDE_CONF0_OUTMUX1_MASK 0x00380000
+#define MCDE_CONF0_OUTMUX2_SHIFT 22
+#define MCDE_CONF0_OUTMUX2_MASK 0x01C00000
+#define MCDE_CONF0_OUTMUX3_SHIFT 25
+#define MCDE_CONF0_OUTMUX3_MASK 0x0E000000
+#define MCDE_CONF0_OUTMUX4_SHIFT 28
+#define MCDE_CONF0_OUTMUX4_MASK 0x70000000
+
+#define MCDE_SSP 0x00000008
+#define MCDE_AIS 0x00000100
+#define MCDE_IMSCERR 0x00000110
+#define MCDE_RISERR 0x00000120
+#define MCDE_MISERR 0x00000130
+#define MCDE_SISERR 0x00000140
+
+#define MCDE_PID 0x000001FC
+#define MCDE_PID_METALFIX_VERSION_SHIFT 0
+#define MCDE_PID_METALFIX_VERSION_MASK 0x000000FF
+#define MCDE_PID_DEVELOPMENT_VERSION_SHIFT 8
+#define MCDE_PID_DEVELOPMENT_VERSION_MASK 0x0000FF00
+#define MCDE_PID_MINOR_VERSION_SHIFT 16
+#define MCDE_PID_MINOR_VERSION_MASK 0x00FF0000
+#define MCDE_PID_MAJOR_VERSION_SHIFT 24
+#define MCDE_PID_MAJOR_VERSION_MASK 0xFF000000
+
+static const struct drm_mode_config_funcs mcde_mode_config_funcs = {
+	.fb_create = drm_gem_fb_create_with_dirty,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static const struct drm_mode_config_helper_funcs mcde_mode_config_helpers = {
+	/*
+	 * Using this function is necessary to commit atomic updates
+	 * that need the CRTC to be enabled before a commit, as is
+	 * the case with e.g. DSI displays.
+	 */
+	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
+};
+
+static irqreturn_t mcde_irq(int irq, void *data)
+{
+	struct mcde *mcde = data;
+	u32 val;
+
+	val = readl(mcde->regs + MCDE_MISERR);
+
+	mcde_display_irq(mcde);
+
+	if (val)
+		dev_info(mcde->dev, "some error IRQ\n");
+	writel(val, mcde->regs + MCDE_RISERR);
+
+	return IRQ_HANDLED;
+}
+
+static int mcde_modeset_init(struct drm_device *drm)
+{
+	struct drm_mode_config *mode_config;
+	struct mcde *mcde = drm->dev_private;
+	int ret;
+
+	if (!mcde->bridge) {
+		dev_err(drm->dev, "no display output bridge yet\n");
+		return -EPROBE_DEFER;
+	}
+
+	mode_config = &drm->mode_config;
+	mode_config->funcs = &mcde_mode_config_funcs;
+	mode_config->helper_private = &mcde_mode_config_helpers;
+	/* This hardware can do 1080p */
+	mode_config->min_width = 1;
+	mode_config->max_width = 1920;
+	mode_config->min_height = 1;
+	mode_config->max_height = 1080;
+
+	/*
+	 * Currently we only support vblank handling on the DSI bridge, using
+	 * TE synchronization. If TE sync is not set up, it is still possible
+	 * to push out a single update on demand, but this is hard for DRM to
+	 * exploit.
+	 */
+	if (mcde->te_sync) {
+		ret = drm_vblank_init(drm, 1);
+		if (ret) {
+			dev_err(drm->dev, "failed to init vblank\n");
+			goto out_config;
+		}
+	}
+
+	ret = mcde_display_init(drm);
+	if (ret) {
+		dev_err(drm->dev, "failed to init display\n");
+		goto out_config;
+	}
+
+	/*
+	 * Attach the DSI bridge
+	 *
+	 * TODO: when adding support for the DPI bridge or several DSI bridges,
+	 * we selectively connect the bridge(s) here instead of this simple
+	 * attachment.
+	 */
+	ret = drm_simple_display_pipe_attach_bridge(&mcde->pipe,
+						    mcde->bridge);
+	if (ret) {
+		dev_err(drm->dev, "failed to attach display output bridge\n");
+		goto out_config;
+	}
+
+	drm_mode_config_reset(drm);
+	drm_kms_helper_poll_init(drm);
+	drm_fbdev_generic_setup(drm, 32);
+
+	return 0;
+
+out_config:
+	drm_mode_config_cleanup(drm);
+	return ret;
+}
+
+static void mcde_release(struct drm_device *drm)
+{
+	struct mcde *mcde = drm->dev_private;
+
+	drm_mode_config_cleanup(drm);
+	drm_dev_fini(drm);
+	kfree(mcde);
+}
+
+DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
+
+static struct drm_driver mcde_drm_driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC,
+	.release = mcde_release,
+	.lastclose = drm_fb_helper_lastclose,
+	.ioctls = NULL,
+	.fops = &drm_fops,
+	.name = "mcde",
+	.desc = DRIVER_DESC,
+	.date = "20180529",
+	.major = 1,
+	.minor = 0,
+	.patchlevel = 0,
+	.dumb_create = drm_gem_cma_dumb_create,
+	.gem_free_object_unlocked = drm_gem_cma_free_object,
+	.gem_vm_ops = &drm_gem_cma_vm_ops,
+
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
+	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_vmap = drm_gem_cma_prime_vmap,
+	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
+	.gem_prime_mmap = drm_gem_cma_prime_mmap,
+};
+
+static int mcde_drm_bind(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	int ret;
+
+	drm_mode_config_init(drm);
+
+	ret = component_bind_all(drm->dev, drm);
+	if (ret) {
+		dev_err(dev, "can't bind component devices\n");
+		return ret;
+	}
+
+	ret = mcde_modeset_init(drm);
+	if (ret)
+		goto unbind;
+
+	ret = drm_dev_register(drm, 0);
+	if (ret < 0)
+		goto unbind;
+
+	return 0;
+
+unbind:
+	component_unbind_all(drm->dev, drm);
+	return ret;
+}
+
+static void mcde_drm_unbind(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+
+	drm_dev_unregister(drm);
+	drm_atomic_helper_shutdown(drm);
+	component_unbind_all(drm->dev, drm);
+}
+
+static const struct component_master_ops mcde_drm_comp_ops = {
+	.bind = mcde_drm_bind,
+	.unbind = mcde_drm_unbind,
+};
+
+static struct platform_driver *const mcde_component_drivers[] = {
+	&mcde_dsi_driver,
+};
+
+static int mcde_compare_dev(struct device *dev, void *data)
+{
+	return dev == data;
+}
+
+static int mcde_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct drm_device *drm;
+	struct mcde *mcde;
+	struct component_match *match;
+	struct resource *res;
+	u32 pid;
+	u32 val;
+	int irq;
+	int ret;
+	int i;
+
+	mcde = kzalloc(sizeof(*mcde), GFP_KERNEL);
+	if (!mcde)
+		return -ENOMEM;
+	mcde->dev = dev;
+
+	ret = drm_dev_init(&mcde->drm, &mcde_drm_driver, dev);
+	if (ret) {
+		kfree(mcde);
+		return ret;
+	}
+	drm = &mcde->drm;
+	drm->dev_private = mcde;
+	platform_set_drvdata(pdev, drm);
+
+	/* Enable use of the TE signal and interrupt */
+	mcde->te_sync = true;
+	/* Enable continuous updates: this is what Linux' framebuffer expects */
+	mcde->oneshot_mode = false;
+	drm->dev_private = mcde;
+
+	/* First obtain and turn on the main power */
+	mcde->epod = devm_regulator_get(dev, "epod");
+	if (IS_ERR(mcde->epod)) {
+		ret = PTR_ERR(mcde->epod);
+		dev_err(dev, "can't get EPOD regulator\n");
+		goto dev_unref;
+	}
+	ret = regulator_enable(mcde->epod);
+	if (ret) {
+		dev_err(dev, "can't enable EPOD regulator\n");
+		goto dev_unref;
+	}
+	mcde->vana = devm_regulator_get(dev, "vana");
+	if (IS_ERR(mcde->vana)) {
+		ret = PTR_ERR(mcde->vana);
+		dev_err(dev, "can't get VANA regulator\n");
+		goto regulator_epod_off;
+	}
+	ret = regulator_enable(mcde->vana);
+	if (ret) {
+		dev_err(dev, "can't enable VANA regulator\n");
+		goto regulator_epod_off;
+	}
+	/*
+	 * The vendor code uses ESRAM (onchip RAM) and need to activate
+	 * the v-esram34 regulator, but we don't use that yet
+	 */
+
+	/* Clock the silicon so we can access the registers */
+	mcde->mcde_clk = devm_clk_get(dev, "mcde");
+	if (IS_ERR(mcde->mcde_clk)) {
+		dev_err(dev, "unable to get MCDE main clock\n");
+		ret = PTR_ERR(mcde->mcde_clk);
+		goto regulator_off;
+	}
+	ret = clk_prepare_enable(mcde->mcde_clk);
+	if (ret) {
+		dev_err(dev, "failed to enable MCDE main clock\n");
+		goto regulator_off;
+	}
+	dev_info(dev, "MCDE clk rate %lu Hz\n", clk_get_rate(mcde->mcde_clk));
+
+	mcde->lcd_clk = devm_clk_get(dev, "lcd");
+	if (IS_ERR(mcde->lcd_clk)) {
+		dev_err(dev, "unable to get LCD clock\n");
+		ret = PTR_ERR(mcde->lcd_clk);
+		goto clk_disable;
+	}
+	mcde->hdmi_clk = devm_clk_get(dev, "hdmi");
+	if (IS_ERR(mcde->hdmi_clk)) {
+		dev_err(dev, "unable to get HDMI clock\n");
+		ret = PTR_ERR(mcde->hdmi_clk);
+		goto clk_disable;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mcde->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(mcde->regs)) {
+		dev_err(dev, "no MCDE regs\n");
+		ret = -EINVAL;
+		goto clk_disable;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (!irq) {
+		ret = -EINVAL;
+		goto clk_disable;
+	}
+
+	ret = devm_request_irq(dev, irq, mcde_irq, 0, "mcde", mcde);
+	if (ret) {
+		dev_err(dev, "failed to request irq %d\n", ret);
+		goto clk_disable;
+	}
+
+	/*
+	 * Check hardware revision, we only support U8500v2 version
+	 * as this was the only version used for mass market deployment,
+	 * but surely you can add more versions if you have them and
+	 * need them.
+	 */
+	pid = readl(mcde->regs + MCDE_PID);
+	dev_info(dev, "found MCDE HW revision %d.%d (dev %d, metal fix %d)\n",
+		 (pid & MCDE_PID_MAJOR_VERSION_MASK)
+		 >> MCDE_PID_MAJOR_VERSION_SHIFT,
+		 (pid & MCDE_PID_MINOR_VERSION_MASK)
+		 >> MCDE_PID_MINOR_VERSION_SHIFT,
+		 (pid & MCDE_PID_DEVELOPMENT_VERSION_MASK)
+		 >> MCDE_PID_DEVELOPMENT_VERSION_SHIFT,
+		 (pid & MCDE_PID_METALFIX_VERSION_MASK)
+		 >> MCDE_PID_METALFIX_VERSION_SHIFT);
+	if (pid != 0x03000800) {
+		dev_err(dev, "unsupported hardware revision\n");
+		ret = -ENODEV;
+		goto clk_disable;
+	}
+
+	/* Set up the main control, watermark level at 7 */
+	val = 7 << MCDE_CONF0_IFIFOCTRLWTRMRKLVL_SHIFT;
+	/* 24 bits DPI: connect LSB Ch B to D[0:7] */
+	val |= 3 << MCDE_CONF0_OUTMUX0_SHIFT;
+	/* TV out: connect LSB Ch B to D[8:15] */
+	val |= 3 << MCDE_CONF0_OUTMUX1_SHIFT;
+	/* Don't care about this muxing */
+	val |= 0 << MCDE_CONF0_OUTMUX2_SHIFT;
+	/* 24 bits DPI: connect MID Ch B to D[24:31] */
+	val |= 4 << MCDE_CONF0_OUTMUX3_SHIFT;
+	/* 5: 24 bits DPI: connect MSB Ch B to D[32:39] */
+	val |= 5 << MCDE_CONF0_OUTMUX4_SHIFT;
+	/* Syncmux bits zero: DPI channel A and B on output pins A and B resp */
+	writel(val, mcde->regs + MCDE_CONF0);
+
+	/* Enable automatic clock gating */
+	val = readl(mcde->regs + MCDE_CR);
+	val |= MCDE_CR_MCDEEN | MCDE_CR_AUTOCLKG_EN;
+	writel(val, mcde->regs + MCDE_CR);
+
+	/* Clear any pending interrupts */
+	mcde_display_disable_irqs(mcde);
+	writel(0, mcde->regs + MCDE_IMSCERR);
+	writel(0xFFFFFFFF, mcde->regs + MCDE_RISERR);
+
+	/* Spawn child devices for the DSI ports */
+	devm_of_platform_populate(dev);
+
+	/* Create something that will match the subdrivers when we bind */
+	for (i = 0; i < ARRAY_SIZE(mcde_component_drivers); i++) {
+		struct device_driver *drv = &mcde_component_drivers[i]->driver;
+		struct device *p = NULL, *d;
+
+		while ((d = bus_find_device(&platform_bus_type, p, drv,
+					    (void *)platform_bus_type.match))) {
+			put_device(p);
+			component_match_add(dev, &match, mcde_compare_dev, d);
+			p = d;
+		}
+		put_device(p);
+	}
+	if (IS_ERR(match)) {
+		dev_err(dev, "could not create component match\n");
+		ret = PTR_ERR(match);
+		goto clk_disable;
+	}
+	ret = component_master_add_with_match(&pdev->dev, &mcde_drm_comp_ops,
+					      match);
+	if (ret) {
+		dev_err(dev, "failed to add component master\n");
+		goto clk_disable;
+	}
+	return 0;
+
+clk_disable:
+	clk_disable_unprepare(mcde->mcde_clk);
+regulator_off:
+	regulator_disable(mcde->vana);
+regulator_epod_off:
+	regulator_disable(mcde->epod);
+dev_unref:
+	drm_dev_put(drm);
+	return ret;
+
+}
+
+static int mcde_remove(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+	struct mcde *mcde = drm->dev_private;
+
+	component_master_del(&pdev->dev, &mcde_drm_comp_ops);
+	clk_disable_unprepare(mcde->mcde_clk);
+	regulator_disable(mcde->vana);
+	regulator_disable(mcde->epod);
+	drm_dev_put(drm);
+
+	return 0;
+}
+
+static const struct of_device_id mcde_of_match[] = {
+	{
+		.compatible = "ste,mcde",
+	},
+	{},
+};
+
+static struct platform_driver mcde_driver = {
+	.driver = {
+		.name           = "mcde",
+		.of_match_table = of_match_ptr(mcde_of_match),
+	},
+	.probe = mcde_probe,
+	.remove = mcde_remove,
+};
+
+static struct platform_driver *const component_drivers[] = {
+	&mcde_dsi_driver,
+};
+
+static int __init mcde_drm_register(void)
+{
+	int ret;
+
+	ret = platform_register_drivers(component_drivers,
+					ARRAY_SIZE(component_drivers));
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&mcde_driver);
+}
+
+static void __exit mcde_drm_unregister(void)
+{
+	platform_unregister_drivers(component_drivers,
+				    ARRAY_SIZE(component_drivers));
+	platform_driver_unregister(&mcde_driver);
+}
+
+module_init(mcde_drm_register);
+module_exit(mcde_drm_unregister);
+
+MODULE_ALIAS("platform:mcde-drm");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
new file mode 100644
index 000000000000..07f7090d08b3
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_dsi.c
@@ -0,0 +1,1044 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <video/mipi_display.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+
+#include "mcde_drm.h"
+#include "mcde_dsi_regs.h"
+
+#define DSI_DEFAULT_LP_FREQ_HZ	19200000
+#define DSI_DEFAULT_HS_FREQ_HZ	420160000
+
+/* PRCMU DSI reset registers */
+#define PRCM_DSI_SW_RESET 0x324
+#define PRCM_DSI_SW_RESET_DSI0_SW_RESETN BIT(0)
+#define PRCM_DSI_SW_RESET_DSI1_SW_RESETN BIT(1)
+#define PRCM_DSI_SW_RESET_DSI2_SW_RESETN BIT(2)
+
+struct mcde_dsi {
+	struct device *dev;
+	struct mcde *mcde;
+	struct drm_bridge bridge;
+	struct drm_connector connector;
+	struct drm_panel *panel;
+	struct drm_bridge *bridge_out;
+	struct mipi_dsi_host dsi_host;
+	struct mipi_dsi_device *mdsi;
+	struct clk *hs_clk;
+	struct clk *lp_clk;
+	unsigned long hs_freq;
+	unsigned long lp_freq;
+	bool unused;
+
+	void __iomem *regs;
+	struct regmap *prcmu;
+};
+
+static inline struct mcde_dsi *bridge_to_mcde_dsi(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct mcde_dsi, bridge);
+}
+
+static inline struct mcde_dsi *host_to_mcde_dsi(struct mipi_dsi_host *h)
+{
+	return container_of(h, struct mcde_dsi, dsi_host);
+}
+
+static inline struct mcde_dsi *connector_to_mcde_dsi(struct drm_connector *c)
+{
+	return container_of(c, struct mcde_dsi, connector);
+}
+
+bool mcde_dsi_irq(struct mipi_dsi_device *mdsi)
+{
+	struct mcde_dsi *d;
+	u32 val;
+	bool te_received = false;
+
+	d = host_to_mcde_dsi(mdsi->host);
+
+	dev_dbg(d->dev, "%s called\n", __func__);
+
+	val = readl(d->regs + DSI_DIRECT_CMD_STS_FLAG);
+	if (val)
+		dev_dbg(d->dev, "DSI_DIRECT_CMD_STS_FLAG = %08x\n", val);
+	if (val & DSI_DIRECT_CMD_STS_WRITE_COMPLETED)
+		dev_dbg(d->dev, "direct command write completed\n");
+	if (val & DSI_DIRECT_CMD_STS_TE_RECEIVED) {
+		te_received = true;
+		dev_dbg(d->dev, "direct command TE received\n");
+	}
+	if (val & DSI_DIRECT_CMD_STS_ACKNOWLEDGE_WITH_ERR_RECEIVED)
+		dev_err(d->dev, "direct command ACK ERR received\n");
+	if (val & DSI_DIRECT_CMD_STS_READ_COMPLETED_WITH_ERR)
+		dev_err(d->dev, "direct command read ERR received\n");
+	/* Mask off the ACK value and clear status */
+	writel(val, d->regs + DSI_DIRECT_CMD_STS_CLR);
+
+	val = readl(d->regs + DSI_CMD_MODE_STS_FLAG);
+	if (val)
+		dev_dbg(d->dev, "DSI_CMD_MODE_STS_FLAG = %08x\n", val);
+	if (val & DSI_CMD_MODE_STS_ERR_NO_TE)
+		/* This happens all the time (safe to ignore) */
+		dev_dbg(d->dev, "CMD mode no TE\n");
+	if (val & DSI_CMD_MODE_STS_ERR_TE_MISS)
+		/* This happens all the time (safe to ignore) */
+		dev_dbg(d->dev, "CMD mode TE miss\n");
+	if (val & DSI_CMD_MODE_STS_ERR_SDI1_UNDERRUN)
+		dev_err(d->dev, "CMD mode SD1 underrun\n");
+	if (val & DSI_CMD_MODE_STS_ERR_SDI2_UNDERRUN)
+		dev_err(d->dev, "CMD mode SD2 underrun\n");
+	if (val & DSI_CMD_MODE_STS_ERR_UNWANTED_RD)
+		dev_err(d->dev, "CMD mode unwanted RD\n");
+	writel(val, d->regs + DSI_CMD_MODE_STS_CLR);
+
+	val = readl(d->regs + DSI_DIRECT_CMD_RD_STS_FLAG);
+	if (val)
+		dev_dbg(d->dev, "DSI_DIRECT_CMD_RD_STS_FLAG = %08x\n", val);
+	writel(val, d->regs + DSI_DIRECT_CMD_RD_STS_CLR);
+
+	val = readl(d->regs + DSI_TG_STS_FLAG);
+	if (val)
+		dev_dbg(d->dev, "DSI_TG_STS_FLAG = %08x\n", val);
+	writel(val, d->regs + DSI_TG_STS_CLR);
+
+	val = readl(d->regs + DSI_VID_MODE_STS_FLAG);
+	if (val)
+		dev_err(d->dev, "some video mode error status\n");
+	writel(val, d->regs + DSI_VID_MODE_STS_CLR);
+
+	return te_received;
+}
+
+static int mcde_dsi_host_attach(struct mipi_dsi_host *host,
+				struct mipi_dsi_device *mdsi)
+{
+	struct mcde_dsi *d = host_to_mcde_dsi(host);
+
+	if (mdsi->lanes < 1 || mdsi->lanes > 2) {
+		DRM_ERROR("dsi device params invalid, 1 or 2 lanes supported\n");
+		return -EINVAL;
+	}
+
+	dev_info(d->dev, "attached DSI device with %d lanes\n", mdsi->lanes);
+	/* MIPI_DSI_FMT_RGB88 etc */
+	dev_info(d->dev, "format %08x, %dbpp\n", mdsi->format,
+		 mipi_dsi_pixel_format_to_bpp(mdsi->format));
+	dev_info(d->dev, "mode flags: %08lx\n", mdsi->mode_flags);
+
+	d->mdsi = mdsi;
+	if (d->mcde)
+		d->mcde->mdsi = mdsi;
+
+	return 0;
+}
+
+static int mcde_dsi_host_detach(struct mipi_dsi_host *host,
+				struct mipi_dsi_device *mdsi)
+{
+	struct mcde_dsi *d = host_to_mcde_dsi(host);
+
+	d->mdsi = NULL;
+	if (d->mcde)
+		d->mcde->mdsi = NULL;
+
+	return 0;
+}
+
+#define MCDE_DSI_HOST_IS_READ(type)			    \
+	((type == MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM) || \
+	 (type == MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM) || \
+	 (type == MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM) || \
+	 (type == MIPI_DSI_DCS_READ))
+
+static ssize_t mcde_dsi_host_transfer(struct mipi_dsi_host *host,
+				      const struct mipi_dsi_msg *msg)
+{
+	struct mcde_dsi *d = host_to_mcde_dsi(host);
+	const u32 loop_delay_us = 10; /* us */
+	const u8 *tx = msg->tx_buf;
+	u32 loop_counter;
+	size_t txlen;
+	u32 val;
+	int ret;
+	int i;
+
+	txlen = msg->tx_len;
+	if (txlen > 12) {
+		dev_err(d->dev,
+			"dunno how to write more than 12 bytes yet\n");
+		return -EIO;
+	}
+
+	dev_dbg(d->dev,
+		"message to channel %d, %zd bytes",
+		msg->channel,
+		txlen);
+
+	/* Command "nature" */
+	if (MCDE_DSI_HOST_IS_READ(msg->type))
+		/* MCTL_MAIN_DATA_CTL already set up */
+		val = DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_READ;
+	else
+		val = DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_WRITE;
+	/*
+	 * More than 2 bytes will not fit in a single packet, so it's
+	 * time to set the "long not short" bit. One byte is used by
+	 * the MIPI DCS command leaving just one byte for the payload
+	 * in a short package.
+	 */
+	if (mipi_dsi_packet_format_is_long(msg->type))
+		val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LONGNOTSHORT;
+	val |= 0 << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_ID_SHIFT;
+	/* Add one to the length for the MIPI DCS command */
+	val |= txlen
+		<< DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_SIZE_SHIFT;
+	val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LP_EN;
+	val |= msg->type << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SHIFT;
+	writel(val, d->regs + DSI_DIRECT_CMD_MAIN_SETTINGS);
+
+	/* MIPI DCS command is part of the data */
+	if (txlen > 0) {
+		val = 0;
+		for (i = 0; i < 4 && i < txlen; i++)
+			val |= tx[i] << (i & 3) * 8;
+	}
+	writel(val, d->regs + DSI_DIRECT_CMD_WRDAT0);
+	if (txlen > 4) {
+		val = 0;
+		for (i = 0; i < 4 && (i + 4) < txlen; i++)
+			val |= tx[i + 4] << (i & 3) * 8;
+		writel(val, d->regs + DSI_DIRECT_CMD_WRDAT1);
+	}
+	if (txlen > 8) {
+		val = 0;
+		for (i = 0; i < 4 && (i + 8) < txlen; i++)
+			val |= tx[i + 8] << (i & 3) * 8;
+		writel(val, d->regs + DSI_DIRECT_CMD_WRDAT2);
+	}
+	if (txlen > 12) {
+		val = 0;
+		for (i = 0; i < 4 && (i + 12) < txlen; i++)
+			val |= tx[i + 12] << (i & 3) * 8;
+		writel(val, d->regs + DSI_DIRECT_CMD_WRDAT3);
+	}
+
+	writel(~0, d->regs + DSI_DIRECT_CMD_STS_CLR);
+	writel(~0, d->regs + DSI_CMD_MODE_STS_CLR);
+	/* Send command */
+	writel(1, d->regs + DSI_DIRECT_CMD_SEND);
+
+	loop_counter = 1000 * 1000 / loop_delay_us;
+	while (!(readl(d->regs + DSI_DIRECT_CMD_STS) &
+		 DSI_DIRECT_CMD_STS_WRITE_COMPLETED)
+	       && --loop_counter)
+		usleep_range(loop_delay_us, (loop_delay_us * 3) / 2);
+
+	if (!loop_counter) {
+		dev_err(d->dev, "DSI write timeout!\n");
+		return -ETIME;
+	}
+
+	val = readl(d->regs + DSI_DIRECT_CMD_STS);
+	if (val & DSI_DIRECT_CMD_STS_ACKNOWLEDGE_WITH_ERR_RECEIVED) {
+		val >>= DSI_DIRECT_CMD_STS_ACK_VAL_SHIFT;
+		dev_err(d->dev, "error during transmission: %04x\n",
+			val);
+		return -EIO;
+	}
+
+	if (!MCDE_DSI_HOST_IS_READ(msg->type)) {
+		/* Return number of bytes written */
+		if (mipi_dsi_packet_format_is_long(msg->type))
+			ret = 4 + txlen;
+		else
+			ret = 4;
+	} else {
+		/* OK this is a read command, get the response */
+		u32 rdsz;
+		u32 rddat;
+		u8 *rx = msg->rx_buf;
+
+		rdsz = readl(d->regs + DSI_DIRECT_CMD_RD_PROPERTY);
+		rdsz &= DSI_DIRECT_CMD_RD_PROPERTY_RD_SIZE_MASK;
+		rddat = readl(d->regs + DSI_DIRECT_CMD_RDDAT);
+		for (i = 0; i < 4 && i < rdsz; i++)
+			rx[i] = (rddat >> (i * 8)) & 0xff;
+		ret = rdsz;
+	}
+
+	writel(~0, d->regs + DSI_DIRECT_CMD_STS_CLR);
+	writel(~0, d->regs + DSI_CMD_MODE_STS_CLR);
+
+	return ret;
+}
+
+static const struct mipi_dsi_host_ops mcde_dsi_host_ops = {
+	.attach = mcde_dsi_host_attach,
+	.detach = mcde_dsi_host_detach,
+	.transfer = mcde_dsi_host_transfer,
+};
+
+/* This sends a direct (short) command to request TE */
+void mcde_dsi_te_request(struct mipi_dsi_device *mdsi)
+{
+	struct mcde_dsi *d;
+	u32 val;
+
+	d = host_to_mcde_dsi(mdsi->host);
+
+	/* Command "nature" TE request */
+	val = DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_TE_REQ;
+	val |= 0 << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_ID_SHIFT;
+	val |= 2 << DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_SIZE_SHIFT;
+	val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LP_EN;
+	val |= DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_DCS_SHORT_WRITE_1 <<
+		DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SHIFT;
+	writel(val, d->regs + DSI_DIRECT_CMD_MAIN_SETTINGS);
+
+	/* Clear TE reveived and error status bits and enables them */
+	writel(DSI_DIRECT_CMD_STS_CLR_TE_RECEIVED_CLR |
+	       DSI_DIRECT_CMD_STS_CLR_ACKNOWLEDGE_WITH_ERR_RECEIVED_CLR,
+	       d->regs + DSI_DIRECT_CMD_STS_CLR);
+	val = readl(d->regs + DSI_DIRECT_CMD_STS_CTL);
+	val |= DSI_DIRECT_CMD_STS_CTL_TE_RECEIVED_EN;
+	val |= DSI_DIRECT_CMD_STS_CTL_ACKNOWLEDGE_WITH_ERR_EN;
+	writel(val, d->regs + DSI_DIRECT_CMD_STS_CTL);
+
+	/* Clear and enable no TE or TE missing status */
+	writel(DSI_CMD_MODE_STS_CLR_ERR_NO_TE_CLR |
+	       DSI_CMD_MODE_STS_CLR_ERR_TE_MISS_CLR,
+	       d->regs + DSI_CMD_MODE_STS_CLR);
+	val = readl(d->regs + DSI_CMD_MODE_STS_CTL);
+	val |= DSI_CMD_MODE_STS_CTL_ERR_NO_TE_EN;
+	val |= DSI_CMD_MODE_STS_CTL_ERR_TE_MISS_EN;
+	writel(val, d->regs + DSI_CMD_MODE_STS_CTL);
+
+	/* Send this TE request command */
+	writel(1, d->regs + DSI_DIRECT_CMD_SEND);
+}
+
+static void mcde_dsi_setup_video_mode(struct mcde_dsi *d,
+				      const struct drm_display_mode *mode)
+{
+	u8 bpp = mipi_dsi_pixel_format_to_bpp(d->mdsi->format);
+	u64 bpl;
+	u32 hfp;
+	u32 hbp;
+	u32 hsa;
+	u32 blkline_pck, line_duration;
+	u32 blkeol_pck, blkeol_duration;
+	u32 val;
+
+	val = 0;
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST)
+		val |= DSI_VID_MAIN_CTL_BURST_MODE;
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
+		val |= DSI_VID_MAIN_CTL_SYNC_PULSE_ACTIVE;
+		val |= DSI_VID_MAIN_CTL_SYNC_PULSE_HORIZONTAL;
+	}
+	/* RGB header and pixel mode */
+	switch (d->mdsi->format) {
+	case MIPI_DSI_FMT_RGB565:
+		val |= MIPI_DSI_PACKED_PIXEL_STREAM_16 <<
+			DSI_VID_MAIN_CTL_HEADER_SHIFT;
+		val |= DSI_VID_MAIN_CTL_VID_PIXEL_MODE_16BITS;
+		break;
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		val |= MIPI_DSI_PACKED_PIXEL_STREAM_18 <<
+			DSI_VID_MAIN_CTL_HEADER_SHIFT;
+		val |= DSI_VID_MAIN_CTL_VID_PIXEL_MODE_18BITS;
+		break;
+	case MIPI_DSI_FMT_RGB666:
+		val |= MIPI_DSI_PIXEL_STREAM_3BYTE_18
+			<< DSI_VID_MAIN_CTL_HEADER_SHIFT;
+		val |= DSI_VID_MAIN_CTL_VID_PIXEL_MODE_18BITS_LOOSE;
+		break;
+	case MIPI_DSI_FMT_RGB888:
+		val |= MIPI_DSI_PACKED_PIXEL_STREAM_24 <<
+			DSI_VID_MAIN_CTL_HEADER_SHIFT;
+		val |= DSI_VID_MAIN_CTL_VID_PIXEL_MODE_24BITS;
+		break;
+	default:
+		dev_err(d->dev, "unknown pixel mode\n");
+		return;
+	}
+
+	/* TODO: TVG could be enabled here */
+
+	/* Send blanking packet */
+	val |= DSI_VID_MAIN_CTL_REG_BLKLINE_MODE_LP_0;
+	/* Send EOL packet */
+	val |= DSI_VID_MAIN_CTL_REG_BLKEOL_MODE_LP_0;
+	/* Recovery mode 1 */
+	val |= 1 << DSI_VID_MAIN_CTL_RECOVERY_MODE_SHIFT;
+	/* All other fields zero */
+	writel(val, d->regs + DSI_VID_MAIN_CTL);
+
+	/* Vertical frame parameters are pretty straight-forward */
+	val = mode->vdisplay << DSI_VID_VSIZE_VSA_LENGTH_SHIFT;
+	/* vertical front porch */
+	val |= (mode->vsync_start - mode->vdisplay)
+		<< DSI_VID_VSIZE_VFP_LENGTH_SHIFT;
+	/* vertical sync active */
+	val |= (mode->vsync_end - mode->vsync_start)
+		<< DSI_VID_VSIZE_VACT_LENGTH_SHIFT;
+	/* vertical back porch */
+	val |= (mode->vtotal - mode->vsync_end)
+		<< DSI_VID_VSIZE_VBP_LENGTH_SHIFT;
+	writel(val, d->regs + DSI_VID_VSIZE);
+
+	/*
+	 * Horizontal frame parameters:
+	 * horizontal resolution is given in pixels and must be re-calculated
+	 * into bytes since this is what the hardware expects.
+	 *
+	 * 6 + 2 is HFP header + checksum
+	 */
+	hfp = (mode->hsync_start - mode->hdisplay) * bpp - 6 - 2;
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
+		/*
+		 * 6 is HBP header + checksum
+		 * 4 is RGB header + checksum
+		 */
+		hbp = (mode->htotal - mode->hsync_end) * bpp - 4 - 6;
+		/*
+		 * 6 is HBP header + checksum
+		 * 4 is HSW packet bytes
+		 * 4 is RGB header + checksum
+		 */
+		hsa = (mode->hsync_end - mode->hsync_start) * bpp - 4 - 4 - 6;
+	} else {
+		/*
+		 * HBP includes both back porch and sync
+		 * 6 is HBP header + checksum
+		 * 4 is HSW packet bytes
+		 * 4 is RGB header + checksum
+		 */
+		hbp = (mode->htotal - mode->hsync_start) * bpp - 4 - 4 - 6;
+		/* HSA is not considered in this mode and set to 0 */
+		hsa = 0;
+	}
+	dev_dbg(d->dev, "hfp: %u, hbp: %u, hsa: %u\n",
+		hfp, hbp, hsa);
+
+	/* Frame parameters: horizontal sync active */
+	val = hsa << DSI_VID_HSIZE1_HSA_LENGTH_SHIFT;
+	/* horizontal back porch */
+	val |= hbp << DSI_VID_HSIZE1_HBP_LENGTH_SHIFT;
+	/* horizontal front porch */
+	val |= hfp << DSI_VID_HSIZE1_HFP_LENGTH_SHIFT;
+	writel(val, d->regs + DSI_VID_HSIZE1);
+
+	/* RGB data length (bytes on one scanline) */
+	val = mode->hdisplay * (bpp / 8);
+	writel(val, d->regs + DSI_VID_HSIZE2);
+
+	/* TODO: further adjustments for TVG mode here */
+
+	/*
+	 * EOL packet length from bits per line calculations: pixel clock
+	 * is given in kHz, calculate the time between two pixels in
+	 * picoseconds.
+	 */
+	bpl = mode->clock * mode->htotal;
+	bpl *= (d->hs_freq / 8);
+	do_div(bpl, 1000000); /* microseconds */
+	do_div(bpl, 1000000); /* seconds */
+	bpl *= d->mdsi->lanes;
+	dev_dbg(d->dev, "calculated bytes per line: %llu\n", bpl);
+	/*
+	 * 6 is header + checksum, header = 4 bytes, checksum = 2 bytes
+	 * 4 is short packet for vsync/hsync
+	 */
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) {
+		/* Fixme: isn't the hsync width in pixels? */
+		blkline_pck = bpl - (mode->hsync_end - mode->hsync_start) - 6;
+		val = blkline_pck << DSI_VID_BLKSIZE2_BLKLINE_PULSE_PCK_SHIFT;
+		writel(val, d->regs + DSI_VID_BLKSIZE2);
+	} else {
+		blkline_pck = bpl - 4 - 6;
+		val = blkline_pck << DSI_VID_BLKSIZE1_BLKLINE_EVENT_PCK_SHIFT;
+		writel(val, d->regs + DSI_VID_BLKSIZE1);
+	}
+
+	line_duration = (blkline_pck + 6) / d->mdsi->lanes;
+	dev_dbg(d->dev, "line duration %u\n", line_duration);
+	val = line_duration << DSI_VID_DPHY_TIME_REG_LINE_DURATION_SHIFT;
+	/*
+	 * This is the time to perform LP->HS on D-PHY
+	 * FIXME: nowhere to get this from: DT property on the DSI?
+	 */
+	val |= 0 << DSI_VID_DPHY_TIME_REG_WAKEUP_TIME_SHIFT;
+	writel(val, d->regs + DSI_VID_DPHY_TIME);
+
+	/* Calculate block end of line */
+	blkeol_pck = bpl - mode->hdisplay * bpp - 6;
+	blkeol_duration = (blkeol_pck + 6) / d->mdsi->lanes;
+	dev_dbg(d->dev, "blkeol pck: %u, duration: %u\n",
+		 blkeol_pck, blkeol_duration);
+
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) {
+		/* Set up EOL clock for burst mode */
+		val = readl(d->regs + DSI_VID_BLKSIZE1);
+		val |= blkeol_pck << DSI_VID_BLKSIZE1_BLKEOL_PCK_SHIFT;
+		writel(val, d->regs + DSI_VID_BLKSIZE1);
+		writel(blkeol_pck, d->regs + DSI_VID_VCA_SETTING2);
+
+		writel(blkeol_duration, d->regs + DSI_VID_PCK_TIME);
+		writel(blkeol_duration - 6, d->regs + DSI_VID_VCA_SETTING1);
+	}
+
+	/* Maximum line limit */
+	val = readl(d->regs + DSI_VID_VCA_SETTING2);
+	val |= blkline_pck <<
+		DSI_VID_VCA_SETTING2_EXACT_BURST_LIMIT_SHIFT;
+	writel(val, d->regs + DSI_VID_VCA_SETTING2);
+
+	/* Put IF1 into video mode */
+	val = readl(d->regs + DSI_MCTL_MAIN_DATA_CTL);
+	val |= DSI_MCTL_MAIN_DATA_CTL_IF1_MODE;
+	writel(val, d->regs + DSI_MCTL_MAIN_DATA_CTL);
+
+	/* Disable command mode on IF1 */
+	val = readl(d->regs + DSI_CMD_MODE_CTL);
+	val &= ~DSI_CMD_MODE_CTL_IF1_LP_EN;
+	writel(val, d->regs + DSI_CMD_MODE_CTL);
+
+	/* Enable some error interrupts */
+	val = readl(d->regs + DSI_VID_MODE_STS_CTL);
+	val |= DSI_VID_MODE_STS_CTL_ERR_MISSING_VSYNC;
+	val |= DSI_VID_MODE_STS_CTL_ERR_MISSING_DATA;
+	writel(val, d->regs + DSI_VID_MODE_STS_CTL);
+
+	/* Enable video mode */
+	val = readl(d->regs + DSI_MCTL_MAIN_DATA_CTL);
+	val |= DSI_MCTL_MAIN_DATA_CTL_VID_EN;
+	writel(val, d->regs + DSI_MCTL_MAIN_DATA_CTL);
+}
+
+static void mcde_dsi_start(struct mcde_dsi *d)
+{
+	unsigned long hs_freq;
+	u32 val;
+	int i;
+
+	/* No integration mode */
+	writel(0, d->regs + DSI_MCTL_INTEGRATION_MODE);
+
+	/* Enable the DSI port, from drivers/video/mcde/dsilink_v2.c */
+	val = DSI_MCTL_MAIN_DATA_CTL_LINK_EN |
+		DSI_MCTL_MAIN_DATA_CTL_BTA_EN |
+		DSI_MCTL_MAIN_DATA_CTL_READ_EN |
+		DSI_MCTL_MAIN_DATA_CTL_REG_TE_EN;
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET)
+		val |= DSI_MCTL_MAIN_DATA_CTL_HOST_EOT_GEN;
+	writel(val, d->regs + DSI_MCTL_MAIN_DATA_CTL);
+
+	/* Set a high command timeout, clear other fields */
+	val = 0x3ff << DSI_CMD_MODE_CTL_TE_TIMEOUT_SHIFT;
+	writel(val, d->regs + DSI_CMD_MODE_CTL);
+
+	/*
+	 * UI_X4 is described as "unit interval times four"
+	 * I guess since DSI packets are 4 bytes wide, one unit
+	 * is one byte.
+	 */
+	hs_freq = clk_get_rate(d->hs_clk);
+	hs_freq /= 1000000; /* MHz */
+	val = 4000 / hs_freq;
+	dev_dbg(d->dev, "UI value: %d\n", val);
+	val <<= DSI_MCTL_DPHY_STATIC_UI_X4_SHIFT;
+	val &= DSI_MCTL_DPHY_STATIC_UI_X4_MASK;
+	writel(val, d->regs + DSI_MCTL_DPHY_STATIC);
+
+	/*
+	 * Enable clocking: 0x0f (something?) between each burst,
+	 * enable the second lane if needed, enable continuous clock if
+	 * needed, enable switch into ULPM (ultra-low power mode) on
+	 * all the lines.
+	 */
+	val = 0x0f << DSI_MCTL_MAIN_PHY_CTL_WAIT_BURST_TIME_SHIFT;
+	if (d->mdsi->lanes == 2)
+		val |= DSI_MCTL_MAIN_PHY_CTL_LANE2_EN;
+	if (!(d->mdsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
+		val |= DSI_MCTL_MAIN_PHY_CTL_CLK_CONTINUOUS;
+	val |= DSI_MCTL_MAIN_PHY_CTL_CLK_ULPM_EN |
+		DSI_MCTL_MAIN_PHY_CTL_DAT1_ULPM_EN |
+		DSI_MCTL_MAIN_PHY_CTL_DAT2_ULPM_EN;
+	writel(val, d->regs + DSI_MCTL_MAIN_PHY_CTL);
+
+	val = (1 << DSI_MCTL_ULPOUT_TIME_CKLANE_ULPOUT_TIME_SHIFT) |
+		(1 << DSI_MCTL_ULPOUT_TIME_DATA_ULPOUT_TIME_SHIFT);
+	writel(val, d->regs + DSI_MCTL_ULPOUT_TIME);
+
+	writel(DSI_DPHY_LANES_TRIM_DPHY_SPECS_90_81B_0_90,
+	       d->regs + DSI_DPHY_LANES_TRIM);
+
+	/* High PHY timeout */
+	val = (0x0f << DSI_MCTL_DPHY_TIMEOUT_CLK_DIV_SHIFT) |
+		(0x3fff << DSI_MCTL_DPHY_TIMEOUT_HSTX_TO_VAL_SHIFT) |
+		(0x3fff << DSI_MCTL_DPHY_TIMEOUT_LPRX_TO_VAL_SHIFT);
+	writel(val, d->regs + DSI_MCTL_DPHY_TIMEOUT);
+
+	val = DSI_MCTL_MAIN_EN_PLL_START |
+		DSI_MCTL_MAIN_EN_CKLANE_EN |
+		DSI_MCTL_MAIN_EN_DAT1_EN |
+		DSI_MCTL_MAIN_EN_IF1_EN;
+	if (d->mdsi->lanes == 2)
+		val |= DSI_MCTL_MAIN_EN_DAT2_EN;
+	writel(val, d->regs + DSI_MCTL_MAIN_EN);
+
+	/* Wait for the PLL to lock and the clock and data lines to come up */
+	i = 0;
+	val = DSI_MCTL_MAIN_STS_PLL_LOCK |
+		DSI_MCTL_MAIN_STS_CLKLANE_READY |
+		DSI_MCTL_MAIN_STS_DAT1_READY;
+	if (d->mdsi->lanes == 2)
+		val |= DSI_MCTL_MAIN_STS_DAT2_READY;
+	while ((readl(d->regs + DSI_MCTL_MAIN_STS) & val) != val) {
+		/* Sleep for a millisecond */
+		usleep_range(1000, 1500);
+		if (i++ == 100) {
+			dev_warn(d->dev, "DSI lanes did not start up\n");
+			return;
+		}
+	}
+
+	/* TODO needed? */
+
+	/* Command mode, clear IF1 ID */
+	val = readl(d->regs + DSI_CMD_MODE_CTL);
+	/*
+	 * If we enable low-power mode here, with
+	 * val |= DSI_CMD_MODE_CTL_IF1_LP_EN
+	 * then display updates become really slow.
+	 */
+	val &= ~DSI_CMD_MODE_CTL_IF1_ID_MASK;
+	writel(val, d->regs + DSI_CMD_MODE_CTL);
+
+	/* Wait for DSI PHY to initialize */
+	usleep_range(100, 200);
+	dev_info(d->dev, "DSI link enabled\n");
+}
+
+
+static void mcde_dsi_bridge_enable(struct drm_bridge *bridge)
+{
+	struct mcde_dsi *d = bridge_to_mcde_dsi(bridge);
+
+	dev_info(d->dev, "enable DSI master\n");
+};
+
+static void mcde_dsi_bridge_mode_set(struct drm_bridge *bridge,
+				     const struct drm_display_mode *mode,
+				     const struct drm_display_mode *adj)
+{
+	struct mcde_dsi *d = bridge_to_mcde_dsi(bridge);
+	unsigned long pixel_clock_hz = mode->clock * 1000;
+	unsigned long hs_freq, lp_freq;
+	u32 val;
+	int ret;
+
+	if (!d->mdsi) {
+		dev_err(d->dev, "no DSI device attached to encoder!\n");
+		return;
+	}
+
+	dev_info(d->dev, "set DSI master to %dx%d %lu Hz %s mode\n",
+		 mode->hdisplay, mode->vdisplay, pixel_clock_hz,
+		 (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) ? "VIDEO" : "CMD"
+		);
+
+	/* Copy maximum clock frequencies */
+	if (d->mdsi->lp_rate)
+		lp_freq = d->mdsi->lp_rate;
+	else
+		lp_freq = DSI_DEFAULT_LP_FREQ_HZ;
+	if (d->mdsi->hs_rate)
+		hs_freq = d->mdsi->hs_rate;
+	else
+		hs_freq = DSI_DEFAULT_HS_FREQ_HZ;
+
+	/* Enable LP (Low Power, Energy Save, ES) and HS (High Speed) clocks */
+	d->lp_freq = clk_round_rate(d->lp_clk, lp_freq);
+	ret = clk_set_rate(d->lp_clk, d->lp_freq);
+	if (ret)
+		dev_err(d->dev, "failed to set LP clock rate %lu Hz\n",
+			d->lp_freq);
+
+	d->hs_freq = clk_round_rate(d->hs_clk, hs_freq);
+	ret = clk_set_rate(d->hs_clk, d->hs_freq);
+	if (ret)
+		dev_err(d->dev, "failed to set HS clock rate %lu Hz\n",
+			d->hs_freq);
+
+	/* Start clocks */
+	ret = clk_prepare_enable(d->lp_clk);
+	if (ret)
+		dev_err(d->dev, "failed to enable LP clock\n");
+	else
+		dev_info(d->dev, "DSI LP clock rate %lu Hz\n",
+			 d->lp_freq);
+	ret = clk_prepare_enable(d->hs_clk);
+	if (ret)
+		dev_err(d->dev, "failed to enable HS clock\n");
+	else
+		dev_info(d->dev, "DSI HS clock rate %lu Hz\n",
+			 d->hs_freq);
+
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
+		mcde_dsi_setup_video_mode(d, mode);
+	} else {
+		/* Command mode, clear IF1 ID */
+		val = readl(d->regs + DSI_CMD_MODE_CTL);
+		/*
+		 * If we enable low-power mode here with
+		 * val |= DSI_CMD_MODE_CTL_IF1_LP_EN
+		 * the display updates become really slow.
+		 */
+		val &= ~DSI_CMD_MODE_CTL_IF1_ID_MASK;
+		writel(val, d->regs + DSI_CMD_MODE_CTL);
+	}
+}
+
+static void mcde_dsi_wait_for_command_mode_stop(struct mcde_dsi *d)
+{
+	u32 val;
+	int i;
+
+	/*
+	 * Wait until we get out of command mode
+	 * CSM = Command State Machine
+	 */
+	i = 0;
+	val = DSI_CMD_MODE_STS_CSM_RUNNING;
+	while ((readl(d->regs + DSI_CMD_MODE_STS) & val) == val) {
+		/* Sleep for a millisecond */
+		usleep_range(1000, 2000);
+		if (i++ == 100) {
+			dev_warn(d->dev,
+				 "could not get out of command mode\n");
+			return;
+		}
+	}
+}
+
+static void mcde_dsi_wait_for_video_mode_stop(struct mcde_dsi *d)
+{
+	u32 val;
+	int i;
+
+	/* Wait until we get out og video mode */
+	i = 0;
+	val = DSI_VID_MODE_STS_VSG_RUNNING;
+	while ((readl(d->regs + DSI_VID_MODE_STS) & val) == val) {
+		/* Sleep for a millisecond */
+		usleep_range(1000, 2000);
+		if (i++ == 100) {
+			dev_warn(d->dev,
+				 "could not get out of video mode\n");
+			return;
+		}
+	}
+}
+
+static void mcde_dsi_bridge_disable(struct drm_bridge *bridge)
+{
+	struct mcde_dsi *d = bridge_to_mcde_dsi(bridge);
+	u32 val;
+
+	/* Disable all error interrupts */
+	writel(0, d->regs + DSI_VID_MODE_STS_CTL);
+
+	if (d->mdsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
+		/* Stop video mode */
+		val = readl(d->regs + DSI_MCTL_MAIN_DATA_CTL);
+		val &= ~DSI_MCTL_MAIN_DATA_CTL_VID_EN;
+		writel(val, d->regs + DSI_MCTL_MAIN_DATA_CTL);
+		mcde_dsi_wait_for_video_mode_stop(d);
+	} else {
+		/* Stop command mode */
+		mcde_dsi_wait_for_command_mode_stop(d);
+	}
+
+	/* Stop clocks */
+	clk_disable_unprepare(d->hs_clk);
+	clk_disable_unprepare(d->lp_clk);
+}
+
+/*
+ * This connector needs no special handling, just use the default
+ * helpers for everything. It's pretty dummy.
+ */
+static const struct drm_connector_funcs mcde_dsi_connector_funcs = {
+	.reset = drm_atomic_helper_connector_reset,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int mcde_dsi_get_modes(struct drm_connector *connector)
+{
+	struct mcde_dsi *d = connector_to_mcde_dsi(connector);
+
+	/* Just pass the question to the panel */
+	if (d->panel)
+		return drm_panel_get_modes(d->panel);
+
+	/* TODO: deal with bridges */
+
+	return 0;
+}
+
+static const struct drm_connector_helper_funcs
+mcde_dsi_connector_helper_funcs = {
+	.get_modes = mcde_dsi_get_modes,
+};
+
+static int mcde_dsi_bridge_attach(struct drm_bridge *bridge)
+{
+	struct mcde_dsi *d = bridge_to_mcde_dsi(bridge);
+	struct drm_device *drm = bridge->dev;
+	int ret;
+
+	drm_connector_helper_add(&d->connector,
+				 &mcde_dsi_connector_helper_funcs);
+
+	if (!drm_core_check_feature(drm, DRIVER_ATOMIC)) {
+		dev_err(d->dev, "we need atomic updates\n");
+		return -ENOTSUPP;
+	}
+
+	ret = drm_connector_init(drm, &d->connector,
+				 &mcde_dsi_connector_funcs,
+				 DRM_MODE_CONNECTOR_DSI);
+	if (ret) {
+		dev_err(d->dev, "failed to initialize DSI bridge connector\n");
+		return ret;
+	}
+	d->connector.polled = DRM_CONNECTOR_POLL_CONNECT;
+	/* The encoder in the bridge attached to the DSI bridge */
+	drm_connector_attach_encoder(&d->connector, bridge->encoder);
+	/* Then we attach the DSI bridge to the output (panel etc) bridge */
+	ret = drm_bridge_attach(bridge->encoder, d->bridge_out, bridge);
+	if (ret) {
+		dev_err(d->dev, "failed to attach the DSI bridge\n");
+		return ret;
+	}
+	d->connector.status = connector_status_connected;
+
+	return 0;
+}
+
+static const struct drm_bridge_funcs mcde_dsi_bridge_funcs = {
+	.attach = mcde_dsi_bridge_attach,
+	.mode_set = mcde_dsi_bridge_mode_set,
+	.disable = mcde_dsi_bridge_disable,
+	.enable = mcde_dsi_bridge_enable,
+};
+
+static int mcde_dsi_bind(struct device *dev, struct device *master,
+			 void *data)
+{
+	struct drm_device *drm = data;
+	struct mcde *mcde = drm->dev_private;
+	struct mcde_dsi *d = dev_get_drvdata(dev);
+	struct device_node *child;
+	struct drm_panel *panel = NULL;
+	struct drm_bridge *bridge = NULL;
+
+	if (!of_get_available_child_count(dev->of_node)) {
+		dev_info(dev, "unused DSI interface\n");
+		d->unused = true;
+		return 0;
+	}
+	d->mcde = mcde;
+	/* If the display attached before binding, set this up */
+	if (d->mdsi)
+		d->mcde->mdsi = d->mdsi;
+
+	/* Obtain the clocks */
+	d->hs_clk = devm_clk_get(dev, "hs");
+	if (IS_ERR(d->hs_clk)) {
+		dev_err(dev, "unable to get HS clock\n");
+		return PTR_ERR(d->hs_clk);
+	}
+
+	d->lp_clk = devm_clk_get(dev, "lp");
+	if (IS_ERR(d->lp_clk)) {
+		dev_err(dev, "unable to get LP clock\n");
+		return PTR_ERR(d->lp_clk);
+	}
+
+	/* Assert RESET through the PRCMU, active low */
+	/* FIXME: which DSI block? */
+	regmap_update_bits(d->prcmu, PRCM_DSI_SW_RESET,
+			   PRCM_DSI_SW_RESET_DSI0_SW_RESETN, 0);
+
+	usleep_range(100, 200);
+
+	/* De-assert RESET again */
+	regmap_update_bits(d->prcmu, PRCM_DSI_SW_RESET,
+			   PRCM_DSI_SW_RESET_DSI0_SW_RESETN,
+			   PRCM_DSI_SW_RESET_DSI0_SW_RESETN);
+
+	/* Start up the hardware */
+	mcde_dsi_start(d);
+
+	/* Look for a panel as a child to this node */
+	for_each_available_child_of_node(dev->of_node, child) {
+		panel = of_drm_find_panel(child);
+		if (IS_ERR(panel)) {
+			dev_err(dev, "failed to find panel try bridge (%lu)\n",
+				PTR_ERR(panel));
+			bridge = of_drm_find_bridge(child);
+			if (IS_ERR(bridge)) {
+				dev_err(dev, "failed to find bridge (%lu)\n",
+					PTR_ERR(bridge));
+				return PTR_ERR(bridge);
+			}
+		}
+	}
+	if (panel) {
+		bridge = drm_panel_bridge_add(panel,
+					      DRM_MODE_CONNECTOR_DSI);
+		if (IS_ERR(bridge)) {
+			dev_err(dev, "error adding panel bridge\n");
+			return PTR_ERR(bridge);
+		}
+		dev_info(dev, "connected to panel\n");
+		d->panel = panel;
+	} else if (bridge) {
+		/* TODO: AV8100 HDMI encoder goes here for example */
+		dev_info(dev, "connected to non-panel bridge (unsupported)\n");
+		return -ENODEV;
+	} else {
+		dev_err(dev, "no panel or bridge\n");
+		return -ENODEV;
+	}
+
+	d->bridge_out = bridge;
+
+	/* Create a bridge for this DSI channel */
+	d->bridge.funcs = &mcde_dsi_bridge_funcs;
+	d->bridge.of_node = dev->of_node;
+	drm_bridge_add(&d->bridge);
+
+	/* TODO: first come first serve, use a list */
+	mcde->bridge = &d->bridge;
+
+	dev_info(dev, "initialized MCDE DSI bridge\n");
+
+	return 0;
+}
+
+static void mcde_dsi_unbind(struct device *dev, struct device *master,
+			    void *data)
+{
+	struct mcde_dsi *d = dev_get_drvdata(dev);
+
+	if (d->panel)
+		drm_panel_bridge_remove(d->bridge_out);
+	regmap_update_bits(d->prcmu, PRCM_DSI_SW_RESET,
+			   PRCM_DSI_SW_RESET_DSI0_SW_RESETN, 0);
+}
+
+static const struct component_ops mcde_dsi_component_ops = {
+	.bind   = mcde_dsi_bind,
+	.unbind = mcde_dsi_unbind,
+};
+
+static int mcde_dsi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mcde_dsi *d;
+	struct mipi_dsi_host *host;
+	struct resource *res;
+	u32 dsi_id;
+	int ret;
+
+	d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+	d->dev = dev;
+	platform_set_drvdata(pdev, d);
+
+	/* Get a handle on the PRCMU so we can do reset */
+	d->prcmu =
+		syscon_regmap_lookup_by_compatible("stericsson,db8500-prcmu");
+	if (IS_ERR(d->prcmu)) {
+		dev_err(dev, "no PRCMU regmap\n");
+		return PTR_ERR(d->prcmu);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	d->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(d->regs)) {
+		dev_err(dev, "no DSI regs\n");
+		return PTR_ERR(d->regs);
+	}
+
+	dsi_id = readl(d->regs + DSI_ID_REG);
+	dev_info(dev, "HW revision 0x%08x\n", dsi_id);
+
+	host = &d->dsi_host;
+	host->dev = dev;
+	host->ops = &mcde_dsi_host_ops;
+	ret = mipi_dsi_host_register(host);
+	if (ret < 0) {
+		dev_err(dev, "failed to register DSI host: %d\n", ret);
+		return ret;
+	}
+	dev_info(dev, "registered DSI host\n");
+
+	platform_set_drvdata(pdev, d);
+	return component_add(dev, &mcde_dsi_component_ops);
+}
+
+static int mcde_dsi_remove(struct platform_device *pdev)
+{
+	struct mcde_dsi *d = platform_get_drvdata(pdev);
+
+	component_del(&pdev->dev, &mcde_dsi_component_ops);
+	mipi_dsi_host_unregister(&d->dsi_host);
+
+	return 0;
+}
+
+static const struct of_device_id mcde_dsi_of_match[] = {
+	{
+		.compatible = "ste,mcde-dsi",
+	},
+	{},
+};
+
+struct platform_driver mcde_dsi_driver = {
+	.driver = {
+		.name           = "mcde-dsi",
+		.of_match_table = of_match_ptr(mcde_dsi_of_match),
+	},
+	.probe = mcde_dsi_probe,
+	.remove = mcde_dsi_remove,
+};
diff --git a/drivers/gpu/drm/mcde/mcde_dsi_regs.h b/drivers/gpu/drm/mcde/mcde_dsi_regs.h
new file mode 100644
index 000000000000..c9253321a3be
--- /dev/null
+++ b/drivers/gpu/drm/mcde/mcde_dsi_regs.h
@@ -0,0 +1,385 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DRM_MCDE_DSI_REGS
+#define __DRM_MCDE_DSI_REGS
+
+#define DSI_MCTL_INTEGRATION_MODE 0x00000000
+
+#define DSI_MCTL_MAIN_DATA_CTL 0x00000004
+#define DSI_MCTL_MAIN_DATA_CTL_LINK_EN BIT(0)
+#define DSI_MCTL_MAIN_DATA_CTL_IF1_MODE BIT(1)
+#define DSI_MCTL_MAIN_DATA_CTL_VID_EN BIT(2)
+#define DSI_MCTL_MAIN_DATA_CTL_TVG_SEL BIT(3)
+#define DSI_MCTL_MAIN_DATA_CTL_TBG_SEL BIT(4)
+#define DSI_MCTL_MAIN_DATA_CTL_IF1_TE_EN BIT(5)
+#define DSI_MCTL_MAIN_DATA_CTL_IF2_TE_EN BIT(6)
+#define DSI_MCTL_MAIN_DATA_CTL_REG_TE_EN BIT(7)
+#define DSI_MCTL_MAIN_DATA_CTL_READ_EN BIT(8)
+#define DSI_MCTL_MAIN_DATA_CTL_BTA_EN BIT(9)
+#define DSI_MCTL_MAIN_DATA_CTL_DISP_GEN_ECC BIT(10)
+#define DSI_MCTL_MAIN_DATA_CTL_DISP_GEN_CHECKSUM BIT(11)
+#define DSI_MCTL_MAIN_DATA_CTL_HOST_EOT_GEN BIT(12)
+#define DSI_MCTL_MAIN_DATA_CTL_DISP_EOT_GEN BIT(13)
+#define DSI_MCTL_MAIN_DATA_CTL_DLX_REMAP_EN BIT(14)
+#define DSI_MCTL_MAIN_DATA_CTL_TE_POLLING_EN BIT(15)
+
+#define DSI_MCTL_MAIN_PHY_CTL 0x00000008
+#define DSI_MCTL_MAIN_PHY_CTL_LANE2_EN BIT(0)
+#define DSI_MCTL_MAIN_PHY_CTL_FORCE_STOP_MODE BIT(1)
+#define DSI_MCTL_MAIN_PHY_CTL_CLK_CONTINUOUS BIT(2)
+#define DSI_MCTL_MAIN_PHY_CTL_CLK_ULPM_EN BIT(3)
+#define DSI_MCTL_MAIN_PHY_CTL_DAT1_ULPM_EN BIT(4)
+#define DSI_MCTL_MAIN_PHY_CTL_DAT2_ULPM_EN BIT(5)
+#define DSI_MCTL_MAIN_PHY_CTL_WAIT_BURST_TIME_SHIFT 6
+#define DSI_MCTL_MAIN_PHY_CTL_WAIT_BURST_TIME_MASK 0x000003C0
+#define DSI_MCTL_MAIN_PHY_CTL_CLOCK_FORCE_STOP_MODE BIT(10)
+
+#define DSI_MCTL_PLL_CTL 0x0000000C
+#define DSI_MCTL_LANE_STS 0x00000010
+
+#define DSI_MCTL_DPHY_TIMEOUT 0x00000014
+#define DSI_MCTL_DPHY_TIMEOUT_CLK_DIV_SHIFT 0
+#define DSI_MCTL_DPHY_TIMEOUT_CLK_DIV_MASK 0x0000000F
+#define DSI_MCTL_DPHY_TIMEOUT_HSTX_TO_VAL_SHIFT 4
+#define DSI_MCTL_DPHY_TIMEOUT_HSTX_TO_VAL_MASK 0x0003FFF0
+#define DSI_MCTL_DPHY_TIMEOUT_LPRX_TO_VAL_SHIFT 18
+#define DSI_MCTL_DPHY_TIMEOUT_LPRX_TO_VAL_MASK 0xFFFC0000
+
+#define DSI_MCTL_ULPOUT_TIME 0x00000018
+#define DSI_MCTL_ULPOUT_TIME_CKLANE_ULPOUT_TIME_SHIFT 0
+#define DSI_MCTL_ULPOUT_TIME_CKLANE_ULPOUT_TIME_MASK 0x000001FF
+#define DSI_MCTL_ULPOUT_TIME_DATA_ULPOUT_TIME_SHIFT 9
+#define DSI_MCTL_ULPOUT_TIME_DATA_ULPOUT_TIME_MASK 0x0003FE00
+
+#define DSI_MCTL_DPHY_STATIC 0x0000001C
+#define DSI_MCTL_DPHY_STATIC_SWAP_PINS_CLK BIT(0)
+#define DSI_MCTL_DPHY_STATIC_HS_INVERT_CLK BIT(1)
+#define DSI_MCTL_DPHY_STATIC_SWAP_PINS_DAT1 BIT(2)
+#define DSI_MCTL_DPHY_STATIC_HS_INVERT_DAT1 BIT(3)
+#define DSI_MCTL_DPHY_STATIC_SWAP_PINS_DAT2 BIT(4)
+#define DSI_MCTL_DPHY_STATIC_HS_INVERT_DAT2 BIT(5)
+#define DSI_MCTL_DPHY_STATIC_UI_X4_SHIFT 6
+#define DSI_MCTL_DPHY_STATIC_UI_X4_MASK 0x00000FC0
+
+#define DSI_MCTL_MAIN_EN 0x00000020
+#define DSI_MCTL_MAIN_EN_PLL_START BIT(0)
+#define DSI_MCTL_MAIN_EN_CKLANE_EN BIT(3)
+#define DSI_MCTL_MAIN_EN_DAT1_EN BIT(4)
+#define DSI_MCTL_MAIN_EN_DAT2_EN BIT(5)
+#define DSI_MCTL_MAIN_EN_CLKLANE_ULPM_REQ BIT(6)
+#define DSI_MCTL_MAIN_EN_DAT1_ULPM_REQ BIT(7)
+#define DSI_MCTL_MAIN_EN_DAT2_ULPM_REQ BIT(8)
+#define DSI_MCTL_MAIN_EN_IF1_EN BIT(9)
+#define DSI_MCTL_MAIN_EN_IF2_EN BIT(10)
+
+#define DSI_MCTL_MAIN_STS 0x00000024
+#define DSI_MCTL_MAIN_STS_PLL_LOCK BIT(0)
+#define DSI_MCTL_MAIN_STS_CLKLANE_READY BIT(1)
+#define DSI_MCTL_MAIN_STS_DAT1_READY BIT(2)
+#define DSI_MCTL_MAIN_STS_DAT2_READY BIT(3)
+#define DSI_MCTL_MAIN_STS_HSTX_TO_ERR BIT(4)
+#define DSI_MCTL_MAIN_STS_LPRX_TO_ERR BIT(5)
+#define DSI_MCTL_MAIN_STS_CRS_UNTERM_PCK BIT(6)
+#define DSI_MCTL_MAIN_STS_VRS_UNTERM_PCK BIT(7)
+
+#define DSI_MCTL_DPHY_ERR 0x00000028
+#define DSI_INT_VID_RDDATA 0x00000030
+#define DSI_INT_VID_GNT 0x00000034
+#define DSI_INT_CMD_RDDATA 0x00000038
+#define DSI_INT_CMD_GNT 0x0000003C
+#define DSI_INT_INTERRUPT_CTL 0x00000040
+
+#define DSI_CMD_MODE_CTL 0x00000050
+#define DSI_CMD_MODE_CTL_IF1_ID_SHIFT 0
+#define DSI_CMD_MODE_CTL_IF1_ID_MASK 0x00000003
+#define DSI_CMD_MODE_CTL_IF2_ID_SHIFT 2
+#define DSI_CMD_MODE_CTL_IF2_ID_MASK 0x0000000C
+#define DSI_CMD_MODE_CTL_IF1_LP_EN BIT(4)
+#define DSI_CMD_MODE_CTL_IF2_LP_EN BIT(5)
+#define DSI_CMD_MODE_CTL_ARB_MODE BIT(6)
+#define DSI_CMD_MODE_CTL_ARB_PRI BIT(7)
+#define DSI_CMD_MODE_CTL_FIL_VALUE_SHIFT 8
+#define DSI_CMD_MODE_CTL_FIL_VALUE_MASK 0x0000FF00
+#define DSI_CMD_MODE_CTL_TE_TIMEOUT_SHIFT 16
+#define DSI_CMD_MODE_CTL_TE_TIMEOUT_MASK 0x03FF0000
+
+#define DSI_CMD_MODE_STS 0x00000054
+#define DSI_CMD_MODE_STS_ERR_NO_TE BIT(0)
+#define DSI_CMD_MODE_STS_ERR_TE_MISS BIT(1)
+#define DSI_CMD_MODE_STS_ERR_SDI1_UNDERRUN BIT(2)
+#define DSI_CMD_MODE_STS_ERR_SDI2_UNDERRUN BIT(3)
+#define DSI_CMD_MODE_STS_ERR_UNWANTED_RD BIT(4)
+#define DSI_CMD_MODE_STS_CSM_RUNNING BIT(5)
+
+#define DSI_DIRECT_CMD_SEND 0x00000060
+
+#define DSI_DIRECT_CMD_MAIN_SETTINGS 0x00000064
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_SHIFT 0
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_MASK 0x00000007
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_WRITE 0
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_READ 1
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_TE_REQ 4
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_TRIG_REQ 5
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_NAT_BTA_REQ 6
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LONGNOTSHORT BIT(3)
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SHIFT 8
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_MASK 0x00003F00
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_TURN_ON_PERIPHERAL 50
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SHUT_DOWN_PERIPHERAL 34
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_GENERIC_SHORT_WRITE_0 3
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_GENERIC_SHORT_WRITE_1 19
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_GENERIC_SHORT_WRITE_2 35
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_GENERIC_LONG_WRITE 41
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_DCS_SHORT_WRITE_0 5
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_DCS_SHORT_WRITE_1 21
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_DCS_LONG_WRITE 57
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_DCS_READ 6
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_HEAD_SET_MAX_PKT_SIZE 55
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_ID_SHIFT 14
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_SIZE_SHIFT 16
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_CMD_LP_EN BIT(21)
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_TRIGGER_VAL_SHIFT 24
+#define DSI_DIRECT_CMD_MAIN_SETTINGS_TRIGGER_VAL_MASK 0x0F000000
+
+#define DSI_DIRECT_CMD_STS 0x00000068
+#define DSI_DIRECT_CMD_STS_CMD_TRANSMISSION BIT(0)
+#define DSI_DIRECT_CMD_STS_WRITE_COMPLETED BIT(1)
+#define DSI_DIRECT_CMD_STS_TRIGGER_COMPLETED BIT(2)
+#define DSI_DIRECT_CMD_STS_READ_COMPLETED BIT(3)
+#define DSI_DIRECT_CMD_STS_ACKNOWLEDGE_RECEIVED_SHIFT BIT(4)
+#define DSI_DIRECT_CMD_STS_ACKNOWLEDGE_WITH_ERR_RECEIVED BIT(5)
+#define DSI_DIRECT_CMD_STS_TRIGGER_RECEIVED BIT(6)
+#define DSI_DIRECT_CMD_STS_TE_RECEIVED BIT(7)
+#define DSI_DIRECT_CMD_STS_BTA_COMPLETED BIT(8)
+#define DSI_DIRECT_CMD_STS_BTA_FINISHED BIT(9)
+#define DSI_DIRECT_CMD_STS_READ_COMPLETED_WITH_ERR BIT(10)
+#define DSI_DIRECT_CMD_STS_TRIGGER_VAL_MASK 0x00007800
+#define DSI_DIRECT_CMD_STS_TRIGGER_VAL_SHIFT 11
+#define DSI_DIRECT_CMD_STS_ACK_VAL_SHIFT 16
+#define DSI_DIRECT_CMD_STS_ACK_VAL_MASK 0xFFFF0000
+
+#define DSI_DIRECT_CMD_RD_INIT 0x0000006C
+#define DSI_DIRECT_CMD_RD_INIT_RESET_SHIFT 0
+#define DSI_DIRECT_CMD_RD_INIT_RESET_MASK 0xFFFFFFFF
+
+#define DSI_DIRECT_CMD_WRDAT0 0x00000070
+#define DSI_DIRECT_CMD_WRDAT1 0x00000074
+#define DSI_DIRECT_CMD_WRDAT2 0x00000078
+#define DSI_DIRECT_CMD_WRDAT3 0x0000007C
+
+#define DSI_DIRECT_CMD_RDDAT 0x00000080
+
+#define DSI_DIRECT_CMD_RD_PROPERTY 0x00000084
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_SIZE_SHIFT 0
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_SIZE_MASK 0x0000FFFF
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_ID_SHIFT 16
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_ID_MASK 0x00030000
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_DCSNOTGENERIC_SHIFT 18
+#define DSI_DIRECT_CMD_RD_PROPERTY_RD_DCSNOTGENERIC_MASK 0x00040000
+
+#define DSI_DIRECT_CMD_RD_STS 0x00000088
+
+#define DSI_VID_MAIN_CTL 0x00000090
+#define DSI_VID_MAIN_CTL_START_MODE_SHIFT 0
+#define DSI_VID_MAIN_CTL_START_MODE_MASK 0x00000003
+#define DSI_VID_MAIN_CTL_STOP_MODE_SHIFT 2
+#define DSI_VID_MAIN_CTL_STOP_MODE_MASK 0x0000000C
+#define DSI_VID_MAIN_CTL_VID_ID_SHIFT 4
+#define DSI_VID_MAIN_CTL_VID_ID_MASK 0x00000030
+#define DSI_VID_MAIN_CTL_HEADER_SHIFT 6
+#define DSI_VID_MAIN_CTL_HEADER_MASK 0x00000FC0
+#define DSI_VID_MAIN_CTL_VID_PIXEL_MODE_16BITS 0
+#define DSI_VID_MAIN_CTL_VID_PIXEL_MODE_18BITS BIT(12)
+#define DSI_VID_MAIN_CTL_VID_PIXEL_MODE_18BITS_LOOSE BIT(13)
+#define DSI_VID_MAIN_CTL_VID_PIXEL_MODE_24BITS (BIT(12) | BIT(13))
+#define DSI_VID_MAIN_CTL_BURST_MODE BIT(14)
+#define DSI_VID_MAIN_CTL_SYNC_PULSE_ACTIVE BIT(15)
+#define DSI_VID_MAIN_CTL_SYNC_PULSE_HORIZONTAL BIT(16)
+#define DSI_VID_MAIN_CTL_REG_BLKLINE_MODE_NULL 0
+#define DSI_VID_MAIN_CTL_REG_BLKLINE_MODE_BLANKING BIT(17)
+#define DSI_VID_MAIN_CTL_REG_BLKLINE_MODE_LP_0 BIT(18)
+#define DSI_VID_MAIN_CTL_REG_BLKLINE_MODE_LP_1 (BIT(17) | BIT(18))
+#define DSI_VID_MAIN_CTL_REG_BLKEOL_MODE_NULL 0
+#define DSI_VID_MAIN_CTL_REG_BLKEOL_MODE_BLANKING BIT(19)
+#define DSI_VID_MAIN_CTL_REG_BLKEOL_MODE_LP_0 BIT(20)
+#define DSI_VID_MAIN_CTL_REG_BLKEOL_MODE_LP_1 (BIT(19) | BIT(20))
+#define DSI_VID_MAIN_CTL_RECOVERY_MODE_SHIFT 21
+#define DSI_VID_MAIN_CTL_RECOVERY_MODE_MASK 0x00600000
+
+#define DSI_VID_VSIZE 0x00000094
+#define DSI_VID_VSIZE_VSA_LENGTH_SHIFT 0
+#define DSI_VID_VSIZE_VSA_LENGTH_MASK 0x0000003F
+#define DSI_VID_VSIZE_VBP_LENGTH_SHIFT 6
+#define DSI_VID_VSIZE_VBP_LENGTH_MASK 0x00000FC0
+#define DSI_VID_VSIZE_VFP_LENGTH_SHIFT 12
+#define DSI_VID_VSIZE_VFP_LENGTH_MASK 0x000FF000
+#define DSI_VID_VSIZE_VACT_LENGTH_SHIFT 20
+#define DSI_VID_VSIZE_VACT_LENGTH_MASK 0x7FF00000
+
+#define DSI_VID_HSIZE1 0x00000098
+#define DSI_VID_HSIZE1_HSA_LENGTH_SHIFT 0
+#define DSI_VID_HSIZE1_HSA_LENGTH_MASK 0x000003FF
+#define DSI_VID_HSIZE1_HBP_LENGTH_SHIFT 10
+#define DSI_VID_HSIZE1_HBP_LENGTH_MASK 0x000FFC00
+#define DSI_VID_HSIZE1_HFP_LENGTH_SHIFT 20
+#define DSI_VID_HSIZE1_HFP_LENGTH_MASK 0x7FF00000
+
+#define DSI_VID_HSIZE2 0x0000009C
+#define DSI_VID_HSIZE2_RGB_SIZE_SHIFT 0
+#define DSI_VID_HSIZE2_RGB_SIZE_MASK 0x00001FFF
+
+#define DSI_VID_BLKSIZE1 0x000000A0
+#define DSI_VID_BLKSIZE1_BLKLINE_EVENT_PCK_SHIFT 0
+#define DSI_VID_BLKSIZE1_BLKLINE_EVENT_PCK_MASK 0x00001FFF
+#define DSI_VID_BLKSIZE1_BLKEOL_PCK_SHIFT 13
+#define DSI_VID_BLKSIZE1_BLKEOL_PCK_MASK 0x03FFE000
+
+#define DSI_VID_BLKSIZE2 0x000000A4
+#define DSI_VID_BLKSIZE2_BLKLINE_PULSE_PCK_SHIFT 0
+#define DSI_VID_BLKSIZE2_BLKLINE_PULSE_PCK_MASK 0x00001FFF
+
+#define DSI_VID_PCK_TIME 0x000000A8
+#define DSI_VID_PCK_TIME_BLKEOL_DURATION_SHIFT 0
+
+#define DSI_VID_DPHY_TIME 0x000000AC
+#define DSI_VID_DPHY_TIME_REG_LINE_DURATION_SHIFT 0
+#define DSI_VID_DPHY_TIME_REG_LINE_DURATION_MASK 0x00001FFF
+#define DSI_VID_DPHY_TIME_REG_WAKEUP_TIME_SHIFT 13
+#define DSI_VID_DPHY_TIME_REG_WAKEUP_TIME_MASK 0x00FFE000
+
+#define DSI_VID_MODE_STS 0x000000BC
+#define DSI_VID_MODE_STS_VSG_RUNNING BIT(0)
+
+#define DSI_VID_VCA_SETTING1 0x000000C0
+#define DSI_VID_VCA_SETTING1_MAX_BURST_LIMIT_SHIFT 0
+#define DSI_VID_VCA_SETTING1_MAX_BURST_LIMIT_MASK 0x0000FFFF
+#define DSI_VID_VCA_SETTING1_BURST_LP BIT(16)
+
+#define DSI_VID_VCA_SETTING2 0x000000C4
+#define DSI_VID_VCA_SETTING2_EXACT_BURST_LIMIT_SHIFT 0
+#define DSI_VID_VCA_SETTING2_EXACT_BURST_LIMIT_MASK 0x0000FFFF
+#define DSI_VID_VCA_SETTING2_MAX_LINE_LIMIT_SHIFT 16
+#define DSI_VID_VCA_SETTING2_MAX_LINE_LIMIT_MASK 0xFFFF0000
+
+#define DSI_CMD_MODE_STS_CTL 0x000000F4
+#define DSI_CMD_MODE_STS_CTL_ERR_NO_TE_EN BIT(0)
+#define DSI_CMD_MODE_STS_CTL_ERR_TE_MISS_EN BIT(1)
+#define DSI_CMD_MODE_STS_CTL_ERR_SDI1_UNDERRUN_EN BIT(2)
+#define DSI_CMD_MODE_STS_CTL_ERR_SDI2_UNDERRUN_EN BIT(3)
+#define DSI_CMD_MODE_STS_CTL_ERR_UNWANTED_RD_EN BIT(4)
+#define DSI_CMD_MODE_STS_CTL_CSM_RUNNING_EN BIT(5)
+#define DSI_CMD_MODE_STS_CTL_ERR_NO_TE_EDGE BIT(16)
+#define DSI_CMD_MODE_STS_CTL_ERR_TE_MISS_EDGE BIT(17)
+#define DSI_CMD_MODE_STS_CTL_ERR_SDI1_UNDERRUN_EDGE BIT(18)
+#define DSI_CMD_MODE_STS_CTL_ERR_SDI2_UNDERRUN_EDGE BIT(19)
+#define DSI_CMD_MODE_STS_CTL_ERR_UNWANTED_RD_EDGE BIT(20)
+#define DSI_CMD_MODE_STS_CTL_CSM_RUNNING_EDGE BIT(21)
+
+#define DSI_DIRECT_CMD_STS_CTL 0x000000F8
+#define DSI_DIRECT_CMD_STS_CTL_CMD_TRANSMISSION_EN BIT(0)
+#define DSI_DIRECT_CMD_STS_CTL_WRITE_COMPLETED_EN BIT(1)
+#define DSI_DIRECT_CMD_STS_CTL_TRIGGER_COMPLETED_EN BIT(2)
+#define DSI_DIRECT_CMD_STS_CTL_READ_COMPLETED_EN BIT(3)
+#define DSI_DIRECT_CMD_STS_CTL_ACKNOWLEDGE_RECEIVED_EN BIT(4)
+#define DSI_DIRECT_CMD_STS_CTL_ACKNOWLEDGE_WITH_ERR_EN BIT(5)
+#define DSI_DIRECT_CMD_STS_CTL_TRIGGER_RECEIVED_EN BIT(6)
+#define DSI_DIRECT_CMD_STS_CTL_TE_RECEIVED_EN BIT(7)
+#define DSI_DIRECT_CMD_STS_CTL_BTA_COMPLETED_EN BIT(8)
+#define DSI_DIRECT_CMD_STS_CTL_BTA_FINISHED_EN BIT(9)
+#define DSI_DIRECT_CMD_STS_CTL_READ_COMPLETED_WITH_ERR_EN BIT(10)
+#define DSI_DIRECT_CMD_STS_CTL_CMD_TRANSMISSION_EDGE BIT(16)
+#define DSI_DIRECT_CMD_STS_CTL_WRITE_COMPLETED_EDGE BIT(17)
+#define DSI_DIRECT_CMD_STS_CTL_TRIGGER_COMPLETED_EDGE BIT(18)
+#define DSI_DIRECT_CMD_STS_CTL_READ_COMPLETED_EDGE BIT(19)
+#define DSI_DIRECT_CMD_STS_CTL_ACKNOWLEDGE_RECEIVED_EDGE BIT(20)
+#define DSI_DIRECT_CMD_STS_CTL_ACKNOWLEDGE_WITH_ERR_EDGE BIT(21)
+#define DSI_DIRECT_CMD_STS_CTL_TRIGGER_RECEIVED_EDGE BIT(22)
+#define DSI_DIRECT_CMD_STS_CTL_TE_RECEIVED_EDGE BIT(23)
+#define DSI_DIRECT_CMD_STS_CTL_BTA_COMPLETED_EDGE BIT(24)
+#define DSI_DIRECT_CMD_STS_CTL_BTA_FINISHED_EDGE BIT(25)
+#define DSI_DIRECT_CMD_STS_CTL_READ_COMPLETED_WITH_ERR_EDGE BIT(26)
+
+#define DSI_VID_MODE_STS_CTL 0x00000100
+#define DSI_VID_MODE_STS_CTL_VSG_RUNNING BIT(0)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_DATA BIT(1)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_HSYNC BIT(2)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_VSYNC BIT(3)
+#define DSI_VID_MODE_STS_CTL_REG_ERR_SMALL_LENGTH BIT(4)
+#define DSI_VID_MODE_STS_CTL_REG_ERR_SMALL_HEIGHT BIT(5)
+#define DSI_VID_MODE_STS_CTL_ERR_BURSTWRITE BIT(6)
+#define DSI_VID_MODE_STS_CTL_ERR_LONGWRITE BIT(7)
+#define DSI_VID_MODE_STS_CTL_ERR_LONGREAD BIT(8)
+#define DSI_VID_MODE_STS_CTL_ERR_VRS_WRONG_LENGTH BIT(9)
+#define DSI_VID_MODE_STS_CTL_VSG_RUNNING_EDGE BIT(16)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_DATA_EDGE BIT(17)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_HSYNC_EDGE BIT(18)
+#define DSI_VID_MODE_STS_CTL_ERR_MISSING_VSYNC_EDGE BIT(19)
+#define DSI_VID_MODE_STS_CTL_REG_ERR_SMALL_LENGTH_EDGE BIT(20)
+#define DSI_VID_MODE_STS_CTL_REG_ERR_SMALL_HEIGHT_EDGE BIT(21)
+#define DSI_VID_MODE_STS_CTL_ERR_BURSTWRITE_EDGE BIT(22)
+#define DSI_VID_MODE_STS_CTL_ERR_LONGWRITE_EDGE BIT(23)
+#define DSI_VID_MODE_STS_CTL_ERR_LONGREAD_EDGE BIT(24)
+#define DSI_VID_MODE_STS_CTL_ERR_VRS_WRONG_LENGTH_EDGE BIT(25)
+#define DSI_VID_MODE_STS_CTL_VSG_RECOVERY_EDGE BIT(26)
+
+#define DSI_TG_STS_CTL 0x00000104
+#define DSI_MCTL_DHPY_ERR_CTL 0x00000108
+#define DSI_MCTL_MAIN_STS_CLR 0x00000110
+
+#define DSI_CMD_MODE_STS_CLR 0x00000114
+#define DSI_CMD_MODE_STS_CLR_ERR_NO_TE_CLR BIT(0)
+#define DSI_CMD_MODE_STS_CLR_ERR_TE_MISS_CLR BIT(1)
+#define DSI_CMD_MODE_STS_CLR_ERR_SDI1_UNDERRUN_CLR BIT(2)
+#define DSI_CMD_MODE_STS_CLR_ERR_SDI2_UNDERRUN_CLR BIT(3)
+#define DSI_CMD_MODE_STS_CLR_ERR_UNWANTED_RD_CLR BIT(4)
+#define DSI_CMD_MODE_STS_CLR_CSM_RUNNING_CLR BIT(5)
+
+#define DSI_DIRECT_CMD_STS_CLR 0x00000118
+#define DSI_DIRECT_CMD_STS_CLR_CMD_TRANSMISSION_CLR BIT(0)
+#define DSI_DIRECT_CMD_STS_CLR_WRITE_COMPLETED_CLR BIT(1)
+#define DSI_DIRECT_CMD_STS_CLR_TRIGGER_COMPLETED_CLR BIT(2)
+#define DSI_DIRECT_CMD_STS_CLR_READ_COMPLETED_CLR BIT(3)
+#define DSI_DIRECT_CMD_STS_CLR_ACKNOWLEDGE_RECEIVED_CLR BIT(4)
+#define DSI_DIRECT_CMD_STS_CLR_ACKNOWLEDGE_WITH_ERR_RECEIVED_CLR BIT(5)
+#define DSI_DIRECT_CMD_STS_CLR_TRIGGER_RECEIVED_CLR BIT(6)
+#define DSI_DIRECT_CMD_STS_CLR_TE_RECEIVED_CLR BIT(7)
+#define DSI_DIRECT_CMD_STS_CLR_BTA_COMPLETED_CLR BIT(8)
+#define DSI_DIRECT_CMD_STS_CLR_BTA_FINISHED_CLR BIT(9)
+#define DSI_DIRECT_CMD_STS_CLR_READ_COMPLETED_WITH_ERR_CLR BIT(10)
+
+#define DSI_DIRECT_CMD_RD_STS_CLR 0x0000011C
+#define DSI_VID_MODE_STS_CLR 0x00000120
+#define DSI_TG_STS_CLR 0x00000124
+#define DSI_MCTL_DPHY_ERR_CLR 0x00000128
+#define DSI_MCTL_MAIN_STS_FLAG 0x00000130
+#define DSI_CMD_MODE_STS_FLAG 0x00000134
+#define DSI_DIRECT_CMD_STS_FLAG 0x00000138
+#define DSI_DIRECT_CMD_RD_STS_FLAG 0x0000013C
+#define DSI_VID_MODE_STS_FLAG 0x00000140
+#define DSI_TG_STS_FLAG 0x00000144
+
+#define DSI_DPHY_LANES_TRIM 0x00000150
+#define DSI_DPHY_LANES_TRIM_DPHY_SKEW_DAT1_SHIFT 0
+#define DSI_DPHY_LANES_TRIM_DPHY_SKEW_DAT1_MASK 0x00000003
+#define DSI_DPHY_LANES_TRIM_DPHY_CD_OFF_DAT1 BIT(2)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_UP_DAT1 BIT(3)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_DOWN_DAT1 BIT(4)
+#define DSI_DPHY_LANES_TRIM_DPHY_TEST_RESERVED_1_DAT1 BIT(5)
+#define DSI_DPHY_LANES_TRIM_DPHY_SKEW_CLK_SHIFT 6
+#define DSI_DPHY_LANES_TRIM_DPHY_SKEW_CLK_MASK 0x000000C0
+#define DSI_DPHY_LANES_TRIM_DPHY_LP_RX_VIL_CLK_SHIFT 8
+#define DSI_DPHY_LANES_TRIM_DPHY_LP_RX_VIL_CLK_MASK 0x00000300
+#define DSI_DPHY_LANES_TRIM_DPHY_LP_TX_SLEWRATE_CLK_SHIFT 10
+#define DSI_DPHY_LANES_TRIM_DPHY_LP_TX_SLEWRATE_CLK_MASK 0x00000C00
+#define DSI_DPHY_LANES_TRIM_DPHY_SPECS_90_81B_0_81 0
+#define DSI_DPHY_LANES_TRIM_DPHY_SPECS_90_81B_0_90 BIT(12)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_UP_CLK BIT(13)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_DOWN_CLK BIT(14)
+#define DSI_DPHY_LANES_TRIM_DPHY_TEST_RESERVED_1_CLK BIT(15)
+#define DSI_DPHY_LANES_TRIM_DPHY_SKEW_DAT2 BIT(16)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_UP_DAT2 BIT(18)
+#define DSI_DPHY_LANES_TRIM_DPHY_HSTX_SLEWRATE_DOWN_DAT2 BIT(19)
+#define DSI_DPHY_LANES_TRIM_DPHY_TEST_RESERVED_1_DAT2 BIT(20)
+
+#define DSI_ID_REG	0x00000FF0
+
+#endif /* __DRM_MCDE_DSI_REGS */
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index f426dfdfb418..a9007210dda1 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -90,10 +90,6 @@ static void mtk_drm_finish_page_flip(struct mtk_drm_crtc *mtk_crtc)
 static void mtk_drm_crtc_destroy(struct drm_crtc *crtc)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
-	int i;
-
-	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
-		clk_unprepare(mtk_crtc->ddp_comp[i]->clk);
 
 	mtk_disp_mutex_put(mtk_crtc->mutex);
 
@@ -186,7 +182,7 @@ static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc)
 
 	DRM_DEBUG_DRIVER("%s\n", __func__);
 	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
-		ret = clk_enable(mtk_crtc->ddp_comp[i]->clk);
+		ret = clk_prepare_enable(mtk_crtc->ddp_comp[i]->clk);
 		if (ret) {
 			DRM_ERROR("Failed to enable clock %d: %d\n", i, ret);
 			goto err;
@@ -196,7 +192,7 @@ static int mtk_crtc_ddp_clk_enable(struct mtk_drm_crtc *mtk_crtc)
 	return 0;
 err:
 	while (--i >= 0)
-		clk_disable(mtk_crtc->ddp_comp[i]->clk);
+		clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk);
 	return ret;
 }
 
@@ -206,7 +202,7 @@ static void mtk_crtc_ddp_clk_disable(struct mtk_drm_crtc *mtk_crtc)
 
 	DRM_DEBUG_DRIVER("%s\n", __func__);
 	for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
-		clk_disable(mtk_crtc->ddp_comp[i]->clk);
+		clk_disable_unprepare(mtk_crtc->ddp_comp[i]->clk);
 }
 
 static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
@@ -577,15 +573,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 		if (!comp) {
 			dev_err(dev, "Component %pOF not initialized\n", node);
 			ret = -ENODEV;
-			goto unprepare;
-		}
-
-		ret = clk_prepare(comp->clk);
-		if (ret) {
-			dev_err(dev,
-				"Failed to prepare clock for component %pOF: %d\n",
-				node, ret);
-			goto unprepare;
+			return ret;
 		}
 
 		mtk_crtc->ddp_comp[i] = comp;
@@ -603,23 +591,17 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev,
 		ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[zpos],
 				     BIT(pipe), type);
 		if (ret)
-			goto unprepare;
+			return ret;
 	}
 
 	ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0],
 				mtk_crtc->layer_nr > 1 ? &mtk_crtc->planes[1] :
 				NULL, pipe);
 	if (ret < 0)
-		goto unprepare;
+		return ret;
 	drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE);
 	drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, false, MTK_LUT_SIZE);
 	priv->num_pipes++;
 
 	return 0;
-
-unprepare:
-	while (--i >= 0)
-		clk_unprepare(mtk_crtc->ddp_comp[i]->clk);
-
-	return ret;
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 7fcb7407096f..95fdbd0fbcac 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -303,6 +303,7 @@ err_config_cleanup:
 static void mtk_drm_kms_deinit(struct drm_device *drm)
 {
 	drm_kms_helper_poll_fini(drm);
+	drm_atomic_helper_shutdown(drm);
 
 	component_unbind_all(drm->dev, drm);
 	drm_mode_config_cleanup(drm);
@@ -389,7 +390,9 @@ static void mtk_drm_unbind(struct device *dev)
 	struct mtk_drm_private *private = dev_get_drvdata(dev);
 
 	drm_dev_unregister(private->drm);
+	mtk_drm_kms_deinit(private->drm);
 	drm_dev_put(private->drm);
+	private->num_pipes = 0;
 	private->drm = NULL;
 }
 
@@ -560,13 +563,8 @@ err_node:
 static int mtk_drm_remove(struct platform_device *pdev)
 {
 	struct mtk_drm_private *private = platform_get_drvdata(pdev);
-	struct drm_device *drm = private->drm;
 	int i;
 
-	drm_dev_unregister(drm);
-	mtk_drm_kms_deinit(drm);
-	drm_dev_put(drm);
-
 	component_master_del(&pdev->dev, &mtk_drm_ops);
 	pm_runtime_disable(&pdev->dev);
 	of_node_put(private->mutex_node);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_fb.c b/drivers/gpu/drm/mediatek/mtk_drm_fb.c
index 44afa912da1f..4c3ad7de2d3b 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_fb.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_fb.c
@@ -24,10 +24,11 @@ static struct drm_framebuffer *mtk_drm_framebuffer_init(struct drm_device *dev,
 					const struct drm_mode_fb_cmd2 *mode,
 					struct drm_gem_object *obj)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev, mode);
 	struct drm_framebuffer *fb;
 	int ret;
 
-	if (drm_format_num_planes(mode->pixel_format) != 1)
+	if (info->num_planes != 1)
 		return ERR_PTR(-EINVAL);
 
 	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
@@ -80,6 +81,7 @@ struct drm_framebuffer *mtk_drm_mode_fb_create(struct drm_device *dev,
 					       struct drm_file *file,
 					       const struct drm_mode_fb_cmd2 *cmd)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev, cmd);
 	struct drm_framebuffer *fb;
 	struct drm_gem_object *gem;
 	unsigned int width = cmd->width;
@@ -87,14 +89,14 @@ struct drm_framebuffer *mtk_drm_mode_fb_create(struct drm_device *dev,
 	unsigned int size, bpp;
 	int ret;
 
-	if (drm_format_num_planes(cmd->pixel_format) != 1)
+	if (info->num_planes != 1)
 		return ERR_PTR(-EINVAL);
 
 	gem = drm_gem_object_lookup(file, cmd->handles[0]);
 	if (!gem)
 		return ERR_PTR(-ENOENT);
 
-	bpp = drm_format_plane_cpp(cmd->pixel_format, 0);
+	bpp = info->cpp[0];
 	size = (height - 1) * cmd->pitches[0] + width * bpp;
 	size += cmd->offsets[0];
 
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index 3eefb22206c7..0d69698f8173 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -136,7 +136,6 @@ static int mtk_drm_gem_object_mmap(struct drm_gem_object *obj,
 	 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_pgoff = 0;
 
 	ret = dma_mmap_attrs(priv->dma_dev, vma, mtk_gem->cookie,
 			     mtk_gem->dma_addr, obj->size, mtk_gem->dma_attrs);
@@ -168,6 +167,12 @@ int mtk_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	obj = vma->vm_private_data;
 
+	/*
+	 * Set vm_pgoff (used as a fake buffer offset by DRM) to 0 and map the
+	 * whole buffer from the start.
+	 */
+	vma->vm_pgoff = 0;
+
 	return mtk_drm_gem_object_mmap(obj, vma);
 }
 
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 4a0b9150a7bb..b91c4616644a 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -622,6 +622,15 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
 	if (--dsi->refcount != 0)
 		return;
 
+	/*
+	 * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since
+	 * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(),
+	 * which needs irq for vblank, and mtk_dsi_stop() will disable irq.
+	 * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(),
+	 * after dsi is fully set.
+	 */
+	mtk_dsi_stop(dsi);
+
 	if (!mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500)) {
 		if (dsi->panel) {
 			if (drm_panel_unprepare(dsi->panel)) {
@@ -688,7 +697,6 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
 		}
 	}
 
-	mtk_dsi_stop(dsi);
 	mtk_dsi_poweroff(dsi);
 
 	dsi->enabled = false;
@@ -836,6 +844,8 @@ static void mtk_dsi_destroy_conn_enc(struct mtk_dsi *dsi)
 	/* Skip connector cleanup if creation was delegated to the bridge */
 	if (dsi->conn.dev)
 		drm_connector_cleanup(&dsi->conn);
+	if (dsi->panel)
+		drm_panel_detach(dsi->panel);
 }
 
 static void mtk_dsi_ddp_start(struct mtk_ddp_comp *comp)
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 562cd6113f28..5d6a9f094df5 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -333,6 +333,9 @@ static void mtk_hdmi_hw_send_info_frame(struct mtk_hdmi *hdmi, u8 *buffer,
 		ctrl_frame_en = VS_EN;
 		ctrl_reg = GRL_ACP_ISRC_CTRL;
 		break;
+	default:
+		dev_err(hdmi->dev, "Unknown infoframe type %d\n", frame_type);
+		return;
 	}
 	mtk_hdmi_clear_bits(hdmi, ctrl_reg, ctrl_frame_en);
 	mtk_hdmi_write(hdmi, GRL_INFOFRM_TYPE, frame_type);
diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig
index e450387d0eab..9f9281dd49f8 100644
--- a/drivers/gpu/drm/meson/Kconfig
+++ b/drivers/gpu/drm/meson/Kconfig
@@ -15,3 +15,4 @@ config DRM_MESON_DW_HDMI
 	depends on DRM_MESON
 	default y if DRM_MESON
 	select DRM_DW_HDMI
+	imply DRM_DW_HDMI_I2S_AUDIO
diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index 685715144156..aa8ea107524e 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -107,8 +107,6 @@ static void meson_g12a_crtc_atomic_enable(struct drm_crtc *crtc,
 			priv->io_base + _REG(VPP_OUT_H_V_SIZE));
 
 	drm_crtc_vblank_on(crtc);
-
-	priv->viu.osd1_enabled = true;
 }
 
 static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
@@ -137,8 +135,6 @@ static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
 			    priv->io_base + _REG(VPP_MISC));
 
 	drm_crtc_vblank_on(crtc);
-
-	priv->viu.osd1_enabled = true;
 }
 
 static void meson_g12a_crtc_atomic_disable(struct drm_crtc *crtc,
@@ -256,6 +252,8 @@ static void meson_g12a_crtc_enable_osd1(struct meson_drm *priv)
 	writel_relaxed(priv->viu.osb_blend1_size,
 		       priv->io_base +
 		       _REG(VIU_OSD_BLEND_BLEND1_SIZE));
+	writel_bits_relaxed(3 << 8, 3 << 8,
+			    priv->io_base + _REG(OSD1_BLEND_SRC_CTRL));
 }
 
 static void meson_crtc_enable_vd1(struct meson_drm *priv)
diff --git a/drivers/gpu/drm/meson/meson_overlay.c b/drivers/gpu/drm/meson/meson_overlay.c
index bdbf925ff3e8..cc7c6ae3013d 100644
--- a/drivers/gpu/drm/meson/meson_overlay.c
+++ b/drivers/gpu/drm/meson/meson_overlay.c
@@ -458,7 +458,7 @@ static void meson_overlay_atomic_update(struct drm_plane *plane,
 	}
 
 	/* Update Canvas with buffer address */
-	priv->viu.vd1_planes = drm_format_num_planes(fb->format->format);
+	priv->viu.vd1_planes = fb->format->num_planes;
 
 	switch (priv->viu.vd1_planes) {
 	case 3:
@@ -466,8 +466,8 @@ static void meson_overlay_atomic_update(struct drm_plane *plane,
 		priv->viu.vd1_addr2 = gem->paddr + fb->offsets[2];
 		priv->viu.vd1_stride2 = fb->pitches[2];
 		priv->viu.vd1_height2 =
-			drm_format_plane_height(fb->height,
-						fb->format->format, 2);
+			drm_format_info_plane_height(fb->format,
+						fb->height, 2);
 		DRM_DEBUG("plane 2 addr 0x%x stride %d height %d\n",
 			 priv->viu.vd1_addr2,
 			 priv->viu.vd1_stride2,
@@ -478,8 +478,8 @@ static void meson_overlay_atomic_update(struct drm_plane *plane,
 		priv->viu.vd1_addr1 = gem->paddr + fb->offsets[1];
 		priv->viu.vd1_stride1 = fb->pitches[1];
 		priv->viu.vd1_height1 =
-			drm_format_plane_height(fb->height,
-						fb->format->format, 1);
+			drm_format_info_plane_height(fb->format,
+						fb->height, 1);
 		DRM_DEBUG("plane 1 addr 0x%x stride %d height %d\n",
 			 priv->viu.vd1_addr1,
 			 priv->viu.vd1_stride1,
@@ -490,8 +490,8 @@ static void meson_overlay_atomic_update(struct drm_plane *plane,
 		priv->viu.vd1_addr0 = gem->paddr + fb->offsets[0];
 		priv->viu.vd1_stride0 = fb->pitches[0];
 		priv->viu.vd1_height0 =
-			drm_format_plane_height(fb->height,
-						fb->format->format, 0);
+			drm_format_info_plane_height(fb->format,
+						fb->height, 0);
 		DRM_DEBUG("plane 0 addr 0x%x stride %d height %d\n",
 			 priv->viu.vd1_addr0,
 			 priv->viu.vd1_stride0,
@@ -578,6 +578,9 @@ int meson_overlay_create(struct meson_drm *priv)
 
 	drm_plane_helper_add(plane, &meson_overlay_helper_funcs);
 
+	/* For now, VD Overlay plane is always on the back */
+	drm_plane_create_zpos_immutable_property(plane, 0);
+
 	priv->overlay_plane = plane;
 
 	DRM_DEBUG_DRIVER("\n");
diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index 22490047932e..7a7e88dadd0b 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -153,6 +153,13 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
 		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
 					      OSD_COLOR_MATRIX_32_ARGB;
 		break;
+	case DRM_FORMAT_XBGR8888:
+		/* For XRGB, replace the pixel's alpha by 0xFF */
+		writel_bits_relaxed(OSD_REPLACE_EN, OSD_REPLACE_EN,
+				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
+		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
+					      OSD_COLOR_MATRIX_32_ABGR;
+		break;
 	case DRM_FORMAT_ARGB8888:
 		/* For ARGB, use the pixel's alpha */
 		writel_bits_relaxed(OSD_REPLACE_EN, 0,
@@ -160,6 +167,13 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
 		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
 					      OSD_COLOR_MATRIX_32_ARGB;
 		break;
+	case DRM_FORMAT_ABGR8888:
+		/* For ARGB, use the pixel's alpha */
+		writel_bits_relaxed(OSD_REPLACE_EN, 0,
+				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
+		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
+					      OSD_COLOR_MATRIX_32_ABGR;
+		break;
 	case DRM_FORMAT_RGB888:
 		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_24 |
 					      OSD_COLOR_MATRIX_24_RGB;
@@ -305,6 +319,8 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
 		meson_plane->enabled = true;
 	}
 
+	priv->viu.osd1_enabled = true;
+
 	spin_unlock_irqrestore(&priv->drm->event_lock, flags);
 }
 
@@ -316,14 +332,14 @@ static void meson_plane_atomic_disable(struct drm_plane *plane,
 
 	/* Disable OSD1 */
 	if (meson_vpu_is_compatible(priv, "amlogic,meson-g12a-vpu"))
-		writel_bits_relaxed(BIT(0) | BIT(21), 0,
-			priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
+		writel_bits_relaxed(3 << 8, 0,
+				    priv->io_base + _REG(OSD1_BLEND_SRC_CTRL));
 	else
 		writel_bits_relaxed(VPP_OSD1_POSTBLEND, 0,
 				    priv->io_base + _REG(VPP_MISC));
 
 	meson_plane->enabled = false;
-
+	priv->viu.osd1_enabled = false;
 }
 
 static const struct drm_plane_helper_funcs meson_plane_helper_funcs = {
@@ -344,7 +360,9 @@ static const struct drm_plane_funcs meson_plane_funcs = {
 
 static const uint32_t supported_drm_formats[] = {
 	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_RGB888,
 	DRM_FORMAT_RGB565,
 };
@@ -371,6 +389,9 @@ int meson_plane_create(struct meson_drm *priv)
 
 	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
 
+	/* For now, OSD Primary plane is always on the front */
+	drm_plane_create_zpos_immutable_property(plane, 1);
+
 	priv->primary_plane = plane;
 
 	return 0;
diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
index 58b4af5fbb6d..26732f038d19 100644
--- a/drivers/gpu/drm/meson/meson_vclk.c
+++ b/drivers/gpu/drm/meson/meson_vclk.c
@@ -503,8 +503,17 @@ void meson_hdmi_pll_set_params(struct meson_drm *priv, unsigned int m,
 
 		/* G12A HDMI PLL Needs specific parameters for 5.4GHz */
 		if (m >= 0xf7) {
-			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0xea68dc00);
-			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x65771290);
+			if (frac < 0x10000) {
+				regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4,
+							0x6a685c00);
+				regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5,
+							0x11551293);
+			} else {
+				regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4,
+							0xea68dc00);
+				regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5,
+							0x65771290);
+			}
 			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x39272000);
 			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL7, 0x55540000);
 		} else {
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index 462c7cb3e1bd..4b2b3024d371 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -405,8 +405,7 @@ void meson_viu_init(struct meson_drm *priv)
 				0 << 16 |
 				1,
 				priv->io_base + _REG(VIU_OSD_BLEND_CTRL));
-		writel_relaxed(3 << 8 |
-				1 << 20,
+		writel_relaxed(1 << 20,
 				priv->io_base + _REG(OSD1_BLEND_SRC_CTRL));
 		writel_relaxed(1 << 20,
 				priv->io_base + _REG(OSD2_BLEND_SRC_CTRL));
diff --git a/drivers/gpu/drm/mgag200/Kconfig b/drivers/gpu/drm/mgag200/Kconfig
index 91f3579546d0..76fee0fbdcae 100644
--- a/drivers/gpu/drm/mgag200/Kconfig
+++ b/drivers/gpu/drm/mgag200/Kconfig
@@ -3,7 +3,7 @@ config DRM_MGAG200
 	tristate "Kernel modesetting driver for MGA G200 server engines"
 	depends on DRM && PCI && MMU
 	select DRM_KMS_HELPER
-	select DRM_TTM
+	select DRM_VRAM_HELPER
 	help
 	 This is a KMS driver for the MGA G200 server chips, it
          does not support the original MGA G200 or any of the desktop
diff --git a/drivers/gpu/drm/mgag200/mgag200_cursor.c b/drivers/gpu/drm/mgag200/mgag200_cursor.c
index 853387b0fa88..f0c61a92351c 100644
--- a/drivers/gpu/drm/mgag200/mgag200_cursor.c
+++ b/drivers/gpu/drm/mgag200/mgag200_cursor.c
@@ -19,10 +19,9 @@ static void mga_hide_cursor(struct mga_device *mdev)
 {
 	WREG8(MGA_CURPOSXL, 0);
 	WREG8(MGA_CURPOSXH, 0);
-	if (mdev->cursor.pixels_1->pin_count)
-		mgag200_bo_unpin(mdev->cursor.pixels_1);
-	if (mdev->cursor.pixels_2->pin_count)
-		mgag200_bo_unpin(mdev->cursor.pixels_2);
+	if (mdev->cursor.pixels_current)
+		drm_gem_vram_unpin(mdev->cursor.pixels_current);
+	mdev->cursor.pixels_current = NULL;
 }
 
 int mga_crtc_cursor_set(struct drm_crtc *crtc,
@@ -33,13 +32,14 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 {
 	struct drm_device *dev = crtc->dev;
 	struct mga_device *mdev = (struct mga_device *)dev->dev_private;
-	struct mgag200_bo *pixels_1 = mdev->cursor.pixels_1;
-	struct mgag200_bo *pixels_2 = mdev->cursor.pixels_2;
-	struct mgag200_bo *pixels_current = mdev->cursor.pixels_current;
-	struct mgag200_bo *pixels_prev = mdev->cursor.pixels_prev;
+	struct drm_gem_vram_object *pixels_1 = mdev->cursor.pixels_1;
+	struct drm_gem_vram_object *pixels_2 = mdev->cursor.pixels_2;
+	struct drm_gem_vram_object *pixels_current = mdev->cursor.pixels_current;
+	struct drm_gem_vram_object *pixels_next;
 	struct drm_gem_object *obj;
-	struct mgag200_bo *bo = NULL;
+	struct drm_gem_vram_object *gbo = NULL;
 	int ret = 0;
+	u8 *src, *dst;
 	unsigned int i, row, col;
 	uint32_t colour_set[16];
 	uint32_t *next_space = &colour_set[0];
@@ -47,7 +47,8 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 	uint32_t this_colour;
 	bool found = false;
 	int colour_count = 0;
-	u64 gpu_addr;
+	s64 gpu_addr;
+	u64 dst_gpu;
 	u8 reg_index;
 	u8 this_row[48];
 
@@ -57,73 +58,71 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 		return -ENOTSUPP; /* Didn't allocate space for cursors */
 	}
 
-	if ((width != 64 || height != 64) && handle) {
-		WREG8(MGA_CURPOSXL, 0);
-		WREG8(MGA_CURPOSXH, 0);
-		return -EINVAL;
+	if (WARN_ON(pixels_current &&
+		    pixels_1 != pixels_current &&
+		    pixels_2 != pixels_current)) {
+		return -ENOTSUPP; /* inconsistent state */
 	}
 
-	BUG_ON(pixels_1 != pixels_current && pixels_1 != pixels_prev);
-	BUG_ON(pixels_2 != pixels_current && pixels_2 != pixels_prev);
-	BUG_ON(pixels_current == pixels_prev);
-
 	if (!handle || !file_priv) {
 		mga_hide_cursor(mdev);
 		return 0;
 	}
 
-	obj = drm_gem_object_lookup(file_priv, handle);
-	if (!obj)
-		return -ENOENT;
-
-	ret = mgag200_bo_reserve(pixels_1, true);
-	if (ret) {
+	if (width != 64 || height != 64) {
 		WREG8(MGA_CURPOSXL, 0);
 		WREG8(MGA_CURPOSXH, 0);
-		goto out_unref;
-	}
-	ret = mgag200_bo_reserve(pixels_2, true);
-	if (ret) {
-		WREG8(MGA_CURPOSXL, 0);
-		WREG8(MGA_CURPOSXH, 0);
-		mgag200_bo_unreserve(pixels_1);
-		goto out_unreserve1;
+		return -EINVAL;
 	}
 
-	/* Move cursor buffers into VRAM if they aren't already */
-	if (!pixels_1->pin_count) {
-		ret = mgag200_bo_pin(pixels_1, TTM_PL_FLAG_VRAM,
-				     &mdev->cursor.pixels_1_gpu_addr);
-		if (ret)
-			goto out1;
+	if (pixels_current == pixels_1)
+		pixels_next = pixels_2;
+	else
+		pixels_next = pixels_1;
+
+	obj = drm_gem_object_lookup(file_priv, handle);
+	if (!obj)
+		return -ENOENT;
+	gbo = drm_gem_vram_of_gem(obj);
+	ret = drm_gem_vram_pin(gbo, 0);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "failed to lock user bo\n");
+		goto err_drm_gem_object_put_unlocked;
 	}
-	if (!pixels_2->pin_count) {
-		ret = mgag200_bo_pin(pixels_2, TTM_PL_FLAG_VRAM,
-				     &mdev->cursor.pixels_2_gpu_addr);
-		if (ret) {
-			mgag200_bo_unpin(pixels_1);
-			goto out1;
-		}
+	src = drm_gem_vram_kmap(gbo, true, NULL);
+	if (IS_ERR(src)) {
+		ret = PTR_ERR(src);
+		dev_err(&dev->pdev->dev,
+			"failed to kmap user buffer updates\n");
+		goto err_drm_gem_vram_unpin_src;
 	}
 
-	bo = gem_to_mga_bo(obj);
-	ret = mgag200_bo_reserve(bo, true);
-	if (ret) {
-		dev_err(&dev->pdev->dev, "failed to reserve user bo\n");
-		goto out1;
+	/* Pin and map up-coming buffer to write colour indices */
+	ret = drm_gem_vram_pin(pixels_next, 0);
+	if (ret)
+		dev_err(&dev->pdev->dev,
+			"failed to pin cursor buffer: %d\n", ret);
+		goto err_drm_gem_vram_kunmap_src;
+	dst = drm_gem_vram_kmap(pixels_next, true, NULL);
+	if (IS_ERR(dst)) {
+		ret = PTR_ERR(dst);
+		dev_err(&dev->pdev->dev,
+			"failed to kmap cursor updates: %d\n", ret);
+		goto err_drm_gem_vram_unpin_dst;
 	}
-	if (!bo->kmap.virtual) {
-		ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-		if (ret) {
-			dev_err(&dev->pdev->dev, "failed to kmap user buffer updates\n");
-			goto out2;
-		}
+	gpu_addr = drm_gem_vram_offset(pixels_2);
+	if (gpu_addr < 0) {
+		ret = (int)gpu_addr;
+		dev_err(&dev->pdev->dev,
+			"failed to get cursor scanout address: %d\n", ret);
+		goto err_drm_gem_vram_kunmap_dst;
 	}
+	dst_gpu = (u64)gpu_addr;
 
 	memset(&colour_set[0], 0, sizeof(uint32_t)*16);
 	/* width*height*4 = 16384 */
 	for (i = 0; i < 16384; i += 4) {
-		this_colour = ioread32(bo->kmap.virtual + i);
+		this_colour = ioread32(src + i);
 		/* No transparency */
 		if (this_colour>>24 != 0xff &&
 			this_colour>>24 != 0x0) {
@@ -133,7 +132,7 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 				warn_transparent = false; /* Only tell the user once. */
 			}
 			ret = -EINVAL;
-			goto out3;
+			goto err_drm_gem_vram_kunmap_dst;
 		}
 		/* Don't need to store transparent pixels as colours */
 		if (this_colour>>24 == 0x0)
@@ -155,7 +154,7 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 				warn_palette = false; /* Only tell the user once. */
 			}
 			ret = -EINVAL;
-			goto out3;
+			goto err_drm_gem_vram_kunmap_dst;
 		}
 		*next_space = this_colour;
 		next_space++;
@@ -174,22 +173,11 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 		BUG_ON((colour_set[i]>>24 & 0xff) != 0xff);
 	}
 
-	/* Map up-coming buffer to write colour indices */
-	if (!pixels_prev->kmap.virtual) {
-		ret = ttm_bo_kmap(&pixels_prev->bo, 0,
-				  pixels_prev->bo.num_pages,
-				  &pixels_prev->kmap);
-		if (ret) {
-			dev_err(&dev->pdev->dev, "failed to kmap cursor updates\n");
-			goto out3;
-		}
-	}
-
 	/* now write colour indices into hardware cursor buffer */
 	for (row = 0; row < 64; row++) {
 		memset(&this_row[0], 0, 48);
 		for (col = 0; col < 64; col++) {
-			this_colour = ioread32(bo->kmap.virtual + 4*(col + 64*row));
+			this_colour = ioread32(src + 4*(col + 64*row));
 			/* write transparent pixels */
 			if (this_colour>>24 == 0x0) {
 				this_row[47 - col/8] |= 0x80>>(col%8);
@@ -207,46 +195,39 @@ int mga_crtc_cursor_set(struct drm_crtc *crtc,
 				}
 			}
 		}
-		memcpy_toio(pixels_prev->kmap.virtual + row*48, &this_row[0], 48);
+		memcpy_toio(dst + row*48, &this_row[0], 48);
 	}
 
 	/* Program gpu address of cursor buffer */
-	if (pixels_prev == pixels_1)
-		gpu_addr = mdev->cursor.pixels_1_gpu_addr;
-	else
-		gpu_addr = mdev->cursor.pixels_2_gpu_addr;
-	WREG_DAC(MGA1064_CURSOR_BASE_ADR_LOW, (u8)((gpu_addr>>10) & 0xff));
-	WREG_DAC(MGA1064_CURSOR_BASE_ADR_HI, (u8)((gpu_addr>>18) & 0x3f));
+	WREG_DAC(MGA1064_CURSOR_BASE_ADR_LOW, (u8)((dst_gpu>>10) & 0xff));
+	WREG_DAC(MGA1064_CURSOR_BASE_ADR_HI, (u8)((dst_gpu>>18) & 0x3f));
 
 	/* Adjust cursor control register to turn on the cursor */
 	WREG_DAC(MGA1064_CURSOR_CTL, 4); /* 16-colour palletized cursor mode */
 
-	/* Now swap internal buffer pointers */
-	if (mdev->cursor.pixels_1 == mdev->cursor.pixels_prev) {
-		mdev->cursor.pixels_prev = mdev->cursor.pixels_2;
-		mdev->cursor.pixels_current = mdev->cursor.pixels_1;
-	} else if (mdev->cursor.pixels_1 == mdev->cursor.pixels_current) {
-		mdev->cursor.pixels_prev = mdev->cursor.pixels_1;
-		mdev->cursor.pixels_current = mdev->cursor.pixels_2;
-	} else {
-		BUG();
-	}
-	ret = 0;
+	/* Now update internal buffer pointers */
+	if (pixels_current)
+		drm_gem_vram_unpin(pixels_current);
+	mdev->cursor.pixels_current = pixels_next;
 
-	ttm_bo_kunmap(&pixels_prev->kmap);
- out3:
-	ttm_bo_kunmap(&bo->kmap);
- out2:
-	mgag200_bo_unreserve(bo);
- out1:
-	if (ret)
-		mga_hide_cursor(mdev);
-	mgag200_bo_unreserve(pixels_1);
-out_unreserve1:
-	mgag200_bo_unreserve(pixels_2);
-out_unref:
+	drm_gem_vram_kunmap(pixels_next);
+	drm_gem_vram_unpin(pixels_next);
+	drm_gem_vram_kunmap(gbo);
+	drm_gem_vram_unpin(gbo);
 	drm_gem_object_put_unlocked(obj);
 
+	return 0;
+
+err_drm_gem_vram_kunmap_dst:
+	drm_gem_vram_kunmap(pixels_next);
+err_drm_gem_vram_unpin_dst:
+	drm_gem_vram_unpin(pixels_next);
+err_drm_gem_vram_kunmap_src:
+	drm_gem_vram_kunmap(gbo);
+err_drm_gem_vram_unpin_src:
+	drm_gem_vram_unpin(gbo);
+err_drm_gem_object_put_unlocked:
+	drm_gem_object_put_unlocked(obj);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index efd2145bc0a2..aafa1cb31f50 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -56,13 +56,7 @@ static void mga_pci_remove(struct pci_dev *pdev)
 
 static const struct file_operations mgag200_driver_fops = {
 	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.mmap = mgag200_mmap,
-	.poll = drm_poll,
-	.compat_ioctl = drm_compat_ioctl,
-	.read = drm_read,
+	DRM_VRAM_MM_FILE_OPERATIONS
 };
 
 static struct drm_driver driver = {
@@ -76,10 +70,7 @@ static struct drm_driver driver = {
 	.major = DRIVER_MAJOR,
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
-
-	.gem_free_object_unlocked = mgag200_gem_free_object,
-	.dumb_create = mgag200_dumb_create,
-	.dumb_map_offset = mgag200_dumb_mmap_offset,
+	DRM_GEM_VRAM_DRIVER
 };
 
 static struct pci_driver mgag200_pci_driver = {
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 8506e6d62b63..c47671ce6c48 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright 2010 Matt Turner.
- * Copyright 2012 Red Hat 
+ * Copyright 2012 Red Hat
  *
  * Authors: Matthew Garrett
  * 	    Matt Turner
@@ -14,13 +14,11 @@
 
 #include <drm/drm_encoder.h>
 #include <drm/drm_fb_helper.h>
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_memory.h>
-#include <drm/ttm/ttm_module.h>
 
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
+
+#include <drm/drm_vram_mm_helper.h>
 
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
@@ -114,7 +112,6 @@ struct mga_fbdev {
 	struct mga_framebuffer mfb;
 	void *sysram;
 	int size;
-	struct ttm_bo_kmap_obj mapping;
 	int x1, y1, x2, y2; /* dirty rect */
 	spinlock_t dirty_lock;
 };
@@ -156,13 +153,10 @@ struct mga_cursor {
 	   If either of these is NULL, then don't do hardware cursors, and
 	   fall back to software.
 	*/
-	struct mgag200_bo *pixels_1;
-	struct mgag200_bo *pixels_2;
-	u64 pixels_1_gpu_addr, pixels_2_gpu_addr;
+	struct drm_gem_vram_object *pixels_1;
+	struct drm_gem_vram_object *pixels_2;
 	/* The currently displayed icon, this points to one of pixels_1, or pixels_2 */
-	struct mgag200_bo *pixels_current;
-	/* The previously displayed icon */
-	struct mgag200_bo *pixels_prev;
+	struct drm_gem_vram_object *pixels_current;
 };
 
 struct mga_mc {
@@ -208,31 +202,10 @@ struct mga_device {
 
 	int fb_mtrr;
 
-	struct {
-		struct ttm_bo_device bdev;
-	} ttm;
-
 	/* SE model number stored in reg 0x1e24 */
 	u32 unique_rev_id;
 };
 
-
-struct mgag200_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
-};
-#define gem_to_mga_bo(gobj) container_of((gobj), struct mgag200_bo, gem)
-
-static inline struct mgag200_bo *
-mgag200_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct mgag200_bo, bo);
-}
-
 				/* mgag200_mode.c */
 int mgag200_modeset_init(struct mga_device *mdev);
 void mgag200_modeset_fini(struct mga_device *mdev);
@@ -256,45 +229,15 @@ int mgag200_gem_create(struct drm_device *dev,
 int mgag200_dumb_create(struct drm_file *file,
 			struct drm_device *dev,
 			struct drm_mode_create_dumb *args);
-void mgag200_gem_free_object(struct drm_gem_object *obj);
-int
-mgag200_dumb_mmap_offset(struct drm_file *file,
-			 struct drm_device *dev,
-			 uint32_t handle,
-			 uint64_t *offset);
+
 				/* mgag200_i2c.c */
 struct mga_i2c_chan *mgag200_i2c_create(struct drm_device *dev);
 void mgag200_i2c_destroy(struct mga_i2c_chan *i2c);
 
-void mgag200_ttm_placement(struct mgag200_bo *bo, int domain);
-
-static inline int mgag200_bo_reserve(struct mgag200_bo *bo, bool no_wait)
-{
-	int ret;
-
-	ret = ttm_bo_reserve(&bo->bo, true, no_wait, NULL);
-	if (ret) {
-		if (ret != -ERESTARTSYS && ret != -EBUSY)
-			DRM_ERROR("reserve failed %p\n", bo);
-		return ret;
-	}
-	return 0;
-}
-
-static inline void mgag200_bo_unreserve(struct mgag200_bo *bo)
-{
-	ttm_bo_unreserve(&bo->bo);
-}
-
-int mgag200_bo_create(struct drm_device *dev, int size, int align,
-		      uint32_t flags, struct mgag200_bo **pastbo);
 int mgag200_mm_init(struct mga_device *mdev);
 void mgag200_mm_fini(struct mga_device *mdev);
 int mgag200_mmap(struct file *filp, struct vm_area_struct *vma);
-int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr);
-int mgag200_bo_unpin(struct mgag200_bo *bo);
-int mgag200_bo_push_sysram(struct mgag200_bo *bo);
-			   /* mgag200_cursor.c */
+
 int mga_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 						uint32_t handle, uint32_t width, uint32_t height);
 int mga_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c
index c4d1dcc5afde..8adb33228732 100644
--- a/drivers/gpu/drm/mgag200/mgag200_fb.c
+++ b/drivers/gpu/drm/mgag200/mgag200_fb.c
@@ -20,29 +20,31 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev,
 {
 	int i;
 	struct drm_gem_object *obj;
-	struct mgag200_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int src_offset, dst_offset;
 	int bpp = mfbdev->mfb.base.format->cpp[0];
-	int ret = -EBUSY;
+	int ret;
+	u8 *dst;
 	bool unmap = false;
 	bool store_for_later = false;
 	int x2, y2;
 	unsigned long flags;
 
 	obj = mfbdev->mfb.obj;
-	bo = gem_to_mga_bo(obj);
-
-	/*
-	 * try and reserve the BO, if we fail with busy
-	 * then the BO is being moved and we should
-	 * store up the damage until later.
-	 */
-	if (drm_can_sleep())
-		ret = mgag200_bo_reserve(bo, true);
-	if (ret) {
-		if (ret != -EBUSY)
-			return;
-
+	gbo = drm_gem_vram_of_gem(obj);
+
+	if (drm_can_sleep()) {
+		/* We pin the BO so it won't be moved during the
+		 * update. The actual location, video RAM or system
+		 * memory, is not important.
+		 */
+		ret = drm_gem_vram_pin(gbo, 0);
+		if (ret) {
+			if (ret != -EBUSY)
+				return;
+			store_for_later = true;
+		}
+	} else {
 		store_for_later = true;
 	}
 
@@ -72,25 +74,32 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev,
 	mfbdev->x2 = mfbdev->y2 = 0;
 	spin_unlock_irqrestore(&mfbdev->dirty_lock, flags);
 
-	if (!bo->kmap.virtual) {
-		ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-		if (ret) {
+	dst = drm_gem_vram_kmap(gbo, false, NULL);
+	if (IS_ERR(dst)) {
+		DRM_ERROR("failed to kmap fb updates\n");
+		goto out;
+	} else if (!dst) {
+		dst = drm_gem_vram_kmap(gbo, true, NULL);
+		if (IS_ERR(dst)) {
 			DRM_ERROR("failed to kmap fb updates\n");
-			mgag200_bo_unreserve(bo);
-			return;
+			goto out;
 		}
 		unmap = true;
 	}
+
 	for (i = y; i <= y2; i++) {
 		/* assume equal stride for now */
-		src_offset = dst_offset = i * mfbdev->mfb.base.pitches[0] + (x * bpp);
-		memcpy_toio(bo->kmap.virtual + src_offset, mfbdev->sysram + src_offset, (x2 - x + 1) * bpp);
-
+		src_offset = dst_offset =
+			i * mfbdev->mfb.base.pitches[0] + (x * bpp);
+		memcpy_toio(dst + dst_offset, mfbdev->sysram + src_offset,
+			    (x2 - x + 1) * bpp);
 	}
+
 	if (unmap)
-		ttm_bo_kunmap(&bo->kmap);
+		drm_gem_vram_kunmap(gbo);
 
-	mgag200_bo_unreserve(bo);
+out:
+	drm_gem_vram_unpin(gbo);
 }
 
 static void mga_fillrect(struct fb_info *info,
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 5333bd41a36f..dd61ccc5af5c 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -227,19 +227,19 @@ int mgag200_driver_load(struct drm_device *dev, unsigned long flags)
 	}
 
 	/* Make small buffers to store a hardware cursor (double buffered icon updates) */
-	mgag200_bo_create(dev, roundup(48*64, PAGE_SIZE), 0, 0,
-					  &mdev->cursor.pixels_1);
-	mgag200_bo_create(dev, roundup(48*64, PAGE_SIZE), 0, 0,
-					  &mdev->cursor.pixels_2);
-	if (!mdev->cursor.pixels_2 || !mdev->cursor.pixels_1) {
+	mdev->cursor.pixels_1 = drm_gem_vram_create(dev, &dev->vram_mm->bdev,
+						    roundup(48*64, PAGE_SIZE),
+						    0, 0);
+	mdev->cursor.pixels_2 = drm_gem_vram_create(dev, &dev->vram_mm->bdev,
+						    roundup(48*64, PAGE_SIZE),
+						    0, 0);
+	if (IS_ERR(mdev->cursor.pixels_2) || IS_ERR(mdev->cursor.pixels_1)) {
 		mdev->cursor.pixels_1 = NULL;
 		mdev->cursor.pixels_2 = NULL;
 		dev_warn(&dev->pdev->dev,
 			"Could not allocate space for cursors. Not doing hardware cursors.\n");
-	} else {
-		mdev->cursor.pixels_current = mdev->cursor.pixels_1;
-		mdev->cursor.pixels_prev = mdev->cursor.pixels_2;
 	}
+	mdev->cursor.pixels_current = NULL;
 
 	return 0;
 
@@ -269,7 +269,7 @@ int mgag200_gem_create(struct drm_device *dev,
 		   u32 size, bool iskernel,
 		   struct drm_gem_object **obj)
 {
-	struct mgag200_bo *astbo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
 
 	*obj = NULL;
@@ -278,78 +278,13 @@ int mgag200_gem_create(struct drm_device *dev,
 	if (size == 0)
 		return -EINVAL;
 
-	ret = mgag200_bo_create(dev, size, 0, 0, &astbo);
-	if (ret) {
+	gbo = drm_gem_vram_create(dev, &dev->vram_mm->bdev, size, 0, false);
+	if (IS_ERR(gbo)) {
+		ret = PTR_ERR(gbo);
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("failed to allocate GEM object\n");
 		return ret;
 	}
-	*obj = &astbo->gem;
-	return 0;
-}
-
-int mgag200_dumb_create(struct drm_file *file,
-		    struct drm_device *dev,
-		    struct drm_mode_create_dumb *args)
-{
-	int ret;
-	struct drm_gem_object *gobj;
-	u32 handle;
-
-	args->pitch = args->width * ((args->bpp + 7) / 8);
-	args->size = args->pitch * args->height;
-
-	ret = mgag200_gem_create(dev, args->size, false,
-			     &gobj);
-	if (ret)
-		return ret;
-
-	ret = drm_gem_handle_create(file, gobj, &handle);
-	drm_gem_object_put_unlocked(gobj);
-	if (ret)
-		return ret;
-
-	args->handle = handle;
-	return 0;
-}
-
-static void mgag200_bo_unref(struct mgag200_bo **bo)
-{
-	if ((*bo) == NULL)
-		return;
-	ttm_bo_put(&((*bo)->bo));
-	*bo = NULL;
-}
-
-void mgag200_gem_free_object(struct drm_gem_object *obj)
-{
-	struct mgag200_bo *mgag200_bo = gem_to_mga_bo(obj);
-
-	mgag200_bo_unref(&mgag200_bo);
-}
-
-
-static inline u64 mgag200_bo_mmap_offset(struct mgag200_bo *bo)
-{
-	return drm_vma_node_offset_addr(&bo->bo.vma_node);
-}
-
-int
-mgag200_dumb_mmap_offset(struct drm_file *file,
-		     struct drm_device *dev,
-		     uint32_t handle,
-		     uint64_t *offset)
-{
-	struct drm_gem_object *obj;
-	struct mgag200_bo *bo;
-
-	obj = drm_gem_object_lookup(file, handle);
-	if (obj == NULL)
-		return -ENOENT;
-
-	bo = gem_to_mga_bo(obj);
-	*offset = mgag200_bo_mmap_offset(bo);
-
-	drm_gem_object_put_unlocked(obj);
+	*obj = &gbo->gem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 210a19354f1c..a25054015e8c 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -855,8 +855,6 @@ static void mga_set_start_address(struct drm_crtc *crtc, unsigned offset)
 	WREG_ECRT(0x0, ((u8)(addr >> 16) & 0xf) | crtcext0);
 }
 
-
-/* ast is different - we will force move buffers out of VRAM */
 static int mga_crtc_do_set_base(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
 				int x, int y, int atomic)
@@ -864,48 +862,51 @@ static int mga_crtc_do_set_base(struct drm_crtc *crtc,
 	struct mga_device *mdev = crtc->dev->dev_private;
 	struct drm_gem_object *obj;
 	struct mga_framebuffer *mga_fb;
-	struct mgag200_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
-	u64 gpu_addr;
+	s64 gpu_addr;
+	void *base;
 
-	/* push the previous fb to system ram */
 	if (!atomic && fb) {
 		mga_fb = to_mga_framebuffer(fb);
 		obj = mga_fb->obj;
-		bo = gem_to_mga_bo(obj);
-		ret = mgag200_bo_reserve(bo, false);
-		if (ret)
-			return ret;
-		mgag200_bo_push_sysram(bo);
-		mgag200_bo_unreserve(bo);
+		gbo = drm_gem_vram_of_gem(obj);
+
+		/* unmap if console */
+		if (&mdev->mfbdev->mfb == mga_fb)
+			drm_gem_vram_kunmap(gbo);
+		drm_gem_vram_unpin(gbo);
 	}
 
 	mga_fb = to_mga_framebuffer(crtc->primary->fb);
 	obj = mga_fb->obj;
-	bo = gem_to_mga_bo(obj);
+	gbo = drm_gem_vram_of_gem(obj);
 
-	ret = mgag200_bo_reserve(bo, false);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret)
 		return ret;
-
-	ret = mgag200_bo_pin(bo, TTM_PL_FLAG_VRAM, &gpu_addr);
-	if (ret) {
-		mgag200_bo_unreserve(bo);
-		return ret;
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (gpu_addr < 0) {
+		ret = (int)gpu_addr;
+		goto err_drm_gem_vram_unpin;
 	}
 
 	if (&mdev->mfbdev->mfb == mga_fb) {
 		/* if pushing console in kmap it */
-		ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-		if (ret)
+		base = drm_gem_vram_kmap(gbo, true, NULL);
+		if (IS_ERR(base)) {
+			ret = PTR_ERR(base);
 			DRM_ERROR("failed to kmap fbcon\n");
-
+		}
 	}
-	mgag200_bo_unreserve(bo);
 
 	mga_set_start_address(crtc, (u32)gpu_addr);
 
 	return 0;
+
+err_drm_gem_vram_unpin:
+	drm_gem_vram_unpin(gbo);
+	return ret;
 }
 
 static int mga_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
@@ -1419,18 +1420,18 @@ static void mga_crtc_destroy(struct drm_crtc *crtc)
 
 static void mga_crtc_disable(struct drm_crtc *crtc)
 {
-	int ret;
 	DRM_DEBUG_KMS("\n");
 	mga_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
+		struct mga_device *mdev = crtc->dev->dev_private;
 		struct mga_framebuffer *mga_fb = to_mga_framebuffer(crtc->primary->fb);
 		struct drm_gem_object *obj = mga_fb->obj;
-		struct mgag200_bo *bo = gem_to_mga_bo(obj);
-		ret = mgag200_bo_reserve(bo, false);
-		if (ret)
-			return;
-		mgag200_bo_push_sysram(bo);
-		mgag200_bo_unreserve(bo);
+		struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(obj);
+
+		/* unmap if console */
+		if (&mdev->mfbdev->mfb == mga_fb)
+			drm_gem_vram_kunmap(gbo);
+		drm_gem_vram_unpin(gbo);
 	}
 	crtc->primary->fb = NULL;
 }
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index bd42365a8aa8..59294c0fd24a 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -26,167 +26,21 @@
  * Authors: Dave Airlie <airlied@redhat.com>
  */
 #include <drm/drmP.h>
-#include <drm/ttm/ttm_page_alloc.h>
 
 #include "mgag200_drv.h"
 
-static inline struct mga_device *
-mgag200_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct mga_device, ttm.bdev);
-}
-
-static void mgag200_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct mgag200_bo *bo;
-
-	bo = container_of(tbo, struct mgag200_bo, bo);
-
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool mgag200_ttm_bo_is_mgag200_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &mgag200_bo_ttm_destroy)
-		return true;
-	return false;
-}
-
-static int
-mgag200_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
-		     struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED |
-			TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED |
-			TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void
-mgag200_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct mgag200_bo *mgabo = mgag200_bo(bo);
-
-	if (!mgag200_ttm_bo_is_mgag200_bo(bo))
-		return;
-
-	mgag200_ttm_placement(mgabo, TTM_PL_FLAG_SYSTEM);
-	*pl = mgabo->placement;
-}
-
-static int mgag200_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
-{
-	struct mgag200_bo *mgabo = mgag200_bo(bo);
-
-	return drm_vma_node_verify_access(&mgabo->gem.vma_node,
-					  filp->private_data);
-}
-
-static int mgag200_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				  struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct mga_device *mdev = mgag200_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = pci_resource_start(mdev->dev->pdev, 0);
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-		break;
-	}
-	return 0;
-}
-
-static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
-static void mgag200_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func mgag200_tt_backend_func = {
-	.destroy = &mgag200_ttm_backend_destroy,
-};
-
-
-static struct ttm_tt *mgag200_ttm_tt_create(struct ttm_buffer_object *bo,
-					    uint32_t page_flags)
-{
-	struct ttm_tt *tt;
-
-	tt = kzalloc(sizeof(struct ttm_tt), GFP_KERNEL);
-	if (tt == NULL)
-		return NULL;
-	tt->func = &mgag200_tt_backend_func;
-	if (ttm_tt_init(tt, bo, page_flags)) {
-		kfree(tt);
-		return NULL;
-	}
-	return tt;
-}
-
-struct ttm_bo_driver mgag200_bo_driver = {
-	.ttm_tt_create = mgag200_ttm_tt_create,
-	.init_mem_type = mgag200_bo_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = mgag200_bo_evict_flags,
-	.move = NULL,
-	.verify_access = mgag200_bo_verify_access,
-	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
-	.io_mem_free = &mgag200_ttm_io_mem_free,
-};
-
 int mgag200_mm_init(struct mga_device *mdev)
 {
+	struct drm_vram_mm *vmm;
 	int ret;
 	struct drm_device *dev = mdev->dev;
-	struct ttm_bo_device *bdev = &mdev->ttm.bdev;
-
-	ret = ttm_bo_device_init(&mdev->ttm.bdev,
-				 &mgag200_bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("Error initialising bo driver; %d\n", ret);
-		return ret;
-	}
 
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM, mdev->mc.vram_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
+	vmm = drm_vram_helper_alloc_mm(dev, pci_resource_start(dev->pdev, 0),
+				       mdev->mc.vram_size,
+				       &drm_gem_vram_mm_funcs);
+	if (IS_ERR(vmm)) {
+		ret = PTR_ERR(vmm);
+		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
 		return ret;
 	}
 
@@ -203,149 +57,10 @@ void mgag200_mm_fini(struct mga_device *mdev)
 {
 	struct drm_device *dev = mdev->dev;
 
-	ttm_bo_device_release(&mdev->ttm.bdev);
+	drm_vram_helper_release_mm(dev);
 
 	arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
 				pci_resource_len(dev->pdev, 0));
 	arch_phys_wc_del(mdev->fb_mtrr);
 	mdev->fb_mtrr = 0;
 }
-
-void mgag200_ttm_placement(struct mgag200_bo *bo, int domain)
-{
-	u32 c = 0;
-	unsigned i;
-
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
-	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	if (!c)
-		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	bo->placement.num_placement = c;
-	bo->placement.num_busy_placement = c;
-	for (i = 0; i < c; ++i) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-}
-
-int mgag200_bo_create(struct drm_device *dev, int size, int align,
-		  uint32_t flags, struct mgag200_bo **pmgabo)
-{
-	struct mga_device *mdev = dev->dev_private;
-	struct mgag200_bo *mgabo;
-	size_t acc_size;
-	int ret;
-
-	mgabo = kzalloc(sizeof(struct mgag200_bo), GFP_KERNEL);
-	if (!mgabo)
-		return -ENOMEM;
-
-	ret = drm_gem_object_init(dev, &mgabo->gem, size);
-	if (ret) {
-		kfree(mgabo);
-		return ret;
-	}
-
-	mgabo->bo.bdev = &mdev->ttm.bdev;
-
-	mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&mdev->ttm.bdev, size,
-				       sizeof(struct mgag200_bo));
-
-	ret = ttm_bo_init(&mdev->ttm.bdev, &mgabo->bo, size,
-			  ttm_bo_type_device, &mgabo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, mgag200_bo_ttm_destroy);
-	if (ret)
-		return ret;
-
-	*pmgabo = mgabo;
-	return 0;
-}
-
-static inline u64 mgag200_bo_gpu_offset(struct mgag200_bo *bo)
-{
-	return bo->bo.offset;
-}
-
-int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		if (gpu_addr)
-			*gpu_addr = mgag200_bo_gpu_offset(bo);
-		return 0;
-	}
-
-	mgag200_ttm_placement(bo, pl_flag);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret)
-		return ret;
-
-	bo->pin_count = 1;
-	if (gpu_addr)
-		*gpu_addr = mgag200_bo_gpu_offset(bo);
-	return 0;
-}
-
-int mgag200_bo_unpin(struct mgag200_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i;
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-	return ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-}
-
-int mgag200_bo_push_sysram(struct mgag200_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	if (bo->kmap.virtual)
-		ttm_bo_kunmap(&bo->kmap);
-
-	mgag200_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
-	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret) {
-		DRM_ERROR("pushing to VRAM failed\n");
-		return ret;
-	}
-	return 0;
-}
-
-int mgag200_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct mga_device *mdev = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &mdev->ttm.bdev);
-}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index dfdfa766da8f..3772f745589d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -694,14 +694,12 @@ end:
 
 static void dpu_crtc_reset(struct drm_crtc *crtc)
 {
-	struct dpu_crtc_state *cstate;
+	struct dpu_crtc_state *cstate = kzalloc(sizeof(*cstate), GFP_KERNEL);
 
 	if (crtc->state)
 		dpu_crtc_destroy_state(crtc, crtc->state);
 
-	crtc->state = kzalloc(sizeof(*cstate), GFP_KERNEL);
-	if (crtc->state)
-		crtc->state->crtc = crtc;
+	__drm_atomic_helper_crtc_reset(crtc, &cstate->base);
 }
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index 0440696b5bad..2307c431a894 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -1032,10 +1032,11 @@ int dpu_format_check_modified_format(
 		const struct drm_mode_fb_cmd2 *cmd,
 		struct drm_gem_object **bos)
 {
-	int ret, i, num_base_fmt_planes;
+	const struct drm_format_info *info;
 	const struct dpu_format *fmt;
 	struct dpu_hw_fmt_layout layout;
 	uint32_t bos_total_size = 0;
+	int ret, i;
 
 	if (!msm_fmt || !cmd || !bos) {
 		DRM_ERROR("invalid arguments\n");
@@ -1043,14 +1044,16 @@ int dpu_format_check_modified_format(
 	}
 
 	fmt = to_dpu_format(msm_fmt);
-	num_base_fmt_planes = drm_format_num_planes(fmt->base.pixel_format);
+	info = drm_format_info(fmt->base.pixel_format);
+	if (!info)
+		return -EINVAL;
 
 	ret = dpu_format_get_plane_sizes(fmt, cmd->width, cmd->height,
 			&layout, cmd->pitches);
 	if (ret)
 		return ret;
 
-	for (i = 0; i < num_base_fmt_planes; i++) {
+	for (i = 0; i < info->num_planes; i++) {
 		if (!bos[i]) {
 			DRM_ERROR("invalid handle for plane %d\n", i);
 			return -EINVAL;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index ce1a555e1f31..d831cedb55ec 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -557,14 +557,9 @@ static void _dpu_plane_setup_scaler(struct dpu_plane *pdpu,
 		struct dpu_plane_state *pstate,
 		const struct dpu_format *fmt, bool color_fill)
 {
-	uint32_t chroma_subsmpl_h, chroma_subsmpl_v;
+	const struct drm_format_info *info = drm_format_info(fmt->base.pixel_format);
 
 	/* don't chroma subsample if decimating */
-	chroma_subsmpl_h =
-		drm_format_horz_chroma_subsampling(fmt->base.pixel_format);
-	chroma_subsmpl_v =
-		drm_format_vert_chroma_subsampling(fmt->base.pixel_format);
-
 	/* update scaler. calculate default config for QSEED3 */
 	_dpu_plane_setup_scaler3(pdpu, pstate,
 			drm_rect_width(&pdpu->pipe_cfg.src_rect),
@@ -572,7 +567,7 @@ static void _dpu_plane_setup_scaler(struct dpu_plane *pdpu,
 			drm_rect_width(&pdpu->pipe_cfg.dst_rect),
 			drm_rect_height(&pdpu->pipe_cfg.dst_rect),
 			&pstate->scaler3_cfg, fmt,
-			chroma_subsmpl_h, chroma_subsmpl_v);
+			info->hsub, info->vsub);
 }
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index b0cf63c4e3d7..c3751c95b452 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -782,6 +782,7 @@ static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h)
 
 static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc)
 {
+	const struct drm_format_info *info = drm_format_info(DRM_FORMAT_ARGB8888);
 	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
@@ -800,7 +801,7 @@ static void mdp5_crtc_restore_cursor(struct drm_crtc *crtc)
 	width = mdp5_crtc->cursor.width;
 	height = mdp5_crtc->cursor.height;
 
-	stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0);
+	stride = width * info->cpp[0];
 
 	get_roi(crtc, &roi_w, &roi_h);
 
@@ -1002,23 +1003,6 @@ mdp5_crtc_atomic_print_state(struct drm_printer *p,
 	drm_printf(p, "\tcmd_mode=%d\n", mdp5_cstate->cmd_mode);
 }
 
-static void mdp5_crtc_reset(struct drm_crtc *crtc)
-{
-	struct mdp5_crtc_state *mdp5_cstate;
-
-	if (crtc->state) {
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-		kfree(to_mdp5_crtc_state(crtc->state));
-	}
-
-	mdp5_cstate = kzalloc(sizeof(*mdp5_cstate), GFP_KERNEL);
-
-	if (mdp5_cstate) {
-		mdp5_cstate->base.crtc = crtc;
-		crtc->state = &mdp5_cstate->base;
-	}
-}
-
 static struct drm_crtc_state *
 mdp5_crtc_duplicate_state(struct drm_crtc *crtc)
 {
@@ -1046,6 +1030,17 @@ static void mdp5_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state
 	kfree(mdp5_cstate);
 }
 
+static void mdp5_crtc_reset(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc_state *mdp5_cstate =
+		kzalloc(sizeof(*mdp5_cstate), GFP_KERNEL);
+
+	if (crtc->state)
+		mdp5_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base);
+}
+
 static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = mdp5_crtc_destroy,
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
index b854f471e9e5..1105c2433f14 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_plane.c
@@ -654,10 +654,10 @@ static int calc_scalex_steps(struct drm_plane *plane,
 		uint32_t pixel_format, uint32_t src, uint32_t dest,
 		uint32_t phasex_steps[COMP_MAX])
 {
+	const struct drm_format_info *info = drm_format_info(pixel_format);
 	struct mdp5_kms *mdp5_kms = get_kms(plane);
 	struct device *dev = mdp5_kms->dev->dev;
 	uint32_t phasex_step;
-	unsigned int hsub;
 	int ret;
 
 	ret = calc_phase_step(src, dest, &phasex_step);
@@ -666,11 +666,9 @@ static int calc_scalex_steps(struct drm_plane *plane,
 		return ret;
 	}
 
-	hsub = drm_format_horz_chroma_subsampling(pixel_format);
-
 	phasex_steps[COMP_0]   = phasex_step;
 	phasex_steps[COMP_3]   = phasex_step;
-	phasex_steps[COMP_1_2] = phasex_step / hsub;
+	phasex_steps[COMP_1_2] = phasex_step / info->hsub;
 
 	return 0;
 }
@@ -679,10 +677,10 @@ static int calc_scaley_steps(struct drm_plane *plane,
 		uint32_t pixel_format, uint32_t src, uint32_t dest,
 		uint32_t phasey_steps[COMP_MAX])
 {
+	const struct drm_format_info *info = drm_format_info(pixel_format);
 	struct mdp5_kms *mdp5_kms = get_kms(plane);
 	struct device *dev = mdp5_kms->dev->dev;
 	uint32_t phasey_step;
-	unsigned int vsub;
 	int ret;
 
 	ret = calc_phase_step(src, dest, &phasey_step);
@@ -691,11 +689,9 @@ static int calc_scaley_steps(struct drm_plane *plane,
 		return ret;
 	}
 
-	vsub = drm_format_vert_chroma_subsampling(pixel_format);
-
 	phasey_steps[COMP_0]   = phasey_step;
 	phasey_steps[COMP_3]   = phasey_step;
-	phasey_steps[COMP_1_2] = phasey_step / vsub;
+	phasey_steps[COMP_1_2] = phasey_step / info->vsub;
 
 	return 0;
 }
@@ -703,8 +699,9 @@ static int calc_scaley_steps(struct drm_plane *plane,
 static uint32_t get_scale_config(const struct mdp_format *format,
 		uint32_t src, uint32_t dst, bool horz)
 {
+	const struct drm_format_info *info = drm_format_info(format->base.pixel_format);
 	bool scaling = format->is_yuv ? true : (src != dst);
-	uint32_t sub, pix_fmt = format->base.pixel_format;
+	uint32_t sub;
 	uint32_t ya_filter, uv_filter;
 	bool yuv = format->is_yuv;
 
@@ -712,8 +709,7 @@ static uint32_t get_scale_config(const struct mdp_format *format,
 		return 0;
 
 	if (yuv) {
-		sub = horz ? drm_format_horz_chroma_subsampling(pix_fmt) :
-			     drm_format_vert_chroma_subsampling(pix_fmt);
+		sub = horz ? info->hsub : info->vsub;
 		uv_filter = ((src / sub) <= dst) ?
 				   SCALE_FILTER_BIL : SCALE_FILTER_PCMN;
 	}
@@ -758,7 +754,7 @@ static void mdp5_write_pixel_ext(struct mdp5_kms *mdp5_kms, enum mdp5_pipe pipe,
 	uint32_t src_w, int pe_left[COMP_MAX], int pe_right[COMP_MAX],
 	uint32_t src_h, int pe_top[COMP_MAX], int pe_bottom[COMP_MAX])
 {
-	uint32_t pix_fmt = format->base.pixel_format;
+	const struct drm_format_info *info = drm_format_info(format->base.pixel_format);
 	uint32_t lr, tb, req;
 	int i;
 
@@ -767,8 +763,8 @@ static void mdp5_write_pixel_ext(struct mdp5_kms *mdp5_kms, enum mdp5_pipe pipe,
 		uint32_t roi_h = src_h;
 
 		if (format->is_yuv && i == COMP_1_2) {
-			roi_w /= drm_format_horz_chroma_subsampling(pix_fmt);
-			roi_h /= drm_format_vert_chroma_subsampling(pix_fmt);
+			roi_w /= info->hsub;
+			roi_h /= info->vsub;
 		}
 
 		lr  = (pe_left[i] >= 0) ?
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
index 6153514db04c..2834837f4d3e 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_smp.c
@@ -127,14 +127,15 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp,
 		const struct mdp_format *format,
 		u32 width, bool hdecim)
 {
+	const struct drm_format_info *info = drm_format_info(format->base.pixel_format);
 	struct mdp5_kms *mdp5_kms = get_kms(smp);
 	int rev = mdp5_cfg_get_hw_rev(mdp5_kms->cfg);
 	int i, hsub, nplanes, nlines;
 	u32 fmt = format->base.pixel_format;
 	uint32_t blkcfg = 0;
 
-	nplanes = drm_format_num_planes(fmt);
-	hsub = drm_format_horz_chroma_subsampling(fmt);
+	nplanes = info->num_planes;
+	hsub = info->hsub;
 
 	/* different if BWC (compressed framebuffer?) enabled: */
 	nlines = 2;
@@ -157,7 +158,7 @@ uint32_t mdp5_smp_calculate(struct mdp5_smp *smp,
 	for (i = 0; i < nplanes; i++) {
 		int n, fetch_stride, cpp;
 
-		cpp = drm_format_plane_cpp(fmt, i);
+		cpp = info->cpp[i];
 		fetch_stride = width * cpp / (i ? hsub : 1);
 
 		n = DIV_ROUND_UP(fetch_stride * nlines, smp->blk_size);
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 136058978e0f..68fa2c8f61e6 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -106,9 +106,11 @@ const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb)
 struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
 		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev,
+								 mode_cmd);
 	struct drm_gem_object *bos[4] = {0};
 	struct drm_framebuffer *fb;
-	int ret, i, n = drm_format_num_planes(mode_cmd->pixel_format);
+	int ret, i, n = info->num_planes;
 
 	for (i = 0; i < n; i++) {
 		bos[i] = drm_gem_object_lookup(file, mode_cmd->handles[i]);
@@ -135,22 +137,20 @@ out_unref:
 static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
 		const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev,
+								 mode_cmd);
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_kms *kms = priv->kms;
 	struct msm_framebuffer *msm_fb = NULL;
 	struct drm_framebuffer *fb;
 	const struct msm_format *format;
 	int ret, i, n;
-	unsigned int hsub, vsub;
 
 	DBG("create framebuffer: dev=%p, mode_cmd=%p (%dx%d@%4.4s)",
 			dev, mode_cmd, mode_cmd->width, mode_cmd->height,
 			(char *)&mode_cmd->pixel_format);
 
-	n = drm_format_num_planes(mode_cmd->pixel_format);
-	hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format);
-	vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format);
-
+	n = info->num_planes;
 	format = kms->funcs->get_format(kms, mode_cmd->pixel_format,
 			mode_cmd->modifier[0]);
 	if (!format) {
@@ -176,12 +176,12 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
 	}
 
 	for (i = 0; i < n; i++) {
-		unsigned int width = mode_cmd->width / (i ? hsub : 1);
-		unsigned int height = mode_cmd->height / (i ? vsub : 1);
+		unsigned int width = mode_cmd->width / (i ? info->hsub : 1);
+		unsigned int height = mode_cmd->height / (i ? info->vsub : 1);
 		unsigned int min_size;
 
 		min_size = (height - 1) * mode_cmd->pitches[i]
-			 + width * drm_format_plane_cpp(mode_cmd->pixel_format, i)
+			 + width * info->cpp[i]
 			 + mode_cmd->offsets[i];
 
 		if (bos[i]->size < min_size) {
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 4b1650f51955..7ba373f493b2 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -948,11 +948,12 @@ nv50_mstc_get_modes(struct drm_connector *connector)
 
 static int
 nv50_mstc_atomic_check(struct drm_connector *connector,
-		       struct drm_connector_state *new_conn_state)
+		       struct drm_atomic_state *state)
 {
-	struct drm_atomic_state *state = new_conn_state->state;
 	struct nv50_mstc *mstc = nv50_mstc(connector);
 	struct drm_dp_mst_topology_mgr *mgr = &mstc->mstm->mgr;
+	struct drm_connector_state *new_conn_state =
+		drm_atomic_get_new_connector_state(state, connector);
 	struct drm_connector_state *old_conn_state =
 		drm_atomic_get_old_connector_state(state, connector);
 	struct drm_crtc_state *crtc_state;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index 06ee23823a68..48a6485ec4e0 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -421,16 +421,6 @@ nv50_head_atomic_duplicate_state(struct drm_crtc *crtc)
 }
 
 static void
-__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
-			       struct drm_crtc_state *state)
-{
-	if (crtc->state)
-		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
-	crtc->state = state;
-	crtc->state->crtc = crtc;
-}
-
-static void
 nv50_head_reset(struct drm_crtc *crtc)
 {
 	struct nv50_head_atom *asyh;
@@ -438,6 +428,9 @@ nv50_head_reset(struct drm_crtc *crtc)
 	if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL))))
 		return;
 
+	if (crtc->state)
+		nv50_head_atomic_destroy_state(crtc, crtc->state);
+
 	__drm_atomic_helper_crtc_reset(crtc, &asyh->state);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
index c80b96789c31..2b44ba5cf4b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
@@ -26,8 +26,6 @@
 
 #include <subdev/gpio.h>
 
-#include <subdev/gpio.h>
-
 static void
 nv04_bus_intr(struct nvkm_bus *bus)
 {
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index 741a5e324767..913e8291a917 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -410,8 +410,7 @@ static const struct backlight_ops dsicm_bl_ops = {
 static ssize_t dsicm_num_errors_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	struct omap_dss_device *src = ddata->src;
 	u8 errors = 0;
 	int r;
@@ -442,8 +441,7 @@ static ssize_t dsicm_num_errors_show(struct device *dev,
 static ssize_t dsicm_hw_revision_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	struct omap_dss_device *src = ddata->src;
 	u8 id1, id2, id3;
 	int r;
@@ -474,8 +472,7 @@ static ssize_t dsicm_store_ulps(struct device *dev,
 		struct device_attribute *attr,
 		const char *buf, size_t count)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	struct omap_dss_device *src = ddata->src;
 	unsigned long t;
 	int r;
@@ -509,8 +506,7 @@ static ssize_t dsicm_show_ulps(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	unsigned int t;
 
 	mutex_lock(&ddata->lock);
@@ -524,8 +520,7 @@ static ssize_t dsicm_store_ulps_timeout(struct device *dev,
 		struct device_attribute *attr,
 		const char *buf, size_t count)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	struct omap_dss_device *src = ddata->src;
 	unsigned long t;
 	int r;
@@ -556,8 +551,7 @@ static ssize_t dsicm_show_ulps_timeout(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
+	struct panel_drv_data *ddata = dev_get_drvdata(dev);
 	unsigned int t;
 
 	mutex_lock(&ddata->lock);
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c b/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
index 2b41c75ce988..e02aa8e70968 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss-boot-init.c
@@ -198,6 +198,7 @@ static const struct of_device_id omapdss_of_fixups_whitelist[] __initconst = {
 	{ .compatible = "toppoly,td028ttec1" },
 	{ .compatible = "tpo,td028ttec1" },
 	{ .compatible = "tpo,td043mtea1" },
+	{},
 };
 
 static int __init omapdss_boot_init(void)
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index 5a29bf01c0e8..d61215494617 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -32,6 +32,7 @@ struct omap_crtc_state {
 	/* Shadow values for legacy userspace support. */
 	unsigned int rotation;
 	unsigned int zpos;
+	bool manually_updated;
 };
 
 #define to_omap_crtc(x) container_of(x, struct omap_crtc, base)
@@ -51,6 +52,10 @@ struct omap_crtc {
 	bool pending;
 	wait_queue_head_t pending_wait;
 	struct drm_pending_vblank_event *event;
+	struct delayed_work update_work;
+
+	void (*framedone_handler)(void *);
+	void *framedone_handler_data;
 };
 
 /* -----------------------------------------------------------------------------
@@ -102,21 +107,18 @@ int omap_crtc_wait_pending(struct drm_crtc *crtc)
 /*
  * Manager-ops, callbacks from output when they need to configure
  * the upstream part of the video pipe.
- *
- * Most of these we can ignore until we add support for command-mode
- * panels.. for video-mode the crtc-helpers already do an adequate
- * job of sequencing the setup of the video pipe in the proper order
  */
 
-/* we can probably ignore these until we support command-mode panels: */
 static void omap_crtc_dss_start_update(struct omap_drm_private *priv,
 				       enum omap_channel channel)
 {
+	priv->dispc_ops->mgr_enable(priv->dispc, channel, true);
 }
 
 /* Called only from the encoder enable/disable and suspend/resume handlers. */
 static void omap_crtc_set_enabled(struct drm_crtc *crtc, bool enable)
 {
+	struct omap_crtc_state *omap_state = to_omap_crtc_state(crtc->state);
 	struct drm_device *dev = crtc->dev;
 	struct omap_drm_private *priv = dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
@@ -128,6 +130,12 @@ static void omap_crtc_set_enabled(struct drm_crtc *crtc, bool enable)
 	if (WARN_ON(omap_crtc->enabled == enable))
 		return;
 
+	if (omap_state->manually_updated) {
+		omap_irq_enable_framedone(crtc, enable);
+		omap_crtc->enabled = enable;
+		return;
+	}
+
 	if (omap_crtc->pipe->output->type == OMAP_DISPLAY_TYPE_HDMI) {
 		priv->dispc_ops->mgr_enable(priv->dispc, channel, enable);
 		omap_crtc->enabled = enable;
@@ -230,6 +238,18 @@ static int omap_crtc_dss_register_framedone(
 		struct omap_drm_private *priv, enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
+	struct drm_crtc *crtc = priv->channels[channel]->crtc;
+	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct drm_device *dev = omap_crtc->base.dev;
+
+	if (omap_crtc->framedone_handler)
+		return -EBUSY;
+
+	dev_dbg(dev->dev, "register framedone %s", omap_crtc->name);
+
+	omap_crtc->framedone_handler = handler;
+	omap_crtc->framedone_handler_data = data;
+
 	return 0;
 }
 
@@ -237,6 +257,17 @@ static void omap_crtc_dss_unregister_framedone(
 		struct omap_drm_private *priv, enum omap_channel channel,
 		void (*handler)(void *), void *data)
 {
+	struct drm_crtc *crtc = priv->channels[channel]->crtc;
+	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct drm_device *dev = omap_crtc->base.dev;
+
+	dev_dbg(dev->dev, "unregister framedone %s", omap_crtc->name);
+
+	WARN_ON(omap_crtc->framedone_handler != handler);
+	WARN_ON(omap_crtc->framedone_handler_data != data);
+
+	omap_crtc->framedone_handler = NULL;
+	omap_crtc->framedone_handler_data = NULL;
 }
 
 static const struct dss_mgr_ops mgr_ops = {
@@ -302,6 +333,73 @@ void omap_crtc_vblank_irq(struct drm_crtc *crtc)
 	DBG("%s: apply done", omap_crtc->name);
 }
 
+void omap_crtc_framedone_irq(struct drm_crtc *crtc, uint32_t irqstatus)
+{
+	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+
+	if (!omap_crtc->framedone_handler)
+		return;
+
+	omap_crtc->framedone_handler(omap_crtc->framedone_handler_data);
+
+	spin_lock(&crtc->dev->event_lock);
+	/* Send the vblank event if one has been requested. */
+	if (omap_crtc->event) {
+		drm_crtc_send_vblank_event(crtc, omap_crtc->event);
+		omap_crtc->event = NULL;
+	}
+	omap_crtc->pending = false;
+	spin_unlock(&crtc->dev->event_lock);
+
+	/* Wake up omap_atomic_complete. */
+	wake_up(&omap_crtc->pending_wait);
+}
+
+void omap_crtc_flush(struct drm_crtc *crtc)
+{
+	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct omap_crtc_state *omap_state = to_omap_crtc_state(crtc->state);
+
+	if (!omap_state->manually_updated)
+		return;
+
+	if (!delayed_work_pending(&omap_crtc->update_work))
+		schedule_delayed_work(&omap_crtc->update_work, 0);
+}
+
+static void omap_crtc_manual_display_update(struct work_struct *data)
+{
+	struct omap_crtc *omap_crtc =
+			container_of(data, struct omap_crtc, update_work.work);
+	struct drm_display_mode *mode = &omap_crtc->pipe->crtc->mode;
+	struct omap_dss_device *dssdev = omap_crtc->pipe->output->next;
+	struct drm_device *dev = omap_crtc->base.dev;
+	const struct omap_dss_driver *dssdrv;
+	int ret;
+
+	if (!dssdev) {
+		dev_err_once(dev->dev, "missing display dssdev!");
+		return;
+	}
+
+	dssdrv = dssdev->driver;
+	if (!dssdrv || !dssdrv->update) {
+		dev_err_once(dev->dev, "missing or incorrect dssdrv!");
+		return;
+	}
+
+	if (dssdrv->sync)
+		dssdrv->sync(dssdev);
+
+	ret = dssdrv->update(dssdev, 0, 0, mode->hdisplay, mode->vdisplay);
+	if (ret < 0) {
+		spin_lock_irq(&dev->event_lock);
+		omap_crtc->pending = false;
+		spin_unlock_irq(&dev->event_lock);
+		wake_up(&omap_crtc->pending_wait);
+	}
+}
+
 static void omap_crtc_write_crtc_properties(struct drm_crtc *crtc)
 {
 	struct omap_drm_private *priv = crtc->dev->dev_private;
@@ -351,12 +449,17 @@ static void omap_crtc_atomic_enable(struct drm_crtc *crtc,
 {
 	struct omap_drm_private *priv = crtc->dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct omap_crtc_state *omap_state = to_omap_crtc_state(crtc->state);
 	int ret;
 
 	DBG("%s", omap_crtc->name);
 
 	priv->dispc_ops->runtime_get(priv->dispc);
 
+	/* manual updated display will not trigger vsync irq */
+	if (omap_state->manually_updated)
+		return;
+
 	spin_lock_irq(&crtc->dev->event_lock);
 	drm_crtc_vblank_on(crtc);
 	ret = drm_crtc_vblank_get(crtc);
@@ -371,6 +474,7 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct omap_drm_private *priv = crtc->dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
 
 	DBG("%s", omap_crtc->name);
 
@@ -381,6 +485,11 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc,
 	}
 	spin_unlock_irq(&crtc->dev->event_lock);
 
+	cancel_delayed_work(&omap_crtc->update_work);
+
+	if (!omap_crtc_wait_pending(crtc))
+		dev_warn(dev->dev, "manual display update did not finish!");
+
 	drm_crtc_vblank_off(crtc);
 
 	priv->dispc_ops->runtime_put(priv->dispc);
@@ -395,10 +504,20 @@ static enum drm_mode_status omap_crtc_mode_valid(struct drm_crtc *crtc,
 	int r;
 
 	drm_display_mode_to_videomode(mode, &vm);
-	r = priv->dispc_ops->mgr_check_timings(priv->dispc, omap_crtc->channel,
-					       &vm);
-	if (r)
-		return r;
+
+	/*
+	 * DSI might not call this, since the supplied mode is not a
+	 * valid DISPC mode. DSI will calculate and configure the
+	 * proper DISPC mode later.
+	 */
+	if (omap_crtc->pipe->output->next == NULL ||
+	    omap_crtc->pipe->output->next->type != OMAP_DISPLAY_TYPE_DSI) {
+		r = priv->dispc_ops->mgr_check_timings(priv->dispc,
+						       omap_crtc->channel,
+						       &vm);
+		if (r)
+			return r;
+	}
 
 	/* Check for bandwidth limit */
 	if (priv->max_bandwidth) {
@@ -441,6 +560,22 @@ static void omap_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	drm_display_mode_to_videomode(mode, &omap_crtc->vm);
 }
 
+static bool omap_crtc_is_manually_updated(struct drm_crtc *crtc)
+{
+	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct omap_dss_device *display = omap_crtc->pipe->output->next;
+
+	if (!display)
+		return false;
+
+	if (display->caps & OMAP_DSS_DISPLAY_CAP_MANUAL_UPDATE) {
+		DBG("detected manually updated display!");
+		return true;
+	}
+
+	return false;
+}
+
 static int omap_crtc_atomic_check(struct drm_crtc *crtc,
 				struct drm_crtc_state *state)
 {
@@ -462,6 +597,9 @@ static int omap_crtc_atomic_check(struct drm_crtc *crtc,
 		/* Mirror new values for zpos and rotation in omap_crtc_state */
 		omap_crtc_state->zpos = pri_state->zpos;
 		omap_crtc_state->rotation = pri_state->rotation;
+
+		/* Check if this CRTC is for a manually updated display */
+		omap_crtc_state->manually_updated = omap_crtc_is_manually_updated(crtc);
 	}
 
 	return 0;
@@ -477,6 +615,7 @@ static void omap_crtc_atomic_flush(struct drm_crtc *crtc,
 {
 	struct omap_drm_private *priv = crtc->dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
+	struct omap_crtc_state *omap_crtc_state = to_omap_crtc_state(crtc->state);
 	int ret;
 
 	if (crtc->state->color_mgmt_changed) {
@@ -501,6 +640,15 @@ static void omap_crtc_atomic_flush(struct drm_crtc *crtc,
 
 	DBG("%s: GO", omap_crtc->name);
 
+	if (omap_crtc_state->manually_updated) {
+		/* send new image for page flips and modeset changes */
+		spin_lock_irq(&crtc->dev->event_lock);
+		omap_crtc_flush(crtc);
+		omap_crtc_arm_event(crtc);
+		spin_unlock_irq(&crtc->dev->event_lock);
+		return;
+	}
+
 	ret = drm_crtc_vblank_get(crtc);
 	WARN_ON(ret != 0);
 
@@ -586,6 +734,7 @@ omap_crtc_duplicate_state(struct drm_crtc *crtc)
 
 	state->zpos = current_state->zpos;
 	state->rotation = current_state->rotation;
+	state->manually_updated = current_state->manually_updated;
 
 	return &state->base;
 }
@@ -662,6 +811,19 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
 	omap_crtc->channel = channel;
 	omap_crtc->name = channel_names[channel];
 
+	/*
+	 * We want to refresh manually updated displays from dirty callback,
+	 * which is called quite often (e.g. for each drawn line). This will
+	 * be used to do the display update asynchronously to avoid blocking
+	 * the rendering process and merges multiple dirty calls into one
+	 * update if they arrive very fast. We also call this function for
+	 * atomic display updates (e.g. for page flips), which means we do
+	 * not need extra locking. Atomic updates should be synchronous, but
+	 * need to wait for the framedone interrupt anyways.
+	 */
+	INIT_DELAYED_WORK(&omap_crtc->update_work,
+			  omap_crtc_manual_display_update);
+
 	ret = drm_crtc_init_with_planes(dev, crtc, plane, NULL,
 					&omap_crtc_funcs, NULL);
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.h b/drivers/gpu/drm/omapdrm/omap_crtc.h
index d9de437ba9dd..2b518c74203e 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.h
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.h
@@ -41,5 +41,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev,
 int omap_crtc_wait_pending(struct drm_crtc *crtc);
 void omap_crtc_error_irq(struct drm_crtc *crtc, u32 irqstatus);
 void omap_crtc_vblank_irq(struct drm_crtc *crtc);
+void omap_crtc_framedone_irq(struct drm_crtc *crtc, uint32_t irqstatus);
+void omap_crtc_flush(struct drm_crtc *crtc);
 
 #endif /* __OMAPDRM_CRTC_H__ */
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index 1b9b6f5e48e1..672e0f8ad11c 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -439,20 +439,6 @@ static int ioctl_get_param(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static int ioctl_set_param(struct drm_device *dev, void *data,
-		struct drm_file *file_priv)
-{
-	struct drm_omap_param *args = data;
-
-	switch (args->param) {
-	default:
-		DBG("unknown parameter %lld", args->param);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 #define OMAP_BO_USER_MASK	0x00ffffff	/* flags settable by userspace */
 
 static int ioctl_gem_new(struct drm_device *dev, void *data,
@@ -492,7 +478,7 @@ static int ioctl_gem_info(struct drm_device *dev, void *data,
 static const struct drm_ioctl_desc ioctls[DRM_COMMAND_END - DRM_COMMAND_BASE] = {
 	DRM_IOCTL_DEF_DRV(OMAP_GET_PARAM, ioctl_get_param,
 			  DRM_AUTH | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(OMAP_SET_PARAM, ioctl_set_param,
+	DRM_IOCTL_DEF_DRV(OMAP_SET_PARAM, drm_invalid_op,
 			  DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(OMAP_GEM_NEW, ioctl_gem_new,
 			  DRM_AUTH | DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 3cca45cb25f3..896aa12f09b2 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -37,8 +37,8 @@
 #include "omap_irq.h"
 #include "omap_plane.h"
 
-#define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
-#define VERB(fmt, ...) if (0) DRM_DEBUG(fmt, ##__VA_ARGS__) /* verbose debug */
+#define DBG(fmt, ...) DRM_DEBUG_DRIVER(fmt"\n", ##__VA_ARGS__)
+#define VERB(fmt, ...) if (0) DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /* verbose debug */
 
 #define MODULE_NAME     "omapdrm"
 
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 4f8eb9d08f99..06d5c5081e41 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -66,8 +66,27 @@ struct omap_framebuffer {
 	struct mutex lock;
 };
 
+static int omap_framebuffer_dirty(struct drm_framebuffer *fb,
+				  struct drm_file *file_priv,
+				  unsigned flags, unsigned color,
+				  struct drm_clip_rect *clips,
+				  unsigned num_clips)
+{
+	struct drm_crtc *crtc;
+
+	drm_modeset_lock_all(fb->dev);
+
+	drm_for_each_crtc(crtc, fb->dev)
+		omap_crtc_flush(crtc);
+
+	drm_modeset_unlock_all(fb->dev);
+
+	return 0;
+}
+
 static const struct drm_framebuffer_funcs omap_framebuffer_funcs = {
 	.create_handle = drm_gem_fb_create_handle,
+	.dirty = omap_framebuffer_dirty,
 	.destroy = drm_gem_fb_destroy,
 };
 
@@ -298,7 +317,9 @@ void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
 struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev,
 		struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd)
 {
-	unsigned int num_planes = drm_format_num_planes(mode_cmd->pixel_format);
+	const struct drm_format_info *info = drm_get_format_info(dev,
+								 mode_cmd);
+	unsigned int num_planes = info->num_planes;
 	struct drm_gem_object *bos[4];
 	struct drm_framebuffer *fb;
 	int i;
@@ -337,7 +358,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
 			dev, mode_cmd, mode_cmd->width, mode_cmd->height,
 			(char *)&mode_cmd->pixel_format);
 
-	format = drm_format_info(mode_cmd->pixel_format);
+	format = drm_get_format_info(dev, mode_cmd);
 
 	for (i = 0; i < ARRAY_SIZE(formats); i++) {
 		if (formats[i] == mode_cmd->pixel_format)
diff --git a/drivers/gpu/drm/omapdrm/omap_irq.c b/drivers/gpu/drm/omapdrm/omap_irq.c
index 329ad26d6d50..01dda84ca2ee 100644
--- a/drivers/gpu/drm/omapdrm/omap_irq.c
+++ b/drivers/gpu/drm/omapdrm/omap_irq.c
@@ -85,6 +85,28 @@ int omap_irq_wait(struct drm_device *dev, struct omap_irq_wait *wait,
 	return ret == 0 ? -1 : 0;
 }
 
+int omap_irq_enable_framedone(struct drm_crtc *crtc, bool enable)
+{
+	struct drm_device *dev = crtc->dev;
+	struct omap_drm_private *priv = dev->dev_private;
+	unsigned long flags;
+	enum omap_channel channel = omap_crtc_channel(crtc);
+	int framedone_irq =
+		priv->dispc_ops->mgr_get_framedone_irq(priv->dispc, channel);
+
+	DBG("dev=%p, crtc=%u, enable=%d", dev, channel, enable);
+
+	spin_lock_irqsave(&priv->wait_lock, flags);
+	if (enable)
+		priv->irq_mask |= framedone_irq;
+	else
+		priv->irq_mask &= ~framedone_irq;
+	omap_irq_update(dev);
+	spin_unlock_irqrestore(&priv->wait_lock, flags);
+
+	return 0;
+}
+
 /**
  * enable_vblank - enable vblank interrupt events
  * @dev: DRM device
@@ -217,6 +239,9 @@ static irqreturn_t omap_irq_handler(int irq, void *arg)
 
 		if (irqstatus & priv->dispc_ops->mgr_get_sync_lost_irq(priv->dispc, channel))
 			omap_crtc_error_irq(crtc, irqstatus);
+
+		if (irqstatus & priv->dispc_ops->mgr_get_framedone_irq(priv->dispc, channel))
+			omap_crtc_framedone_irq(crtc, irqstatus);
 	}
 
 	omap_irq_ocp_error_handler(dev, irqstatus);
diff --git a/drivers/gpu/drm/omapdrm/omap_irq.h b/drivers/gpu/drm/omapdrm/omap_irq.h
index 9d5441468eca..02abb4ed9813 100644
--- a/drivers/gpu/drm/omapdrm/omap_irq.h
+++ b/drivers/gpu/drm/omapdrm/omap_irq.h
@@ -27,6 +27,7 @@ struct drm_device;
 struct omap_irq_wait;
 
 int omap_irq_enable_vblank(struct drm_crtc *crtc);
+int omap_irq_enable_framedone(struct drm_crtc *crtc, bool enable);
 void omap_irq_disable_vblank(struct drm_crtc *crtc);
 void omap_drm_irq_uninstall(struct drm_device *dev);
 int omap_drm_irq_install(struct drm_device *dev);
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index e281fc544742..d9d931aa6e26 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -132,6 +132,15 @@ config DRM_PANEL_ORISETECH_OTM8009A
 	  Say Y here if you want to enable support for Orise Technology
 	  otm8009a 480x800 dsi 2dl panel.
 
+config DRM_PANEL_OSD_OSD101T2587_53TS
+	tristate "OSD OSD101T2587-53TS DSI 1920x1200 video mode panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for One Stop Displays
+	  OSD101T2587-53TS 10.1" 1920x1200 dsi panel.
+
 config DRM_PANEL_PANASONIC_VVX10F034N00
 	tristate "Panasonic VVX10F034N00 1920x1200 video mode panel"
 	depends on OF
@@ -201,6 +210,15 @@ config DRM_PANEL_SAMSUNG_S6E63J0X03
 	depends on BACKLIGHT_CLASS_DEVICE
 	select VIDEOMODE_HELPERS
 
+config DRM_PANEL_SAMSUNG_S6E63M0
+	tristate "Samsung S6E63M0 RGB/SPI panel"
+	depends on OF
+	depends on SPI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for Samsung S6E63M0
+	  AMOLED LCD panel.
+
 config DRM_PANEL_SAMSUNG_S6E8AA0
 	tristate "Samsung S6E8AA0 DSI video mode panel"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 78e3dc376bdd..fb0cb3aaa9e6 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04) += panel-kingdisplay-kd097d04.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
 obj-$(CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO) += panel-olimex-lcd-olinuxino.o
 obj-$(CONFIG_DRM_PANEL_ORISETECH_OTM8009A) += panel-orisetech-otm8009a.o
+obj-$(CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS) += panel-osd-osd101t2587-53ts.o
 obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += panel-panasonic-vvx10f034n00.o
 obj-$(CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN) += panel-raspberrypi-touchscreen.o
 obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM68200) += panel-raydium-rm68200.o
@@ -20,6 +21,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o
+obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o
 obj-$(CONFIG_DRM_PANEL_SEIKO_43WVF1G) += panel-seiko-43wvf1g.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o
diff --git a/drivers/gpu/drm/panel/panel-arm-versatile.c b/drivers/gpu/drm/panel/panel-arm-versatile.c
index a79908dfa3c8..5f72c922a04b 100644
--- a/drivers/gpu/drm/panel/panel-arm-versatile.c
+++ b/drivers/gpu/drm/panel/panel-arm-versatile.c
@@ -25,13 +25,12 @@
  * Epson QCIF display.
  *
  */
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
 
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -39,6 +38,9 @@
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
 /*
  * This configuration register in the Versatile and RealView
  * family is uniformly present but appears more and more
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
index a1c4cd2940fb..35a4bd05edf5 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -22,13 +22,10 @@
  * published by the Free Software Foundation.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
-
-#include <linux/of_device.h>
 #include <linux/bitops.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
@@ -37,6 +34,10 @@
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 #define ILI9322_CHIP_ID			0x00
 #define ILI9322_CHIP_ID_MAGIC		0x96
 
diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c
index 0daafda39df7..d92d1c98878c 100644
--- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c
+++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c
@@ -4,18 +4,21 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
-
-#include <video/mipi_display.h>
+#include <drm/drm_print.h>
 
 struct panel_init_cmd {
 	size_t len;
@@ -51,7 +54,6 @@ struct innolux_panel {
 
 	struct backlight_device *backlight;
 	struct regulator_bulk_data *supplies;
-	unsigned int num_supplies;
 	struct gpio_desc *enable_gpio;
 
 	bool prepared;
diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
index 99caa7835e7b..ee5ddf771e10 100644
--- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
+++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
@@ -21,19 +21,21 @@
  * You should have received a copy of the GNU General Public License along with
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
 
-#include <video/mipi_display.h>
-
 static const char * const regulator_names[] = {
 	"vddp",
 	"iovcc"
diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
index 2a25a914d09e..3ac04eb8d0fe 100644
--- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
+++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c
@@ -4,17 +4,20 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
-
-#include <video/mipi_display.h>
+#include <drm/drm_print.h>
 
 struct kingdisplay_panel {
 	struct drm_panel base;
diff --git a/drivers/gpu/drm/panel/panel-lg-lg4573.c b/drivers/gpu/drm/panel/panel-lg-lg4573.c
index 6989238b276a..0dd4bdda7c4e 100644
--- a/drivers/gpu/drm/panel/panel-lg-lg4573.c
+++ b/drivers/gpu/drm/panel/panel-lg-lg4573.c
@@ -15,10 +15,9 @@
  * published by the Free Software Foundation.
 */
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
-
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
 
@@ -26,6 +25,10 @@
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_device.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
 struct lg4573 {
 	struct drm_panel panel;
 	struct spi_device *spi;
diff --git a/drivers/gpu/drm/panel/panel-lvds.c b/drivers/gpu/drm/panel/panel-lvds.c
index 3f6550e6b6a4..1ec57d0806a8 100644
--- a/drivers/gpu/drm/panel/panel-lvds.c
+++ b/drivers/gpu/drm/panel/panel-lvds.c
@@ -16,14 +16,13 @@
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_panel.h>
-
 #include <video/display_timing.h>
 #include <video/of_display_timing.h>
 #include <video/videomode.h>
 
+#include <drm/drm_crtc.h>
+#include <drm/drm_panel.h>
+
 struct panel_lvds {
 	struct drm_panel panel;
 	struct device *dev;
diff --git a/drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c b/drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c
index a1d8d92fac2b..2bae1db3ff34 100644
--- a/drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c
+++ b/drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c
@@ -15,13 +15,13 @@
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drm_modes.h>
-#include <drm/drm_panel.h>
-#include <drm/drmP.h>
-
 #include <video/videomode.h>
 #include <video/display_timing.h>
 
+#include <drm/drm_device.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
 #define LCD_OLINUXINO_HEADER_MAGIC	0x4F4CB727
 #define LCD_OLINUXINO_DATA_LEN		256
 
diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
index f27a7e426574..c7b48df8869a 100644
--- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
+++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
@@ -6,14 +6,19 @@
  *          Yannick Fertre <yannick.fertre@st.com>
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_mipi_dsi.h>
-#include <drm/drm_panel.h>
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/regulator/consumer.h>
+
 #include <video/mipi_display.h>
 
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 #define OTM8009A_BACKLIGHT_DEFAULT	240
 #define OTM8009A_BACKLIGHT_MAX		255
 
diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
new file mode 100644
index 000000000000..e0e20ecff916
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/backlight.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+struct osd101t2587_panel {
+	struct drm_panel base;
+	struct mipi_dsi_device *dsi;
+
+	struct backlight_device *backlight;
+	struct regulator *supply;
+
+	bool prepared;
+	bool enabled;
+
+	const struct drm_display_mode *default_mode;
+};
+
+static inline struct osd101t2587_panel *ti_osd_panel(struct drm_panel *panel)
+{
+	return container_of(panel, struct osd101t2587_panel, base);
+}
+
+static int osd101t2587_panel_disable(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (!osd101t2587->enabled)
+		return 0;
+
+	backlight_disable(osd101t2587->backlight);
+
+	ret = mipi_dsi_shutdown_peripheral(osd101t2587->dsi);
+
+	osd101t2587->enabled = false;
+
+	return ret;
+}
+
+static int osd101t2587_panel_unprepare(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+
+	if (!osd101t2587->prepared)
+		return 0;
+
+	regulator_disable(osd101t2587->supply);
+	osd101t2587->prepared = false;
+
+	return 0;
+}
+
+static int osd101t2587_panel_prepare(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (osd101t2587->prepared)
+		return 0;
+
+	ret = regulator_enable(osd101t2587->supply);
+	if (!ret)
+		osd101t2587->prepared = true;
+
+	return ret;
+}
+
+static int osd101t2587_panel_enable(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (osd101t2587->enabled)
+		return 0;
+
+	ret = mipi_dsi_turn_on_peripheral(osd101t2587->dsi);
+	if (ret)
+		return ret;
+
+	backlight_enable(osd101t2587->backlight);
+
+	osd101t2587->enabled = true;
+
+	return ret;
+}
+
+static const struct drm_display_mode default_mode_osd101t2587 = {
+	.clock = 164400,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 152,
+	.hsync_end = 1920 + 152 + 52,
+	.htotal = 1920 + 152 + 52 + 20,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 24,
+	.vsync_end = 1200 + 24 + 6,
+	.vtotal = 1200 + 24 + 6 + 48,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static int osd101t2587_panel_get_modes(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, osd101t2587->default_mode);
+	if (!mode) {
+		dev_err(panel->drm->dev, "failed to add mode %ux%ux@%u\n",
+			osd101t2587->default_mode->hdisplay,
+			osd101t2587->default_mode->vdisplay,
+			osd101t2587->default_mode->vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	drm_mode_probed_add(panel->connector, mode);
+
+	panel->connector->display_info.width_mm = 217;
+	panel->connector->display_info.height_mm = 136;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs osd101t2587_panel_funcs = {
+	.disable = osd101t2587_panel_disable,
+	.unprepare = osd101t2587_panel_unprepare,
+	.prepare = osd101t2587_panel_prepare,
+	.enable = osd101t2587_panel_enable,
+	.get_modes = osd101t2587_panel_get_modes,
+};
+
+static const struct of_device_id osd101t2587_of_match[] = {
+	{
+		.compatible = "osddisplays,osd101t2587-53ts",
+		.data = &default_mode_osd101t2587,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, osd101t2587_of_match);
+
+static int osd101t2587_panel_add(struct osd101t2587_panel *osd101t2587)
+{
+	struct device *dev = &osd101t2587->dsi->dev;
+
+	osd101t2587->supply = devm_regulator_get(dev, "power");
+	if (IS_ERR(osd101t2587->supply))
+		return PTR_ERR(osd101t2587->supply);
+
+	osd101t2587->backlight = devm_of_find_backlight(dev);
+	if (IS_ERR(osd101t2587->backlight))
+		return PTR_ERR(osd101t2587->backlight);
+
+	drm_panel_init(&osd101t2587->base);
+	osd101t2587->base.funcs = &osd101t2587_panel_funcs;
+	osd101t2587->base.dev = &osd101t2587->dsi->dev;
+
+	return drm_panel_add(&osd101t2587->base);
+}
+
+static int osd101t2587_panel_probe(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587;
+	const struct of_device_id *id;
+	int ret;
+
+	id = of_match_node(osd101t2587_of_match, dsi->dev.of_node);
+	if (!id)
+		return -ENODEV;
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
+			  MIPI_DSI_MODE_VIDEO_BURST |
+			  MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+			  MIPI_DSI_MODE_EOT_PACKET;
+
+	osd101t2587 = devm_kzalloc(&dsi->dev, sizeof(*osd101t2587), GFP_KERNEL);
+	if (!osd101t2587)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, osd101t2587);
+
+	osd101t2587->dsi = dsi;
+	osd101t2587->default_mode = id->data;
+
+	ret = osd101t2587_panel_add(osd101t2587);
+	if (ret < 0)
+		return ret;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret)
+		drm_panel_remove(&osd101t2587->base);
+
+	return ret;
+}
+
+static int osd101t2587_panel_remove(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587 = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = osd101t2587_panel_disable(&osd101t2587->base);
+	if (ret < 0)
+		dev_warn(&dsi->dev, "failed to disable panel: %d\n", ret);
+
+	osd101t2587_panel_unprepare(&osd101t2587->base);
+
+	drm_panel_remove(&osd101t2587->base);
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", ret);
+
+	return ret;
+}
+
+static void osd101t2587_panel_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587 = mipi_dsi_get_drvdata(dsi);
+
+	osd101t2587_panel_disable(&osd101t2587->base);
+	osd101t2587_panel_unprepare(&osd101t2587->base);
+}
+
+static struct mipi_dsi_driver osd101t2587_panel_driver = {
+	.driver = {
+		.name = "panel-osd-osd101t2587-53ts",
+		.of_match_table = osd101t2587_of_match,
+	},
+	.probe = osd101t2587_panel_probe,
+	.remove = osd101t2587_panel_remove,
+	.shutdown = osd101t2587_panel_shutdown,
+};
+module_mipi_dsi_driver(osd101t2587_panel_driver);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_DESCRIPTION("OSD101T2587-53TS DSI panel");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
index cb4dfb98be0f..045df41dbde2 100644
--- a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
+++ b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c
@@ -19,17 +19,18 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
-#include <video/mipi_display.h>
-
 /*
  * When power is turned off to this panel a minimum off time of 500ms has to be
  * observed before powering back on as there's no external reset pin. Keep
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index 2c9c9722734f..28c0620dfe0f 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -53,9 +53,8 @@
 #include <linux/of_graph.h>
 #include <linux/pm.h>
 
-#include <drm/drm_panel.h>
-#include <drm/drmP.h>
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
diff --git a/drivers/gpu/drm/panel/panel-raydium-rm68200.c b/drivers/gpu/drm/panel/panel-raydium-rm68200.c
index 14186827e591..ba889625ad43 100644
--- a/drivers/gpu/drm/panel/panel-raydium-rm68200.c
+++ b/drivers/gpu/drm/panel/panel-raydium-rm68200.c
@@ -7,14 +7,17 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/regulator/consumer.h>
 
 #include <video/mipi_display.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 
 /*** Manufacturer Command Set ***/
 #define MCS_CMD_MODE_SW		0xFE /* CMD Mode Switch */
diff --git a/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c b/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
index d88ea8da2ec2..6dcb692c4701 100644
--- a/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
+++ b/drivers/gpu/drm/panel/panel-rocktech-jh057n00900.c
@@ -257,20 +257,12 @@ static int allpixelson_set(void *data, u64 val)
 DEFINE_SIMPLE_ATTRIBUTE(allpixelson_fops, NULL,
 			allpixelson_set, "%llu\n");
 
-static int jh057n_debugfs_init(struct jh057n *ctx)
+static void jh057n_debugfs_init(struct jh057n *ctx)
 {
-	struct dentry *f;
-
 	ctx->debugfs = debugfs_create_dir(DRV_NAME, NULL);
-	if (!ctx->debugfs)
-		return -ENOMEM;
 
-	f = debugfs_create_file("allpixelson", 0600,
-				ctx->debugfs, ctx, &allpixelson_fops);
-	if (!f)
-		return -ENOMEM;
-
-	return 0;
+	debugfs_create_file("allpixelson", 0600, ctx->debugfs, ctx,
+			    &allpixelson_fops);
 }
 
 static void jh057n_debugfs_remove(struct jh057n *ctx)
diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
index 3cf4cf6a6942..5c2a1cae603b 100644
--- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c
+++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
@@ -11,10 +11,10 @@
  * published by the Free Software Foundation.
 */
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
-
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
 
@@ -22,6 +22,10 @@
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 /* Manufacturer Command Set */
 #define MCS_MANPWR		0xb0
 #define MCS_ELVSS_ON		0xb1
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
index 797bbc7a264e..351eee951648 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
@@ -11,14 +11,18 @@
  * published by the Free Software Foundation.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_mipi_dsi.h>
-#include <drm/drm_panel.h>
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/regulator/consumer.h>
 
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 #define S6E3HA2_MIN_BRIGHTNESS		0
 #define S6E3HA2_MAX_BRIGHTNESS		100
 #define S6E3HA2_DEFAULT_BRIGHTNESS	80
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
index aeb32aa58899..19ea325a0e9b 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c
@@ -11,14 +11,19 @@
  * published by the Free Software Foundation.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_mipi_dsi.h>
-#include <drm/drm_panel.h>
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/regulator/consumer.h>
+
 #include <video/mipi_display.h>
 
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 #define MCS_LEVEL2_KEY		0xf0
 #define MCS_MTP_KEY		0xf1
 #define MCS_MTP_SET3		0xd4
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
new file mode 100644
index 000000000000..142d395ea512
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S6E63M0 AMOLED LCD drm_panel driver.
+ *
+ * Copyright (C) 2019 Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
+ * Derived from drivers/gpu/drm/panel-samsung-ld9040.c
+ *
+ * Andrzej Hajda <a.hajda@samsung.com>
+ */
+
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+#include <linux/backlight.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <video/mipi_display.h>
+
+/* Manufacturer Command Set */
+#define MCS_ELVSS_ON                0xb1
+#define MCS_MIECTL1                0xc0
+#define MCS_BCMODE                              0xc1
+#define MCS_DISCTL   0xf2
+#define MCS_SRCCTL           0xf6
+#define MCS_IFCTL                       0xf7
+#define MCS_PANELCTL         0xF8
+#define MCS_PGAMMACTL                   0xfa
+
+#define NUM_GAMMA_LEVELS             11
+#define GAMMA_TABLE_COUNT           23
+
+#define DATA_MASK                                       0x100
+
+#define MAX_BRIGHTNESS              (NUM_GAMMA_LEVELS - 1)
+
+/* array of gamma tables for gamma value 2.2 */
+static u8 const s6e63m0_gamma_22[NUM_GAMMA_LEVELS][GAMMA_TABLE_COUNT] = {
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x78, 0xEC, 0x3D, 0xC8,
+	  0xC2, 0xB6, 0xC4, 0xC7, 0xB6, 0xD5, 0xD7,
+	  0xCC, 0x00, 0x39, 0x00, 0x36, 0x00, 0x51 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x73, 0x4A, 0x3D, 0xC0,
+	  0xC2, 0xB1, 0xBB, 0xBE, 0xAC, 0xCE, 0xCF,
+	  0xC5, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x82 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x70, 0x51, 0x3E, 0xBF,
+	  0xC1, 0xAF, 0xB9, 0xBC, 0xAB, 0xCC, 0xCC,
+	  0xC2, 0x00, 0x65, 0x00, 0x67, 0x00, 0x8D },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x6C, 0x54, 0x3A, 0xBC,
+	  0xBF, 0xAC, 0xB7, 0xBB, 0xA9, 0xC9, 0xC9,
+	  0xBE, 0x00, 0x71, 0x00, 0x73, 0x00, 0x9E },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x69, 0x54, 0x37, 0xBB,
+	  0xBE, 0xAC, 0xB4, 0xB7, 0xA6, 0xC7, 0xC8,
+	  0xBC, 0x00, 0x7B, 0x00, 0x7E, 0x00, 0xAB },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x66, 0x55, 0x34, 0xBA,
+	  0xBD, 0xAB, 0xB1, 0xB5, 0xA3, 0xC5, 0xC6,
+	  0xB9, 0x00, 0x85, 0x00, 0x88, 0x00, 0xBA },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x63, 0x53, 0x31, 0xB8,
+	  0xBC, 0xA9, 0xB0, 0xB5, 0xA2, 0xC4, 0xC4,
+	  0xB8, 0x00, 0x8B, 0x00, 0x8E, 0x00, 0xC2 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x62, 0x54, 0x30, 0xB9,
+	  0xBB, 0xA9, 0xB0, 0xB3, 0xA1, 0xC1, 0xC3,
+	  0xB7, 0x00, 0x91, 0x00, 0x95, 0x00, 0xDA },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x66, 0x58, 0x34, 0xB6,
+	  0xBA, 0xA7, 0xAF, 0xB3, 0xA0, 0xC1, 0xC2,
+	  0xB7, 0x00, 0x97, 0x00, 0x9A, 0x00, 0xD1 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x64, 0x56, 0x33, 0xB6,
+	  0xBA, 0xA8, 0xAC, 0xB1, 0x9D, 0xC1, 0xC1,
+	  0xB7, 0x00, 0x9C, 0x00, 0x9F, 0x00, 0xD6 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x5f, 0x50, 0x2d, 0xB6,
+	  0xB9, 0xA7, 0xAd, 0xB1, 0x9f, 0xbe, 0xC0,
+	  0xB5, 0x00, 0xa0, 0x00, 0xa4, 0x00, 0xdb },
+};
+
+struct s6e63m0 {
+	struct device *dev;
+	struct drm_panel panel;
+	struct backlight_device *bl_dev;
+
+	struct regulator_bulk_data supplies[2];
+	struct gpio_desc *reset_gpio;
+
+	bool prepared;
+	bool enabled;
+
+	/*
+	 * This field is tested by functions directly accessing bus before
+	 * transfer, transfer is skipped if it is set. In case of transfer
+	 * failure or unexpected response the field is set to error value.
+	 * Such construct allows to eliminate many checks in higher level
+	 * functions.
+	 */
+	int error;
+};
+
+static const struct drm_display_mode default_mode = {
+	.clock		= 25628,
+	.hdisplay	= 480,
+	.hsync_start	= 480 + 16,
+	.hsync_end	= 480 + 16 + 2,
+	.htotal		= 480 + 16 + 2 + 16,
+	.vdisplay	= 800,
+	.vsync_start	= 800 + 28,
+	.vsync_end	= 800 + 28 + 2,
+	.vtotal		= 800 + 28 + 2 + 1,
+	.vrefresh	= 60,
+	.width_mm	= 53,
+	.height_mm	= 89,
+	.flags		= DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static inline struct s6e63m0 *panel_to_s6e63m0(struct drm_panel *panel)
+{
+	return container_of(panel, struct s6e63m0, panel);
+}
+
+static int s6e63m0_clear_error(struct s6e63m0 *ctx)
+{
+	int ret = ctx->error;
+
+	ctx->error = 0;
+	return ret;
+}
+
+static int s6e63m0_spi_write_word(struct s6e63m0 *ctx, u16 data)
+{
+	struct spi_device *spi = to_spi_device(ctx->dev);
+	struct spi_transfer xfer = {
+		.len	= 2,
+		.tx_buf = &data,
+	};
+	struct spi_message msg;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	return spi_sync(spi, &msg);
+}
+
+static void s6e63m0_dcs_write(struct s6e63m0 *ctx, const u8 *data, size_t len)
+{
+	int ret = 0;
+
+	if (ctx->error < 0 || len == 0)
+		return;
+
+	DRM_DEV_DEBUG(ctx->dev, "writing dcs seq: %*ph\n", (int)len, data);
+	ret = s6e63m0_spi_write_word(ctx, *data);
+
+	while (!ret && --len) {
+		++data;
+		ret = s6e63m0_spi_write_word(ctx, *data | DATA_MASK);
+	}
+
+	if (ret) {
+		DRM_DEV_ERROR(ctx->dev, "error %d writing dcs seq: %*ph\n", ret,
+			      (int)len, data);
+		ctx->error = ret;
+	}
+
+	usleep_range(300, 310);
+}
+
+#define s6e63m0_dcs_write_seq_static(ctx, seq ...) \
+	({ \
+		static const u8 d[] = { seq }; \
+		s6e63m0_dcs_write(ctx, d, ARRAY_SIZE(d)); \
+	})
+
+static void s6e63m0_init(struct s6e63m0 *ctx)
+{
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL,
+				     0x01, 0x27, 0x27, 0x07, 0x07, 0x54, 0x9f,
+				     0x63, 0x86, 0x1a, 0x33, 0x0d, 0x00, 0x00);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_DISCTL,
+				     0x02, 0x03, 0x1c, 0x10, 0x10);
+	s6e63m0_dcs_write_seq_static(ctx, MCS_IFCTL,
+				     0x03, 0x00, 0x00);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL,
+				     0x00, 0x18, 0x08, 0x24, 0x64, 0x56, 0x33,
+				     0xb6, 0xba, 0xa8, 0xac, 0xb1, 0x9d, 0xc1,
+				     0xc1, 0xb7, 0x00, 0x9c, 0x00, 0x9f, 0x00,
+				     0xd6);
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL,
+				     0x01);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_SRCCTL,
+				     0x00, 0x8c, 0x07);
+	s6e63m0_dcs_write_seq_static(ctx, 0xb3,
+				     0xc);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb5,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb6,
+				     0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44,
+				     0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66,
+				     0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb7,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e, 0x00, 0x00, 0x11,
+				     0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55,
+				     0x66, 0x66, 0x66, 0x66, 0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb9,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xba,
+				     0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44,
+				     0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66,
+				     0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_BCMODE,
+				     0x4d, 0x96, 0x1d, 0x00, 0x00, 0x01, 0xdf,
+				     0x00, 0x00, 0x03, 0x1f, 0x00, 0x00, 0x00,
+				     0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x06,
+				     0x09, 0x0d, 0x0f, 0x12, 0x15, 0x18);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb2,
+				     0x10, 0x10, 0x0b, 0x05);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_MIECTL1,
+				     0x01);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_ELVSS_ON,
+				     0x0b);
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_EXIT_SLEEP_MODE);
+}
+
+static int s6e63m0_power_on(struct s6e63m0 *ctx)
+{
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+	if (ret < 0)
+		return ret;
+
+	msleep(25);
+
+	gpiod_set_value(ctx->reset_gpio, 0);
+	msleep(120);
+
+	return 0;
+}
+
+static int s6e63m0_power_off(struct s6e63m0 *ctx)
+{
+	int ret;
+
+	gpiod_set_value(ctx->reset_gpio, 1);
+	msleep(120);
+
+	ret = regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int s6e63m0_disable(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+
+	if (!ctx->enabled)
+		return 0;
+
+	backlight_disable(ctx->bl_dev);
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_ENTER_SLEEP_MODE);
+	msleep(200);
+
+	ctx->enabled = false;
+
+	return 0;
+}
+
+static int s6e63m0_unprepare(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+	int ret;
+
+	if (!ctx->prepared)
+		return 0;
+
+	s6e63m0_clear_error(ctx);
+
+	ret = s6e63m0_power_off(ctx);
+	if (ret < 0)
+		return ret;
+
+	ctx->prepared = false;
+
+	return 0;
+}
+
+static int s6e63m0_prepare(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+	int ret;
+
+	if (ctx->prepared)
+		return 0;
+
+	ret = s6e63m0_power_on(ctx);
+	if (ret < 0)
+		return ret;
+
+	s6e63m0_init(ctx);
+
+	ret = s6e63m0_clear_error(ctx);
+
+	if (ret < 0)
+		s6e63m0_unprepare(panel);
+
+	ctx->prepared = true;
+
+	return ret;
+}
+
+static int s6e63m0_enable(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+
+	if (ctx->enabled)
+		return 0;
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_SET_DISPLAY_ON);
+
+	backlight_enable(ctx->bl_dev);
+
+	ctx->enabled = true;
+
+	return 0;
+}
+
+static int s6e63m0_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &default_mode);
+	if (!mode) {
+		DRM_ERROR("failed to add mode %ux%ux@%u\n",
+			  default_mode.hdisplay, default_mode.vdisplay,
+			  default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs s6e63m0_drm_funcs = {
+	.disable	= s6e63m0_disable,
+	.unprepare	= s6e63m0_unprepare,
+	.prepare	= s6e63m0_prepare,
+	.enable		= s6e63m0_enable,
+	.get_modes	= s6e63m0_get_modes,
+};
+
+static int s6e63m0_set_brightness(struct backlight_device *bd)
+{
+	struct s6e63m0 *ctx = bl_get_data(bd);
+
+	int brightness = bd->props.brightness;
+
+	/* disable and set new gamma */
+	s6e63m0_dcs_write(ctx, s6e63m0_gamma_22[brightness],
+			  ARRAY_SIZE(s6e63m0_gamma_22[brightness]));
+
+	/* update gamma table. */
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL, 0x01);
+
+	return s6e63m0_clear_error(ctx);
+}
+
+static const struct backlight_ops s6e63m0_backlight_ops = {
+	.update_status	= s6e63m0_set_brightness,
+};
+
+static int s6e63m0_backlight_register(struct s6e63m0 *ctx)
+{
+	struct backlight_properties props = {
+		.type		= BACKLIGHT_RAW,
+		.brightness	= MAX_BRIGHTNESS,
+		.max_brightness = MAX_BRIGHTNESS
+	};
+	struct device *dev = ctx->dev;
+	int ret = 0;
+
+	ctx->bl_dev = devm_backlight_device_register(dev, "panel", dev, ctx,
+						     &s6e63m0_backlight_ops,
+						     &props);
+	if (IS_ERR(ctx->bl_dev)) {
+		ret = PTR_ERR(ctx->bl_dev);
+		DRM_DEV_ERROR(dev, "error registering backlight device (%d)\n",
+			      ret);
+	}
+
+	return ret;
+}
+
+static int s6e63m0_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct s6e63m0 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(struct s6e63m0), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ctx);
+
+	ctx->dev = dev;
+	ctx->enabled = false;
+	ctx->prepared = false;
+
+	ctx->supplies[0].supply = "vdd3";
+	ctx->supplies[1].supply = "vci";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies),
+				      ctx->supplies);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "failed to get regulators: %d\n", ret);
+		return ret;
+	}
+
+	ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(ctx->reset_gpio)) {
+		DRM_DEV_ERROR(dev, "cannot get reset-gpios %ld\n",
+			      PTR_ERR(ctx->reset_gpio));
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	spi->bits_per_word = 9;
+	spi->mode = SPI_MODE_3;
+	ret = spi_setup(spi);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "spi setup failed.\n");
+		return ret;
+	}
+
+	drm_panel_init(&ctx->panel);
+	ctx->panel.dev = dev;
+	ctx->panel.funcs = &s6e63m0_drm_funcs;
+
+	ret = s6e63m0_backlight_register(ctx);
+	if (ret < 0)
+		return ret;
+
+	return drm_panel_add(&ctx->panel);
+}
+
+static int s6e63m0_remove(struct spi_device *spi)
+{
+	struct s6e63m0 *ctx = spi_get_drvdata(spi);
+
+	drm_panel_remove(&ctx->panel);
+
+	return 0;
+}
+
+static const struct of_device_id s6e63m0_of_match[] = {
+	{ .compatible = "samsung,s6e63m0" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, s6e63m0_of_match);
+
+static struct spi_driver s6e63m0_driver = {
+	.probe			= s6e63m0_probe,
+	.remove			= s6e63m0_remove,
+	.driver			= {
+		.name		= "panel-samsung-s6e63m0",
+		.of_match_table = s6e63m0_of_match,
+	},
+};
+module_spi_driver(s6e63m0_driver);
+
+MODULE_AUTHOR("Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>");
+MODULE_DESCRIPTION("s6e63m0 LCD Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
index 6ad827b93ae1..0dcbda8310e3 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e8aa0.c
@@ -15,17 +15,21 @@
  * published by the Free Software Foundation.
 */
 
-#include <drm/drmP.h>
-#include <drm/drm_mipi_dsi.h>
-#include <drm/drm_panel.h>
-
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
 #include <video/mipi_display.h>
 #include <video/of_videomode.h>
 #include <video/videomode.h>
 
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 #define LDI_MTP_LENGTH			24
 #define GAMMA_LEVEL_NUM			25
 #define GAMMA_TABLE_LEN			26
diff --git a/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c b/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
index bdcc5d80823d..18b22b1294fb 100644
--- a/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
+++ b/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
@@ -7,17 +7,19 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_panel.h>
-
 #include <video/display_timing.h>
 #include <video/videomode.h>
 
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_panel.h>
+
 struct seiko_panel_desc {
 	const struct drm_display_mode *modes;
 	unsigned int num_modes;
diff --git a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c
index 02fc0f5423d4..1ac75a30e431 100644
--- a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c
+++ b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c
@@ -7,18 +7,19 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
-#include <video/mipi_display.h>
-
 struct sharp_panel {
 	struct drm_panel base;
 	/* the datasheet refers to them as DSI-LINK1 and DSI-LINK2 */
diff --git a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
index e5cae0050f52..89d7fc842972 100644
--- a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
+++ b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c
@@ -19,18 +19,19 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/mipi_display.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
-#include <video/mipi_display.h>
-
 struct sharp_nt_panel {
 	struct drm_panel base;
 	struct mipi_dsi_device *dsi;
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 569be4efd8d1..5a93c4edf1e4 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -22,20 +22,21 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
-#include <drm/drmP.h>
+#include <video/display_timing.h>
+#include <video/videomode.h>
+
 #include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 
-#include <video/display_timing.h>
-#include <video/videomode.h>
-
 struct panel_desc {
 	const struct drm_display_mode *modes;
 	unsigned int num_modes;
@@ -446,6 +447,32 @@ static const struct panel_desc ampire_am800480r3tmqwa1h = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct display_timing santek_st0700i5y_rbslw_f_timing = {
+	.pixelclock = { 26400000, 33300000, 46800000 },
+	.hactive = { 800, 800, 800 },
+	.hfront_porch = { 16, 210, 354 },
+	.hback_porch = { 45, 36, 6 },
+	.hsync_len = { 1, 10, 40 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 7, 22, 147 },
+	.vback_porch = { 22, 13, 3 },
+	.vsync_len = { 1, 10, 20 },
+	.flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW |
+		DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_POSEDGE
+};
+
+static const struct panel_desc armadeus_st0700_adapt = {
+	.timings = &santek_st0700i5y_rbslw_f_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 154,
+		.height = 86,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+};
+
 static const struct drm_display_mode auo_b101aw03_mode = {
 	.clock = 51450,
 	.hdisplay = 1024,
@@ -1096,6 +1123,56 @@ static const struct panel_desc dlc_dlc1010gig = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode edt_et035012dm6_mode = {
+	.clock = 6500,
+	.hdisplay = 320,
+	.hsync_start = 320 + 20,
+	.hsync_end = 320 + 20 + 30,
+	.htotal = 320 + 20 + 68,
+	.vdisplay = 240,
+	.vsync_start = 240 + 4,
+	.vsync_end = 240 + 4 + 4,
+	.vtotal = 240 + 4 + 4 + 14,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc edt_et035012dm6 = {
+	.modes = &edt_et035012dm6_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 70,
+		.height = 52,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+};
+
+static const struct drm_display_mode edt_etm0430g0dh6_mode = {
+	.clock = 9000,
+	.hdisplay = 480,
+	.hsync_start = 480 + 2,
+	.hsync_end = 480 + 2 + 41,
+	.htotal = 480 + 2 + 41 + 2,
+	.vdisplay = 272,
+	.vsync_start = 272 + 2,
+	.vsync_end = 272 + 2 + 10,
+	.vtotal = 272 + 2 + 10 + 2,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc edt_etm0430g0dh6 = {
+	.modes = &edt_etm0430g0dh6_mode,
+	.num_modes = 1,
+	.bpc = 6,
+	.size = {
+		.width = 95,
+		.height = 54,
+	},
+};
+
 static const struct drm_display_mode edt_et057090dhu_mode = {
 	.clock = 25175,
 	.hdisplay = 640,
@@ -1160,6 +1237,33 @@ static const struct panel_desc edt_etm0700g0bdh6 = {
 	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
+static const struct display_timing evervision_vgg804821_timing = {
+	.pixelclock = { 27600000, 33300000, 50000000 },
+	.hactive = { 800, 800, 800 },
+	.hfront_porch = { 40, 66, 70 },
+	.hback_porch = { 40, 67, 70 },
+	.hsync_len = { 40, 67, 70 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 6, 10, 10 },
+	.vback_porch = { 7, 11, 11 },
+	.vsync_len = { 7, 11, 11 },
+	.flags = DISPLAY_FLAGS_HSYNC_HIGH | DISPLAY_FLAGS_VSYNC_HIGH |
+		 DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_NEGEDGE |
+		 DISPLAY_FLAGS_SYNC_NEGEDGE,
+};
+
+static const struct panel_desc evervision_vgg804821 = {
+	.timings = &evervision_vgg804821_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 108,
+		.height = 64,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+};
+
 static const struct drm_display_mode foxlink_fl500wvr00_a0t_mode = {
 	.clock = 32260,
 	.hdisplay = 800,
@@ -1184,6 +1288,29 @@ static const struct panel_desc foxlink_fl500wvr00_a0t = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 };
 
+static const struct drm_display_mode friendlyarm_hd702e_mode = {
+	.clock		= 67185,
+	.hdisplay	= 800,
+	.hsync_start	= 800 + 20,
+	.hsync_end	= 800 + 20 + 24,
+	.htotal		= 800 + 20 + 24 + 20,
+	.vdisplay	= 1280,
+	.vsync_start	= 1280 + 4,
+	.vsync_end	= 1280 + 4 + 8,
+	.vtotal		= 1280 + 4 + 8 + 4,
+	.vrefresh	= 60,
+	.flags		= DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc friendlyarm_hd702e = {
+	.modes = &friendlyarm_hd702e_mode,
+	.num_modes = 1,
+	.size = {
+		.width	= 94,
+		.height	= 151,
+	},
+};
+
 static const struct drm_display_mode giantplus_gpg482739qs5_mode = {
 	.clock = 9000,
 	.hdisplay = 480,
@@ -1549,6 +1676,29 @@ static const struct panel_desc innolux_zj070na_01p = {
 	},
 };
 
+static const struct display_timing koe_tx14d24vm1bpa_timing = {
+	.pixelclock = { 5580000, 5850000, 6200000 },
+	.hactive = { 320, 320, 320 },
+	.hfront_porch = { 30, 30, 30 },
+	.hback_porch = { 30, 30, 30 },
+	.hsync_len = { 1, 5, 17 },
+	.vactive = { 240, 240, 240 },
+	.vfront_porch = { 6, 6, 6 },
+	.vback_porch = { 5, 5, 5 },
+	.vsync_len = { 1, 2, 11 },
+	.flags = DISPLAY_FLAGS_DE_HIGH,
+};
+
+static const struct panel_desc koe_tx14d24vm1bpa = {
+	.timings = &koe_tx14d24vm1bpa_timing,
+	.num_timings = 1,
+	.bpc = 6,
+	.size = {
+		.width = 115,
+		.height = 86,
+	},
+};
+
 static const struct display_timing koe_tx31d200vm0baa_timing = {
 	.pixelclock = { 39600000, 43200000, 48000000 },
 	.hactive = { 1280, 1280, 1280 },
@@ -2355,6 +2505,31 @@ static const struct panel_desc starry_kr122ea0sra = {
 	},
 };
 
+static const struct drm_display_mode tfc_s9700rtwv43tr_01b_mode = {
+	.clock = 30000,
+	.hdisplay = 800,
+	.hsync_start = 800 + 39,
+	.hsync_end = 800 + 39 + 47,
+	.htotal = 800 + 39 + 47 + 39,
+	.vdisplay = 480,
+	.vsync_start = 480 + 13,
+	.vsync_end = 480 + 13 + 2,
+	.vtotal = 480 + 13 + 2 + 29,
+	.vrefresh = 62,
+};
+
+static const struct panel_desc tfc_s9700rtwv43tr_01b = {
+	.modes = &tfc_s9700rtwv43tr_01b_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 155,
+		.height = 90,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+};
+
 static const struct display_timing tianma_tm070jdhg30_timing = {
 	.pixelclock = { 62600000, 68200000, 78100000 },
 	.hactive = { 1280, 1280, 1280 },
@@ -2508,6 +2683,32 @@ static const struct panel_desc urt_umsh_8596md_parallel = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct drm_display_mode vl050_8048nt_c01_mode = {
+	.clock = 33333,
+	.hdisplay = 800,
+	.hsync_start = 800 + 210,
+	.hsync_end = 800 + 210 + 20,
+	.htotal = 800 + 210 + 20 + 46,
+	.vdisplay =  480,
+	.vsync_start = 480 + 22,
+	.vsync_end = 480 + 22 + 10,
+	.vtotal = 480 + 22 + 10 + 23,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc vl050_8048nt_c01 = {
+	.modes = &vl050_8048nt_c01_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 120,
+		.height = 76,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+};
+
 static const struct drm_display_mode winstar_wf35ltiacd_mode = {
 	.clock = 6410,
 	.hdisplay = 320,
@@ -2571,6 +2772,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "arm,rtsm-display",
 		.data = &arm_rtsm,
 	}, {
+		.compatible = "armadeus,st0700-adapt",
+		.data = &armadeus_st0700_adapt,
+	}, {
 		.compatible = "auo,b101aw03",
 		.data = &auo_b101aw03,
 	}, {
@@ -2646,6 +2850,12 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "dlc,dlc1010gig",
 		.data = &dlc_dlc1010gig,
 	}, {
+		.compatible = "edt,et035012dm6",
+		.data = &edt_et035012dm6,
+	}, {
+		.compatible = "edt,etm0430g0dh6",
+		.data = &edt_etm0430g0dh6,
+	}, {
 		.compatible = "edt,et057090dhu",
 		.data = &edt_et057090dhu,
 	}, {
@@ -2661,9 +2871,15 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "edt,etm0700g0edh6",
 		.data = &edt_etm0700g0bdh6,
 	}, {
+		.compatible = "evervision,vgg804821",
+		.data = &evervision_vgg804821,
+	}, {
 		.compatible = "foxlink,fl500wvr00-a0t",
 		.data = &foxlink_fl500wvr00_a0t,
 	}, {
+		.compatible = "friendlyarm,hd702e",
+		.data = &friendlyarm_hd702e,
+	}, {
 		.compatible = "giantplus,gpg482739qs5",
 		.data = &giantplus_gpg482739qs5
 	}, {
@@ -2706,6 +2922,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "innolux,zj070na-01p",
 		.data = &innolux_zj070na_01p,
 	}, {
+		.compatible = "koe,tx14d24vm1bpa",
+		.data = &koe_tx14d24vm1bpa,
+	}, {
 		.compatible = "koe,tx31d200vm0baa",
 		.data = &koe_tx31d200vm0baa,
 	}, {
@@ -2802,6 +3021,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "starry,kr122ea0sra",
 		.data = &starry_kr122ea0sra,
 	}, {
+		.compatible = "tfc,s9700rtwv43tr-01b",
+		.data = &tfc_s9700rtwv43tr_01b,
+	}, {
 		.compatible = "tianma,tm070jdhg30",
 		.data = &tianma_tm070jdhg30,
 	}, {
@@ -2835,6 +3057,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "urt,umsh-8596md-20t",
 		.data = &urt_umsh_8596md_parallel,
 	}, {
+		.compatible = "vxt,vl050-8048nt-c01",
+		.data = &vl050_8048nt_c01,
+	}, {
 		.compatible = "winstar,wf35ltiacd",
 		.data = &winstar_wf35ltiacd,
 	}, {
@@ -3053,6 +3278,37 @@ static const struct panel_desc_dsi lg_acx467akm_7 = {
 	.lanes = 4,
 };
 
+static const struct drm_display_mode osd101t2045_53ts_mode = {
+	.clock = 154500,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 112,
+	.hsync_end = 1920 + 112 + 16,
+	.htotal = 1920 + 112 + 16 + 32,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 16,
+	.vsync_end = 1200 + 16 + 2,
+	.vtotal = 1200 + 16 + 2 + 16,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc_dsi osd101t2045_53ts = {
+	.desc = {
+		.modes = &osd101t2045_53ts_mode,
+		.num_modes = 1,
+		.bpc = 8,
+		.size = {
+			.width = 217,
+			.height = 136,
+		},
+	},
+	.flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		 MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+		 MIPI_DSI_MODE_EOT_PACKET,
+	.format = MIPI_DSI_FMT_RGB888,
+	.lanes = 4,
+};
+
 static const struct of_device_id dsi_of_match[] = {
 	{
 		.compatible = "auo,b080uan01",
@@ -3073,6 +3329,9 @@ static const struct of_device_id dsi_of_match[] = {
 		.compatible = "lg,acx467akm-7",
 		.data = &lg_acx467akm_7
 	}, {
+		.compatible = "osddisplays,osd101t2045-53ts",
+		.data = &osd101t2045_53ts
+	}, {
 		/* sentinel */
 	}
 };
@@ -3098,7 +3357,14 @@ static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi)
 	dsi->format = desc->format;
 	dsi->lanes = desc->lanes;
 
-	return mipi_dsi_attach(dsi);
+	err = mipi_dsi_attach(dsi);
+	if (err) {
+		struct panel_simple *panel = dev_get_drvdata(&dsi->dev);
+
+		drm_panel_remove(&panel->base);
+	}
+
+	return err;
 }
 
 static int panel_simple_dsi_remove(struct mipi_dsi_device *dsi)
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7701.c b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
index 63f9a1c7fb1b..09c5d9a6f9fa 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7701.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7701.c
@@ -305,9 +305,9 @@ static const struct drm_display_mode ts8550b_mode = {
 	.htotal		= 480 + 38 + 12 + 12,
 
 	.vdisplay	= 854,
-	.vsync_start	= 854 + 4,
-	.vsync_end	= 854 + 4 + 8,
-	.vtotal		= 854 + 4 + 8 + 18,
+	.vsync_start	= 854 + 18,
+	.vsync_end	= 854 + 18 + 8,
+	.vtotal		= 854 + 18 + 8 + 4,
 
 	.width_mm	= 69,
 	.height_mm	= 139,
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
index 74284e5afc5d..bf85a8fa9ad0 100644
--- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
@@ -6,15 +6,19 @@
  * 2 as published by the Free Software Foundation.
  */
 
+#include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/regulator/consumer.h>
 #include <linux/spi/spi.h>
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
-
 #include <video/mipi_display.h>
 
+#include <drm/drm_device.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
 #define ST7789V_COLMOD_RGB_FMT_18BITS		(6 << 4)
 #define ST7789V_COLMOD_CTRL_FMT_18BITS		(6 << 0)
 
diff --git a/drivers/gpu/drm/panel/panel-truly-nt35597.c b/drivers/gpu/drm/panel/panel-truly-nt35597.c
index fc2a66c53db4..77e1311b7c69 100644
--- a/drivers/gpu/drm/panel/panel-truly-nt35597.c
+++ b/drivers/gpu/drm/panel/panel-truly-nt35597.c
@@ -3,11 +3,10 @@
  * Copyright (c) 2018, The Linux Foundation. All rights reserved.
  */
 
-#include <drm/drmP.h>
-#include <drm/drm_panel.h>
-#include <drm/drm_mipi_dsi.h>
-
+#include <linux/backlight.h>
+#include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/pinctrl/consumer.h>
@@ -15,6 +14,11 @@
 
 #include <video/mipi_display.h>
 
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
 static const char * const regulator_names[] = {
 	"vdda",
 	"vdispp",
@@ -280,6 +284,7 @@ static int truly_35597_power_on(struct truly_nt35597 *ctx)
 	gpiod_set_value(ctx->reset_gpio, 1);
 	usleep_range(10000, 20000);
 	gpiod_set_value(ctx->reset_gpio, 0);
+	usleep_range(10000, 20000);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/panfrost/Kconfig b/drivers/gpu/drm/panfrost/Kconfig
index 81963e964b0f..86cdc0ce79e6 100644
--- a/drivers/gpu/drm/panfrost/Kconfig
+++ b/drivers/gpu/drm/panfrost/Kconfig
@@ -10,6 +10,7 @@ config DRM_PANFROST
 	select IOMMU_IO_PGTABLE_LPAE
 	select DRM_GEM_SHMEM_HELPER
 	select PM_DEVFREQ
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	help
 	  DRM driver for ARM Mali Midgard (T6xx, T7xx, T8xx) and
 	  Bifrost (G3x, G5x, G7x) GPUs.
diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index 6de72d13c58f..ecf0864cb515 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -7,6 +7,7 @@ panfrost-y := \
 	panfrost_gem.o \
 	panfrost_gpu.o \
 	panfrost_job.o \
-	panfrost_mmu.o
+	panfrost_mmu.o \
+	panfrost_perfcnt.o
 
 obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 29fcffdf2d57..db798532b0b6 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -140,7 +140,9 @@ int panfrost_devfreq_init(struct panfrost_device *pfdev)
 		return 0;
 
 	ret = dev_pm_opp_of_add_table(&pfdev->pdev->dev);
-	if (ret)
+	if (ret == -ENODEV) /* Optional, continue without devfreq */
+		return 0;
+	else if (ret)
 		return ret;
 
 	panfrost_devfreq_reset(pfdev);
@@ -170,6 +172,9 @@ void panfrost_devfreq_resume(struct panfrost_device *pfdev)
 {
 	int i;
 
+	if (!pfdev->devfreq.devfreq)
+		return;
+
 	panfrost_devfreq_reset(pfdev);
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
 		pfdev->devfreq.slot[i].busy = false;
@@ -179,6 +184,9 @@ void panfrost_devfreq_resume(struct panfrost_device *pfdev)
 
 void panfrost_devfreq_suspend(struct panfrost_device *pfdev)
 {
+	if (!pfdev->devfreq.devfreq)
+		return;
+
 	devfreq_suspend_device(pfdev->devfreq.devfreq);
 }
 
@@ -188,6 +196,9 @@ static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev, i
 	ktime_t now;
 	ktime_t last;
 
+	if (!pfdev->devfreq.devfreq)
+		return;
+
 	now = ktime_get();
 	last = pfdev->devfreq.slot[slot].time_last_update;
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 3b2bced1b015..8a111d7c0200 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -14,6 +14,7 @@
 #include "panfrost_gpu.h"
 #include "panfrost_job.h"
 #include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
 
 static int panfrost_reset_init(struct panfrost_device *pfdev)
 {
@@ -55,11 +56,33 @@ static int panfrost_clk_init(struct panfrost_device *pfdev)
 	if (err)
 		return err;
 
+	pfdev->bus_clock = devm_clk_get_optional(pfdev->dev, "bus");
+	if (IS_ERR(pfdev->bus_clock)) {
+		dev_err(pfdev->dev, "get bus_clock failed %ld\n",
+			PTR_ERR(pfdev->bus_clock));
+		return PTR_ERR(pfdev->bus_clock);
+	}
+
+	if (pfdev->bus_clock) {
+		rate = clk_get_rate(pfdev->bus_clock);
+		dev_info(pfdev->dev, "bus_clock rate = %lu\n", rate);
+
+		err = clk_prepare_enable(pfdev->bus_clock);
+		if (err)
+			goto disable_clock;
+	}
+
 	return 0;
+
+disable_clock:
+	clk_disable_unprepare(pfdev->clock);
+
+	return err;
 }
 
 static void panfrost_clk_fini(struct panfrost_device *pfdev)
 {
+	clk_disable_unprepare(pfdev->bus_clock);
 	clk_disable_unprepare(pfdev->clock);
 }
 
@@ -149,7 +172,13 @@ int panfrost_device_init(struct panfrost_device *pfdev)
 	pm_runtime_mark_last_busy(pfdev->dev);
 	pm_runtime_put_autosuspend(pfdev->dev);
 
+	err = panfrost_perfcnt_init(pfdev);
+	if (err)
+		goto err_out5;
+
 	return 0;
+err_out5:
+	panfrost_job_fini(pfdev);
 err_out4:
 	panfrost_mmu_fini(pfdev);
 err_out3:
@@ -165,6 +194,7 @@ err_out0:
 
 void panfrost_device_fini(struct panfrost_device *pfdev)
 {
+	panfrost_perfcnt_fini(pfdev);
 	panfrost_job_fini(pfdev);
 	panfrost_mmu_fini(pfdev);
 	panfrost_gpu_fini(pfdev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 56f452dfb490..83cc01cafde1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -14,6 +14,7 @@ struct panfrost_device;
 struct panfrost_mmu;
 struct panfrost_job_slot;
 struct panfrost_job;
+struct panfrost_perfcnt;
 
 #define NUM_JOB_SLOTS 3
 
@@ -66,6 +67,7 @@ struct panfrost_device {
 
 	void __iomem *iomem;
 	struct clk *clock;
+	struct clk *bus_clock;
 	struct regulator *regulator;
 	struct reset_control *rstc;
 
@@ -77,6 +79,8 @@ struct panfrost_device {
 	struct panfrost_job *jobs[NUM_JOB_SLOTS];
 	struct list_head scheduled_jobs;
 
+	struct panfrost_perfcnt *perfcnt;
+
 	struct mutex sched_lock;
 	struct mutex reset_lock;
 
@@ -109,11 +113,18 @@ static inline int panfrost_model_cmp(struct panfrost_device *pfdev, s32 id)
 	return match_id - id;
 }
 
+static inline bool panfrost_model_is_bifrost(struct panfrost_device *pfdev)
+{
+	return panfrost_model_cmp(pfdev, 0x1000) >= 0;
+}
+
 static inline bool panfrost_model_eq(struct panfrost_device *pfdev, s32 id)
 {
 	return !panfrost_model_cmp(pfdev, id);
 }
 
+int panfrost_unstable_ioctl_check(void);
+
 int panfrost_device_init(struct panfrost_device *pfdev);
 void panfrost_device_fini(struct panfrost_device *pfdev);
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d11e2281dde6..e34e86a7378a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -19,6 +19,10 @@
 #include "panfrost_mmu.h"
 #include "panfrost_job.h"
 #include "panfrost_gpu.h"
+#include "panfrost_perfcnt.h"
+
+static bool unstable_ioctls;
+module_param_unsafe(unstable_ioctls, bool, 0600);
 
 static int panfrost_ioctl_get_param(struct drm_device *ddev, void *data, struct drm_file *file)
 {
@@ -297,6 +301,14 @@ static int panfrost_ioctl_get_bo_offset(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int panfrost_unstable_ioctl_check(void)
+{
+	if (!unstable_ioctls)
+		return -ENOSYS;
+
+	return 0;
+}
+
 static int
 panfrost_open(struct drm_device *dev, struct drm_file *file)
 {
@@ -318,6 +330,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
 
+	panfrost_perfcnt_close(panfrost_priv);
 	panfrost_job_close(panfrost_priv);
 
 	kfree(panfrost_priv);
@@ -337,6 +350,8 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
 	PANFROST_IOCTL(MMAP_BO,		mmap_bo,	DRM_RENDER_ALLOW),
 	PANFROST_IOCTL(GET_PARAM,	get_param,	DRM_RENDER_ALLOW),
 	PANFROST_IOCTL(GET_BO_OFFSET,	get_bo_offset,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(PERFCNT_ENABLE,	perfcnt_enable,	DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(PERFCNT_DUMP,	perfcnt_dump,	DRM_RENDER_ALLOW),
 };
 
 DEFINE_DRM_GEM_SHMEM_FOPS(panfrost_drm_driver_fops);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index a5528a360ef4..886875ae31d3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -52,6 +52,7 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
 	int ret;
 	struct panfrost_device *pfdev = dev->dev_private;
 	struct panfrost_gem_object *obj;
+	u64 align;
 
 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 	if (!obj)
@@ -59,9 +60,12 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
 
 	obj->base.base.funcs = &panfrost_gem_funcs;
 
+	size = roundup(size, PAGE_SIZE);
+	align = size >= SZ_2M ? SZ_2M >> PAGE_SHIFT : 0;
+
 	spin_lock(&pfdev->mm_lock);
-	ret = drm_mm_insert_node(&pfdev->mm, &obj->node,
-				 roundup(size, PAGE_SIZE) >> PAGE_SHIFT);
+	ret = drm_mm_insert_node_generic(&pfdev->mm, &obj->node,
+					 size >> PAGE_SHIFT, align, 0, 0);
 	spin_unlock(&pfdev->mm_lock);
 	if (ret)
 		goto free_obj;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index 58ef25573cda..20ab333fc925 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -15,11 +15,9 @@
 #include "panfrost_features.h"
 #include "panfrost_issues.h"
 #include "panfrost_gpu.h"
+#include "panfrost_perfcnt.h"
 #include "panfrost_regs.h"
 
-#define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
-#define gpu_read(dev, reg) readl(dev->iomem + reg)
-
 static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data)
 {
 	struct panfrost_device *pfdev = data;
@@ -43,6 +41,12 @@ static irqreturn_t panfrost_gpu_irq_handler(int irq, void *data)
 		gpu_write(pfdev, GPU_INT_MASK, 0);
 	}
 
+	if (state & GPU_IRQ_PERFCNT_SAMPLE_COMPLETED)
+		panfrost_perfcnt_sample_done(pfdev);
+
+	if (state & GPU_IRQ_CLEAN_CACHES_COMPLETED)
+		panfrost_perfcnt_clean_cache_done(pfdev);
+
 	gpu_write(pfdev, GPU_INT_CLEAR, state);
 
 	return IRQ_HANDLED;
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a5716c8fe8b3..9bb9260d9181 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -387,7 +387,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	mutex_lock(&pfdev->reset_lock);
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
-		drm_sched_stop(&pfdev->js->queue[i].sched);
+		drm_sched_stop(&pfdev->js->queue[i].sched, sched_job);
 
 	if (sched_job)
 		drm_sched_increase_karma(sched_job);
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
new file mode 100644
index 000000000000..83c57d325ca8
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2019 Collabora Ltd */
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/panfrost_drm.h>
+#include <linux/completion.h>
+#include <linux/iopoll.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "panfrost_device.h"
+#include "panfrost_features.h"
+#include "panfrost_gem.h"
+#include "panfrost_issues.h"
+#include "panfrost_job.h"
+#include "panfrost_mmu.h"
+#include "panfrost_regs.h"
+
+#define COUNTERS_PER_BLOCK		64
+#define BYTES_PER_COUNTER		4
+#define BLOCKS_PER_COREGROUP		8
+#define V4_SHADERS_PER_COREGROUP	4
+
+struct panfrost_perfcnt {
+	struct panfrost_gem_object *bo;
+	size_t bosize;
+	void *buf;
+	struct panfrost_file_priv *user;
+	struct mutex lock;
+	struct completion dump_comp;
+};
+
+void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
+{
+	complete(&pfdev->perfcnt->dump_comp);
+}
+
+void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
+{
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
+}
+
+static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
+{
+	u64 gpuva;
+	int ret;
+
+	reinit_completion(&pfdev->perfcnt->dump_comp);
+	gpuva = pfdev->perfcnt->bo->node.start << PAGE_SHIFT;
+	gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva);
+	gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32);
+	gpu_write(pfdev, GPU_INT_CLEAR,
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
+	ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
+							msecs_to_jiffies(1000));
+	if (!ret)
+		ret = -ETIMEDOUT;
+	else if (ret > 0)
+		ret = 0;
+
+	return ret;
+}
+
+static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
+					  struct panfrost_file_priv *user,
+					  unsigned int counterset)
+{
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct drm_gem_shmem_object *bo;
+	u32 cfg;
+	int ret;
+
+	if (user == perfcnt->user)
+		return 0;
+	else if (perfcnt->user)
+		return -EBUSY;
+
+	ret = pm_runtime_get_sync(pfdev->dev);
+	if (ret < 0)
+		return ret;
+
+	bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	perfcnt->bo = to_panfrost_bo(&bo->base);
+
+	/* Map the perfcnt buf in the address space attached to file_priv. */
+	ret = panfrost_mmu_map(perfcnt->bo);
+	if (ret)
+		goto err_put_bo;
+
+	perfcnt->buf = drm_gem_shmem_vmap(&bo->base);
+	if (IS_ERR(perfcnt->buf)) {
+		ret = PTR_ERR(perfcnt->buf);
+		goto err_put_bo;
+	}
+
+	/*
+	 * Invalidate the cache and clear the counters to start from a fresh
+	 * state.
+	 */
+	reinit_completion(&pfdev->perfcnt->dump_comp);
+	gpu_write(pfdev, GPU_INT_CLEAR,
+		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
+		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
+	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
+	ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
+					  msecs_to_jiffies(1000));
+	if (!ret) {
+		ret = -ETIMEDOUT;
+		goto err_vunmap;
+	}
+
+	perfcnt->user = user;
+
+	/*
+	 * Always use address space 0 for now.
+	 * FIXME: this needs to be updated when we start using different
+	 * address space.
+	 */
+	cfg = GPU_PERFCNT_CFG_AS(0) |
+	      GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
+
+	/*
+	 * Bifrost GPUs have 2 set of counters, but we're only interested by
+	 * the first one for now.
+	 */
+	if (panfrost_model_is_bifrost(pfdev))
+		cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
+
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
+
+	/*
+	 * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
+	 * counters.
+	 */
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+	else
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
+
+	gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
+
+	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
+		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
+
+	return 0;
+
+err_vunmap:
+	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
+err_put_bo:
+	drm_gem_object_put_unlocked(&bo->base);
+	return ret;
+}
+
+static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
+					   struct panfrost_file_priv *user)
+{
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+
+	if (user != perfcnt->user)
+		return -EINVAL;
+
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+
+	perfcnt->user = NULL;
+	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
+	perfcnt->buf = NULL;
+	drm_gem_object_put_unlocked(&perfcnt->bo->base.base);
+	perfcnt->bo = NULL;
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+
+	return 0;
+}
+
+int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv)
+{
+	struct panfrost_file_priv *pfile = file_priv->driver_priv;
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct drm_panfrost_perfcnt_enable *req = data;
+	int ret;
+
+	ret = panfrost_unstable_ioctl_check();
+	if (ret)
+		return ret;
+
+	/* Only Bifrost GPUs have 2 set of counters. */
+	if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
+		return -EINVAL;
+
+	mutex_lock(&perfcnt->lock);
+	if (req->enable)
+		ret = panfrost_perfcnt_enable_locked(pfdev, pfile,
+						     req->counterset);
+	else
+		ret = panfrost_perfcnt_disable_locked(pfdev, pfile);
+	mutex_unlock(&perfcnt->lock);
+
+	return ret;
+}
+
+int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
+				struct drm_file *file_priv)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+	struct drm_panfrost_perfcnt_dump *req = data;
+	void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
+	int ret;
+
+	ret = panfrost_unstable_ioctl_check();
+	if (ret)
+		return ret;
+
+	mutex_lock(&perfcnt->lock);
+	if (perfcnt->user != file_priv->driver_priv) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = panfrost_perfcnt_dump_locked(pfdev);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
+		ret = -EFAULT;
+
+out:
+	mutex_unlock(&perfcnt->lock);
+
+	return ret;
+}
+
+void panfrost_perfcnt_close(struct panfrost_file_priv *pfile)
+{
+	struct panfrost_device *pfdev = pfile->pfdev;
+	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
+
+	pm_runtime_get_sync(pfdev->dev);
+	mutex_lock(&perfcnt->lock);
+	if (perfcnt->user == pfile)
+		panfrost_perfcnt_disable_locked(pfdev, pfile);
+	mutex_unlock(&perfcnt->lock);
+	pm_runtime_mark_last_busy(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
+}
+
+int panfrost_perfcnt_init(struct panfrost_device *pfdev)
+{
+	struct panfrost_perfcnt *perfcnt;
+	size_t size;
+
+	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
+		unsigned int ncoregroups;
+
+		ncoregroups = hweight64(pfdev->features.l2_present);
+		size = ncoregroups * BLOCKS_PER_COREGROUP *
+		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
+	} else {
+		unsigned int nl2c, ncores;
+
+		/*
+		 * TODO: define a macro to extract the number of l2 caches from
+		 * mem_features.
+		 */
+		nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
+
+		/*
+		 * shader_present might be sparse, but the counters layout
+		 * forces to dump unused regions too, hence the fls64() call
+		 * instead of hweight64().
+		 */
+		ncores = fls64(pfdev->features.shader_present);
+
+		/*
+		 * There's always one JM and one Tiler block, hence the '+ 2'
+		 * here.
+		 */
+		size = (nl2c + ncores + 2) *
+		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
+	}
+
+	perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
+	if (!perfcnt)
+		return -ENOMEM;
+
+	perfcnt->bosize = size;
+
+	/* Start with everything disabled. */
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+
+	init_completion(&perfcnt->dump_comp);
+	mutex_init(&perfcnt->lock);
+	pfdev->perfcnt = perfcnt;
+
+	return 0;
+}
+
+void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
+{
+	/* Disable everything before leaving. */
+	gpu_write(pfdev, GPU_PERFCNT_CFG,
+		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
+	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
+	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
new file mode 100644
index 000000000000..13b8fdaa1b43
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2019 Collabora Ltd */
+#ifndef __PANFROST_PERFCNT_H__
+#define __PANFROST_PERFCNT_H__
+
+#include "panfrost_device.h"
+
+void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev);
+void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev);
+int panfrost_perfcnt_init(struct panfrost_device *pfdev);
+void panfrost_perfcnt_fini(struct panfrost_device *pfdev);
+void panfrost_perfcnt_close(struct panfrost_file_priv *pfile);
+int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv);
+int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
+				struct drm_file *file_priv);
+
+#endif
diff --git a/drivers/gpu/drm/panfrost/panfrost_regs.h b/drivers/gpu/drm/panfrost/panfrost_regs.h
index 578c5fc2188b..ea38ac60581c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_regs.h
+++ b/drivers/gpu/drm/panfrost/panfrost_regs.h
@@ -44,12 +44,31 @@
 	 GPU_IRQ_MULTIPLE_FAULT)
 #define GPU_CMD				0x30
 #define   GPU_CMD_SOFT_RESET		0x01
+#define   GPU_CMD_PERFCNT_CLEAR		0x03
+#define   GPU_CMD_PERFCNT_SAMPLE	0x04
+#define   GPU_CMD_CLEAN_CACHES		0x07
+#define   GPU_CMD_CLEAN_INV_CACHES	0x08
 #define GPU_STATUS			0x34
+#define   GPU_STATUS_PRFCNT_ACTIVE	BIT(2)
 #define GPU_LATEST_FLUSH_ID		0x38
 #define GPU_FAULT_STATUS		0x3C
 #define GPU_FAULT_ADDRESS_LO		0x40
 #define GPU_FAULT_ADDRESS_HI		0x44
 
+#define GPU_PERFCNT_BASE_LO		0x60
+#define GPU_PERFCNT_BASE_HI		0x64
+#define GPU_PERFCNT_CFG			0x68
+#define   GPU_PERFCNT_CFG_MODE(x)	(x)
+#define   GPU_PERFCNT_CFG_MODE_OFF	0
+#define   GPU_PERFCNT_CFG_MODE_MANUAL	1
+#define   GPU_PERFCNT_CFG_MODE_TILE	2
+#define   GPU_PERFCNT_CFG_AS(x)		((x) << 4)
+#define   GPU_PERFCNT_CFG_SETSEL(x)	((x) << 8)
+#define GPU_PRFCNT_JM_EN		0x6c
+#define GPU_PRFCNT_SHADER_EN		0x70
+#define GPU_PRFCNT_TILER_EN		0x74
+#define GPU_PRFCNT_MMU_L2_EN		0x7c
+
 #define GPU_THREAD_MAX_THREADS		0x0A0	/* (RO) Maximum number of threads per core */
 #define GPU_THREAD_MAX_WORKGROUP_SIZE	0x0A4	/* (RO) Maximum workgroup size */
 #define GPU_THREAD_MAX_BARRIER_SIZE	0x0A8	/* (RO) Maximum threads waiting at a barrier */
@@ -295,4 +314,7 @@
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE	(0x3 << 8)
 
+#define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
+#define gpu_read(dev, reg) readl(dev->iomem + reg)
+
 #endif
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 30f85f0130cb..49f9a9385393 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -256,7 +256,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
 		return 0;
 
 	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos,
-				     !no_intr, NULL);
+				     !no_intr, NULL, true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c
index b91af1bf531b..138af32480d4 100644
--- a/drivers/gpu/drm/r128/r128_cce.c
+++ b/drivers/gpu/drm/r128/r128_cce.c
@@ -29,13 +29,21 @@
  *    Gareth Hughes <gareth@valinux.com>
  */
 
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/firmware.h>
+#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/uaccess.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_print.h>
 #include <drm/r128_drm.h>
+
 #include "r128_drv.h"
 
 #define R128_FIFO_DEBUG		0
@@ -85,7 +93,7 @@ static int r128_do_pixcache_flush(drm_r128_private_t *dev_priv)
 	for (i = 0; i < dev_priv->usec_timeout; i++) {
 		if (!(R128_READ(R128_PC_NGUI_CTLSTAT) & R128_PC_BUSY))
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if R128_FIFO_DEBUG
@@ -102,7 +110,7 @@ static int r128_do_wait_for_fifo(drm_r128_private_t *dev_priv, int entries)
 		int slots = R128_READ(R128_GUI_STAT) & R128_GUI_FIFOCNT_MASK;
 		if (slots >= entries)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if R128_FIFO_DEBUG
@@ -124,7 +132,7 @@ static int r128_do_wait_for_idle(drm_r128_private_t *dev_priv)
 			r128_do_pixcache_flush(dev_priv);
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if R128_FIFO_DEBUG
@@ -211,7 +219,7 @@ int r128_do_cce_idle(drm_r128_private_t *dev_priv)
 				return r128_do_pixcache_flush(dev_priv);
 			}
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if R128_FIFO_DEBUG
@@ -838,7 +846,7 @@ static struct drm_buf *r128_freelist_get(struct drm_device * dev)
 				return buf;
 			}
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	DRM_DEBUG("returning NULL!\n");
@@ -870,7 +878,7 @@ int r128_wait_ring(drm_r128_private_t *dev_priv, int n)
 		r128_update_ring_snapshot(dev_priv);
 		if (ring->space >= n)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	/* FIXME: This is being ignored... */
@@ -916,7 +924,7 @@ int r128_cce_buffers(struct drm_device *dev, void *data, struct drm_file *file_p
 	 */
 	if (d->send_count != 0) {
 		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
-			  DRM_CURRENTPID, d->send_count);
+			  task_pid_nr(current), d->send_count);
 		return -EINVAL;
 	}
 
@@ -924,7 +932,7 @@ int r128_cce_buffers(struct drm_device *dev, void *data, struct drm_file *file_p
 	 */
 	if (d->request_count < 0 || d->request_count > dma->buf_count) {
 		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
-			  DRM_CURRENTPID, d->request_count, dma->buf_count);
+			  task_pid_nr(current), d->request_count, dma->buf_count);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c
index 4b1a505ab353..fd74f744604f 100644
--- a/drivers/gpu/drm/r128/r128_drv.c
+++ b/drivers/gpu/drm/r128/r128_drv.c
@@ -31,11 +31,14 @@
 
 #include <linux/module.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_pciids.h>
+#include <drm/drm_vblank.h>
 #include <drm/r128_drm.h>
-#include "r128_drv.h"
 
-#include <drm/drm_pciids.h>
+#include "r128_drv.h"
 
 static struct pci_device_id pciidlist[] = {
 	r128_PCI_IDS
diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h
index 2de40d276116..ba8c30ed91d1 100644
--- a/drivers/gpu/drm/r128/r128_drv.h
+++ b/drivers/gpu/drm/r128/r128_drv.h
@@ -35,8 +35,14 @@
 #ifndef __R128_DRV_H__
 #define __R128_DRV_H__
 
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/irqreturn.h>
+
 #include <drm/ati_pcigart.h>
+#include <drm/drm_ioctl.h>
 #include <drm/drm_legacy.h>
+#include <drm/r128_drm.h>
 
 /* General customization:
  */
@@ -397,10 +403,10 @@ extern long r128_compat_ioctl(struct file *filp, unsigned int cmd,
 
 #define R128_PCIGART_TABLE_SIZE         32768
 
-#define R128_READ(reg)		DRM_READ32(dev_priv->mmio, (reg))
-#define R128_WRITE(reg, val)	DRM_WRITE32(dev_priv->mmio, (reg), (val))
-#define R128_READ8(reg)		DRM_READ8(dev_priv->mmio, (reg))
-#define R128_WRITE8(reg, val)	DRM_WRITE8(dev_priv->mmio, (reg), (val))
+#define R128_READ(reg)		readl(((void __iomem *)dev_priv->mmio->handle) + (reg))
+#define R128_WRITE(reg, val)	writel(val, ((void __iomem *)dev_priv->mmio->handle) + (reg))
+#define R128_READ8(reg)		readb(((void __iomem *)dev_priv->mmio->handle) + (reg))
+#define R128_WRITE8(reg, val)	writeb(val, ((void __iomem *)dev_priv->mmio->handle) + (reg))
 
 #define R128_WRITE_PLL(addr, val)					\
 do {									\
@@ -445,7 +451,7 @@ do {									\
 			r128_update_ring_snapshot(dev_priv);		\
 			if (ring->space >= ring->high_mark)		\
 				goto __ring_space_done;			\
-			DRM_UDELAY(1);					\
+			udelay(1);					\
 		}							\
 		DRM_ERROR("ring space check failed!\n");		\
 		return -EBUSY;						\
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index b9bfa806d346..9d74c9d914cb 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -28,8 +28,15 @@
  *    Gareth Hughes <gareth@valinux.com>
  */
 
-#include <drm/drmP.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 #include <drm/r128_drm.h>
+
 #include "r128_drv.h"
 
 /* ================================================================
@@ -824,7 +831,7 @@ static int r128_cce_dispatch_blit(struct drm_device *dev,
 
 	if (buf->file_priv != file_priv) {
 		DRM_ERROR("process %d using buffer owned by %p\n",
-			  DRM_CURRENTPID, buf->file_priv);
+			  task_pid_nr(current), buf->file_priv);
 		return -EINVAL;
 	}
 	if (buf->pending) {
@@ -1317,7 +1324,7 @@ static int r128_cce_vertex(struct drm_device *dev, void *data, struct drm_file *
 	DEV_INIT_TEST_WITH_RETURN(dev_priv);
 
 	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
-		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
+		  task_pid_nr(current), vertex->idx, vertex->count, vertex->discard);
 
 	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
 		DRM_ERROR("buffer index %d (of %d max)\n",
@@ -1338,7 +1345,7 @@ static int r128_cce_vertex(struct drm_device *dev, void *data, struct drm_file *
 
 	if (buf->file_priv != file_priv) {
 		DRM_ERROR("process %d using buffer owned by %p\n",
-			  DRM_CURRENTPID, buf->file_priv);
+			  task_pid_nr(current), buf->file_priv);
 		return -EINVAL;
 	}
 	if (buf->pending) {
@@ -1369,7 +1376,7 @@ static int r128_cce_indices(struct drm_device *dev, void *data, struct drm_file
 
 	DEV_INIT_TEST_WITH_RETURN(dev_priv);
 
-	DRM_DEBUG("pid=%d buf=%d s=%d e=%d d=%d\n", DRM_CURRENTPID,
+	DRM_DEBUG("pid=%d buf=%d s=%d e=%d d=%d\n", task_pid_nr(current),
 		  elts->idx, elts->start, elts->end, elts->discard);
 
 	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
@@ -1391,7 +1398,7 @@ static int r128_cce_indices(struct drm_device *dev, void *data, struct drm_file
 
 	if (buf->file_priv != file_priv) {
 		DRM_ERROR("process %d using buffer owned by %p\n",
-			  DRM_CURRENTPID, buf->file_priv);
+			  task_pid_nr(current), buf->file_priv);
 		return -EINVAL;
 	}
 	if (buf->pending) {
@@ -1432,7 +1439,7 @@ static int r128_cce_blit(struct drm_device *dev, void *data, struct drm_file *fi
 
 	DEV_INIT_TEST_WITH_RETURN(dev_priv);
 
-	DRM_DEBUG("pid=%d index=%d\n", DRM_CURRENTPID, blit->idx);
+	DRM_DEBUG("pid=%d index=%d\n", task_pid_nr(current), blit->idx);
 
 	if (blit->idx < 0 || blit->idx >= dma->buf_count) {
 		DRM_ERROR("buffer index %d (of %d max)\n",
@@ -1532,7 +1539,7 @@ static int r128_cce_indirect(struct drm_device *dev, void *data, struct drm_file
 
 	if (buf->file_priv != file_priv) {
 		DRM_ERROR("process %d using buffer owned by %p\n",
-			  DRM_CURRENTPID, buf->file_priv);
+			  task_pid_nr(current), buf->file_priv);
 		return -EINVAL;
 	}
 	if (buf->pending) {
@@ -1579,7 +1586,7 @@ int r128_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv
 
 	DEV_INIT_TEST_WITH_RETURN(dev_priv);
 
-	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
+	DRM_DEBUG("pid=%d\n", task_pid_nr(current));
 
 	switch (param->param) {
 	case R128_PARAM_IRQ_NR:
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index ac98ad561870..2c27627b6659 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -25,8 +25,10 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+
 #include <asm/unaligned.h>
 
+#include <drm/drm_device.h>
 #include <drm/drm_util.h>
 
 #define ATOM_DEBUG
diff --git a/drivers/gpu/drm/radeon/atom.h b/drivers/gpu/drm/radeon/atom.h
index 6d014ddb6b78..364b895e7ebb 100644
--- a/drivers/gpu/drm/radeon/atom.h
+++ b/drivers/gpu/drm/radeon/atom.h
@@ -26,7 +26,6 @@
 #define ATOM_H
 
 #include <linux/types.h>
-#include <drm/drmP.h>
 
 #define ATOM_BIOS_MAGIC		0xAA55
 #define ATOM_ATI_MAGIC_PTR	0x30
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index d75ae17ff3ad..da2c9e295408 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -23,11 +23,14 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
-#include <drm/radeon_drm.h>
 #include <drm/drm_fixed.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+#include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "atom.h"
 #include "atom-bits.h"
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 3e798593e042..6f38375c77c8 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -24,7 +24,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index e67ed383e11b..cc8f32a1b03c 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -23,15 +23,19 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
+#include <linux/backlight.h>
+#include <linux/dmi.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
+#include "atom.h"
 #include "radeon.h"
-#include "radeon_audio.h"
 #include "radeon_asic.h"
-#include "atom.h"
-#include <linux/backlight.h>
-#include <linux/dmi.h>
+#include "radeon_audio.h"
 
 extern int atom_debug;
 
diff --git a/drivers/gpu/drm/radeon/atombios_i2c.c b/drivers/gpu/drm/radeon/atombios_i2c.c
index 9022e9af11a0..a570ce40af19 100644
--- a/drivers/gpu/drm/radeon/atombios_i2c.c
+++ b/drivers/gpu/drm/radeon/atombios_i2c.c
@@ -22,7 +22,7 @@
  * Authors: Alex Deucher
  *
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 0aef4937c901..ce37de020b91 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -22,15 +22,17 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "btc_dpm.h"
 #include "btcd.h"
-#include "r600_dpm.h"
 #include "cypress_dpm.h"
-#include "btc_dpm.h"
-#include "atom.h"
-#include <linux/seq_file.h>
+#include "r600_dpm.h"
+#include "radeon.h"
+#include "radeon_asic.h"
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
diff --git a/drivers/gpu/drm/radeon/btc_dpm.h b/drivers/gpu/drm/radeon/btc_dpm.h
index 3b6f12b7760b..ec4cbb4aa77c 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.h
+++ b/drivers/gpu/drm/radeon/btc_dpm.h
@@ -23,6 +23,9 @@
 #ifndef __BTC_DPM_H__
 #define __BTC_DPM_H__
 
+#include "radeon.h"
+#include "rv770_dpm.h"
+
 #define BTC_RLP_UVD_DFLT                              20
 #define BTC_RMP_UVD_DFLT                              50
 #define BTC_LHP_UVD_DFLT                              50
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index a12439266bb0..c6fd123f60b5 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -22,15 +22,17 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "ci_dpm.h"
+#include "cikd.h"
+#include "r600_dpm.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_ucode.h"
-#include "cikd.h"
-#include "r600_dpm.h"
-#include "ci_dpm.h"
-#include "atom.h"
-#include <linux/seq_file.h>
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
diff --git a/drivers/gpu/drm/radeon/ci_dpm.h b/drivers/gpu/drm/radeon/ci_dpm.h
index dff2a63df38f..ac12db5f2cf7 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.h
+++ b/drivers/gpu/drm/radeon/ci_dpm.h
@@ -24,6 +24,7 @@
 #define __CI_DPM_H__
 
 #include "ppsmc.h"
+#include "radeon.h"
 
 #define SMU__NUM_SCLK_DPM_STATE  8
 #define SMU__NUM_MCLK_DPM_LEVELS 6
diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c
index 371121913756..f4a1ba567f21 100644
--- a/drivers/gpu/drm/radeon/ci_smc.c
+++ b/drivers/gpu/drm/radeon/ci_smc.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "cikd.h"
 #include "ppsmc.h"
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ab7b4e2ffcd2..40f4d29edfe2 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -21,18 +21,22 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
+
+#include "atom.h"
+#include "cik_blit_shaders.h"
+#include "cikd.h"
+#include "clearstate_ci.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
-#include "cikd.h"
-#include "atom.h"
-#include "cik_blit_shaders.h"
 #include "radeon_ucode.h"
-#include "clearstate_ci.h"
 
 #define SH_MEM_CONFIG_GFX_DEFAULT \
 	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
@@ -3480,7 +3484,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
@@ -3825,7 +3829,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 9c351dc8a9e0..589217a7e435 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -22,7 +22,7 @@
  * Authors: Alex Deucher
  */
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_ucode.h"
 #include "radeon_asic.h"
@@ -677,7 +677,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i < rdev->usec_timeout) {
@@ -751,7 +751,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
diff --git a/drivers/gpu/drm/radeon/clearstate_cayman.h b/drivers/gpu/drm/radeon/clearstate_cayman.h
index e48a14037b76..4774e04c4da6 100644
--- a/drivers/gpu/drm/radeon/clearstate_cayman.h
+++ b/drivers/gpu/drm/radeon/clearstate_cayman.h
@@ -21,6 +21,8 @@
  *
  */
 
+#include "clearstate_defs.h"
+
 static const u32 SECT_CONTEXT_def_1[] =
 {
     0x00000000, // DB_RENDER_CONTROL
diff --git a/drivers/gpu/drm/radeon/clearstate_ci.h b/drivers/gpu/drm/radeon/clearstate_ci.h
index f55d06664e31..c1b6c22dbed7 100644
--- a/drivers/gpu/drm/radeon/clearstate_ci.h
+++ b/drivers/gpu/drm/radeon/clearstate_ci.h
@@ -21,6 +21,8 @@
  *
  */
 
+#include "clearstate_defs.h"
+
 static const unsigned int ci_SECT_CONTEXT_def_1[] =
 {
     0x00000000, // DB_RENDER_CONTROL
diff --git a/drivers/gpu/drm/radeon/clearstate_si.h b/drivers/gpu/drm/radeon/clearstate_si.h
index 66e39cdb5cb0..356219c6c7f2 100644
--- a/drivers/gpu/drm/radeon/clearstate_si.h
+++ b/drivers/gpu/drm/radeon/clearstate_si.h
@@ -21,6 +21,8 @@
  *
  */
 
+#include "clearstate_defs.h"
+
 static const u32 si_SECT_CONTEXT_def_1[] =
 {
     0x00000000, // DB_RENDER_CONTROL
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index 3eb7899a4035..32ed60f1048b 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -22,13 +22,14 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "cypress_dpm.h"
 #include "evergreend.h"
 #include "r600_dpm.h"
-#include "cypress_dpm.h"
-#include "atom.h"
+#include "radeon.h"
+#include "radeon_asic.h"
 
 #define SMC_RAM_END 0x8000
 
diff --git a/drivers/gpu/drm/radeon/dce3_1_afmt.c b/drivers/gpu/drm/radeon/dce3_1_afmt.c
index cfa3a84a2af0..e8fe239b9d79 100644
--- a/drivers/gpu/drm/radeon/dce3_1_afmt.c
+++ b/drivers/gpu/drm/radeon/dce3_1_afmt.c
@@ -21,7 +21,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <linux/hdmi.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 367a916f364e..eec5d7a62738 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -21,7 +21,7 @@
  *
  */
 #include <linux/hdmi.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_audio.h"
 #include "sid.h"
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 5712d63dca20..1d978a3d9c82 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -21,18 +21,22 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
-#include "radeon_audio.h"
+
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
-#include "evergreend.h"
+
 #include "atom.h"
 #include "avivod.h"
-#include "evergreen_reg.h"
 #include "evergreen_blit_shaders.h"
+#include "evergreen_reg.h"
+#include "evergreend.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_audio.h"
 #include "radeon_ucode.h"
 
 #define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + (x * 0xc))
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 1e14c6921454..c410cad28f19 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -25,7 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "evergreend.h"
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 96535aa8659c..5505a04ca402 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "evergreend.h"
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index f766c967a284..739336a48d08 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -25,7 +25,7 @@
  *          Rafał Miłecki
  */
 #include <linux/hdmi.h>
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 #include "radeon_asic.h"
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index f055d6ea3522..0d8d30b78f95 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -21,13 +21,15 @@
  *
  */
 
-#include <drm/drmP.h>
-#include "radeon.h"
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
 #include "cikd.h"
-#include "r600_dpm.h"
 #include "kv_dpm.h"
+#include "r600_dpm.h"
+#include "radeon.h"
 #include "radeon_asic.h"
-#include <linux/seq_file.h>
 
 #define KV_MAX_DEEPSLEEP_DIVIDER_ID     5
 #define KV_MINIMUM_ENGINE_CLOCK         800
diff --git a/drivers/gpu/drm/radeon/kv_smc.c b/drivers/gpu/drm/radeon/kv_smc.c
index af60bd32a287..c0a59527e7b8 100644
--- a/drivers/gpu/drm/radeon/kv_smc.c
+++ b/drivers/gpu/drm/radeon/kv_smc.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "cikd.h"
 #include "kv_dpm.h"
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 381b0255ff02..410f626a39d4 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -21,20 +21,23 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
-#include "radeon_audio.h"
+
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
-#include "nid.h"
+
 #include "atom.h"
-#include "ni_reg.h"
 #include "cayman_blit_shaders.h"
-#include "radeon_ucode.h"
 #include "clearstate_cayman.h"
+#include "ni_reg.h"
+#include "nid.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_audio.h"
+#include "radeon_ucode.h"
 
 /*
  * Indirect registers accessor
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index ce787a9f12c0..c56136848360 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_trace.h"
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index 0fd8d6ba9828..d9e62ca65ab8 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -21,16 +21,18 @@
  *
  */
 
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
-#include "nid.h"
-#include "r600_dpm.h"
-#include "ni_dpm.h"
-#include "atom.h"
 #include <linux/math64.h>
 #include <linux/seq_file.h>
 
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "ni_dpm.h"
+#include "nid.h"
+#include "r600_dpm.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
 #define MC_CG_ARB_FREQ_F2           0x0c
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7d39ed63e5be..5c05193da520 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -25,24 +25,30 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
-#include "radeon_reg.h"
+
+#include "atom.h"
+#include "r100_reg_safe.h"
+#include "r100d.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "r100d.h"
+#include "radeon_reg.h"
+#include "rn50_reg_safe.h"
 #include "rs100d.h"
 #include "rv200d.h"
 #include "rv250d.h"
-#include "atom.h"
-
-#include <linux/firmware.h>
-#include <linux/module.h>
-
-#include "r100_reg_safe.h"
-#include "rn50_reg_safe.h"
 
 /* Firmware Names */
 #define FIRMWARE_R100		"radeon/R100_cp.bin"
@@ -2470,7 +2476,7 @@ static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
 		if (tmp >= n) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
@@ -2488,7 +2494,7 @@ int r100_gui_wait_for_idle(struct radeon_device *rdev)
 		if (!(tmp & RADEON_RBBM_ACTIVE)) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
@@ -2504,7 +2510,7 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
 		if (tmp & RADEON_MC_IDLE) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
@@ -3669,7 +3675,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		if (tmp == 0xDEADBEEF) {
 			break;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ring test succeeded in %d usecs\n", i);
@@ -3746,7 +3752,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		if (tmp == 0xDEADBEEF) {
 			break;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ib test succeeded in %u usecs\n", i);
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 57e2b09784be..1b5ff3f816db 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -1,5 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 
+#include "radeon.h"
+
 #define R100_TRACK_MAX_TEXTURE 3
 #define R200_TRACK_MAX_TEXTURE 6
 #define R300_TRACK_MAX_TEXTURE 16
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index c22321cc5a41..9ce6dd83d284 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -25,7 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon_reg.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 652126fd6dd4..44856e3a7108 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -25,19 +25,25 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 #include <drm/drm_crtc_helper.h>
-#include "radeon_reg.h"
-#include "radeon.h"
-#include "radeon_asic.h"
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "r100_track.h"
+#include "r300_reg_safe.h"
 #include "r300d.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_reg.h"
 #include "rv350d.h"
-#include "r300_reg_safe.h"
 
 /* This files gather functions specifics to: r300,r350,rv350,rv370,rv380
  *
@@ -350,7 +356,7 @@ int r300_mc_wait_for_idle(struct radeon_device *rdev)
 		if (tmp & R300_MC_IDLE) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c
index 2318d9e3ed96..83282ee2bde0 100644
--- a/drivers/gpu/drm/radeon/r420.c
+++ b/drivers/gpu/drm/radeon/r420.c
@@ -25,16 +25,22 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
-#include "radeon_reg.h"
-#include "radeon.h"
-#include "radeon_asic.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+
 #include "atom.h"
 #include "r100d.h"
-#include "r420d.h"
 #include "r420_reg_safe.h"
+#include "r420d.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_reg.h"
 
 void r420_pm_init_profile(struct radeon_device *rdev)
 {
diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c
index 074cf752faef..fc78e64ae727 100644
--- a/drivers/gpu/drm/radeon/r520.c
+++ b/drivers/gpu/drm/radeon/r520.c
@@ -25,7 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "atom.h"
@@ -44,7 +44,7 @@ int r520_mc_wait_for_idle(struct radeon_device *rdev)
 		if (tmp & R520_MC_STATUS_IDLE) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index e06e2d8feab3..7d175a9e8330 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -25,19 +25,25 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
+
+#include "atom.h"
+#include "avivod.h"
+#include "r600d.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
 #include "radeon_mode.h"
-#include "r600d.h"
-#include "atom.h"
-#include "avivod.h"
 #include "radeon_ucode.h"
 
 /* Firmware Names */
@@ -2840,7 +2846,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
@@ -3433,7 +3439,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index c96b31950ca7..d6c28a5d77ab 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -26,7 +26,7 @@
  *          Jerome Glisse
  */
 #include <linux/kernel.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "r600d.h"
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index fb65e6fb5c4f..35d92ef8a0d4 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "r600d.h"
@@ -261,7 +261,7 @@ int r600_dma_ring_test(struct radeon_device *rdev,
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i < rdev->usec_timeout) {
@@ -382,7 +382,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = le32_to_cpu(rdev->wb.wb[index/4]);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	if (i < rdev->usec_timeout) {
 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index 5e044c98fca2..35b77c944701 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "r600d.h"
diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h
index bd499d749bc9..6e4d22ed2a00 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.h
+++ b/drivers/gpu/drm/radeon/r600_dpm.h
@@ -23,6 +23,8 @@
 #ifndef __R600_DPM_H__
 #define __R600_DPM_H__
 
+#include "radeon.h"
+
 #define R600_ASI_DFLT                                10000
 #define R600_BSP_DFLT                                0x41EB
 #define R600_BSU_DFLT                                0x2
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index ab32830c4e23..c09549d785b5 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -25,7 +25,7 @@
  */
 #include <linux/hdmi.h>
 #include <linux/gcd.h>
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 #include "radeon_asic.h"
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 224cc21bbe38..6cf1645e7a1a 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -21,18 +21,21 @@
  *
  */
 
-#include <linux/pci.h>
 #include <linux/acpi.h>
-#include <linux/slab.h>
-#include <linux/power_supply.h>
+#include <linux/pci.h>
 #include <linux/pm_runtime.h>
+#include <linux/power_supply.h>
+#include <linux/slab.h>
+
+#include <acpi/acpi_bus.h>
 #include <acpi/video.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_probe_helper.h>
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_acpi.h"
-#include "atom.h"
 
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_atpx_dgpu_req_power_for_displays(void);
diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c
index c77d349c561c..4de16f3badb4 100644
--- a/drivers/gpu/drm/radeon/radeon_agp.c
+++ b/drivers/gpu/drm/radeon/radeon_agp.c
@@ -24,10 +24,14 @@
  *    Dave Airlie
  *    Jerome Glisse <glisse@freedesktop.org>
  */
-#include <drm/drmP.h>
-#include "radeon.h"
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
 
+#include "radeon.h"
+
 #if IS_ENABLED(CONFIG_AGP)
 
 struct radeon_agpmode_quirk {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index bc5121d1a7bc..dc3c2227e06a 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -27,14 +27,16 @@
  */
 
 #include <linux/console.h>
-#include <drm/drmP.h>
+#include <linux/vgaarb.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
-#include <linux/vgaarb.h>
-#include "radeon_reg.h"
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "atom.h"
+#include "radeon_reg.h"
 
 /*
  * Registers accessors functions.
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index f422a8d6aec4..226a7bf0eb7a 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -23,8 +23,11 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 
 #include "atom.h"
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 96f71114237a..b9aea5776d3d 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/gcd.h>
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc.h>
 #include "radeon.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 87d5fb21cb61..7ce5064a59f6 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -21,7 +21,7 @@
  *
  * Authors: Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon_reg.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 04c0ed41374f..4d1490fbb075 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -25,13 +25,17 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
-#include "radeon_reg.h"
-#include "radeon.h"
-#include "atom.h"
 
 #include <linux/slab.h>
 #include <linux/acpi.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "radeon.h"
+#include "radeon_reg.h"
+
 /*
  * BIOS.
  */
diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c
index e55146cdf543..9057b32f4498 100644
--- a/drivers/gpu/drm/radeon/radeon_clocks.c
+++ b/drivers/gpu/drm/radeon/radeon_clocks.c
@@ -25,12 +25,15 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
-#include "radeon_reg.h"
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "atom.h"
+#include "radeon_reg.h"
 
 /* 10 khz */
 uint32_t radeon_legacy_get_engine_clock(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 60a61d33f607..c18ae15189f3 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -24,8 +24,11 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "atom.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index de1745adcccc..c60d1a44d22a 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -23,7 +23,7 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_edid.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index f43305329939..cef0e697a2ea 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -24,11 +24,17 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+
 #include <linux/list_sort.h>
-#include <drm/drmP.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
-#include "radeon_reg.h"
+
 #include "radeon.h"
+#include "radeon_reg.h"
 #include "radeon_trace.h"
 
 #define RADEON_CS_MAX_PRIORITY		32u
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 91952277557e..9180bb51b913 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -23,8 +23,10 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 
 static void radeon_lock_cursor(struct drm_crtc *crtc, bool lock)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 0a9312ea250a..dceb554e5674 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -25,17 +25,23 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/console.h>
+#include <linux/efi.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+#include <linux/vga_switcheroo.h>
+#include <linux/vgaarb.h>
+
+#include <drm/drm_cache.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drm_cache.h>
 #include <drm/radeon_drm.h>
-#include <linux/pm_runtime.h>
-#include <linux/vgaarb.h>
-#include <linux/vga_switcheroo.h>
-#include <linux/efi.h>
+
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "atom.h"
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 433df7036f96..bd52f15e6330 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -23,22 +23,27 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
-#include "radeon.h"
 
-#include "atom.h"
+#include <linux/pm_runtime.h>
+#include <linux/gcd.h>
+
 #include <asm/div64.h>
 
-#include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_edid.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_pci.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_probe_helper.h>
-#include <drm/drm_edid.h>
+#include <drm/drm_vblank.h>
+#include <drm/radeon_drm.h>
 
-#include <linux/gcd.h>
+#include "atom.h"
+#include "radeon.h"
 
 static void avivo_crtc_load_lut(struct drm_crtc *crtc)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
index 12eac4e75542..69379b95146e 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
@@ -21,7 +21,7 @@
  *
  * Authors: Dave Airlie
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 #include "nid.h"
diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c
index 8d85540bbb43..2994f07fbad9 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_mst.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c
@@ -1,13 +1,14 @@
 // SPDX-License-Identifier: MIT
 
-#include <drm/drmP.h>
+#include <drm/drm_debugfs.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_file.h>
 #include <drm/drm_probe_helper.h>
 
-#include "radeon.h"
 #include "atom.h"
 #include "ni_reg.h"
+#include "radeon.h"
 
 static struct radeon_encoder *radeon_dp_create_fake_mst_encoder(struct radeon_connector *connector);
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 2e96c886392b..a6cbe11f79c6 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -29,21 +29,26 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
-#include "radeon_drv.h"
 
-#include <drm/drm_pciids.h>
+#include <linux/compat.h>
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
-#include <linux/compat.h>
-#include <drm/drm_gem.h>
-#include <drm/drm_fb_helper.h>
 
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_pciids.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
+#include <drm/radeon_drm.h>
+
+#include "radeon_drv.h"
 
 /*
  * KMS wrapper.
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index c341fb2a5b56..a0c99087034a 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -23,9 +23,12 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "atom.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 1298b84cb1c7..2c564f4f3468 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -23,19 +23,20 @@
  * Authors:
  *     David Airlie
  */
+
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/vga_switcheroo.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/radeon_drm.h>
-#include "radeon.h"
-
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_pci.h>
+#include <drm/radeon_drm.h>
 
-#include <linux/vga_switcheroo.h>
+#include "radeon.h"
 
 /* object hierarchy -
  * this contains a helper + a radeon fb
@@ -125,6 +126,7 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev,
 					 struct drm_mode_fb_cmd2 *mode_cmd,
 					 struct drm_gem_object **gobj_p)
 {
+	const struct drm_format_info *info;
 	struct radeon_device *rdev = rfbdev->rdev;
 	struct drm_gem_object *gobj = NULL;
 	struct radeon_bo *rbo = NULL;
@@ -135,7 +137,8 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev,
 	int height = mode_cmd->height;
 	u32 cpp;
 
-	cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0);
+	info = drm_get_format_info(rdev->ddev, mode_cmd);
+	cpp = info->cpp[0];
 
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = radeon_align_pitch(rdev, mode_cmd->width, cpp,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index e86f2bd38410..43f2f9307866 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -28,15 +28,21 @@
  *    Jerome Glisse <glisse@freedesktop.org>
  *    Dave Airlie
  */
-#include <linux/seq_file.h>
+
 #include <linux/atomic.h>
-#include <linux/wait.h>
+#include <linux/firmware.h>
 #include <linux/kref.h>
+#include <linux/sched/signal.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <linux/firmware.h>
-#include <drm/drmP.h>
-#include "radeon_reg.h"
+#include <linux/wait.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
 #include "radeon.h"
+#include "radeon_reg.h"
 #include "radeon_trace.h"
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 1cef155cc933..d4d3778d0a98 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -25,7 +25,10 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <linux/vmalloc.h>
+
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 44617dec8183..d8bc5d2dfd61 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -25,8 +25,13 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 
 void radeon_gem_object_free(struct drm_gem_object *gobj)
@@ -559,7 +564,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
 	if (!vm_bos)
 		return;
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, true);
 	if (r)
 		goto error_free;
 
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 29f7817af821..d465a3de7732 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -23,11 +23,14 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
+
 #include <linux/export.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_device.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "atom.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 92ce0e533bc0..9fd55e9c616b 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -26,7 +26,10 @@
  *          Jerome Glisse
  *          Christian König
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+
 #include "radeon.h"
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 1d5e3ba7383e..d9613638f9cc 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -25,15 +25,21 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <linux/pm_runtime.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_irq.h>
+#include <drm/drm_pci.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
-#include "radeon_reg.h"
-#include "radeon.h"
+
 #include "atom.h"
+#include "radeon.h"
+#include "radeon_reg.h"
 
-#include <linux/pm_runtime.h>
 
 #define RADEON_WAIT_IDLE_TIMEOUT 200
 
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 6a8fb6fd183c..07f7ace42c4b 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -25,15 +25,20 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vga_switcheroo.h>
+
 #include <drm/drm_fb_helper.h>
-#include "radeon.h"
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_pci.h>
 #include <drm/radeon_drm.h>
-#include "radeon_asic.h"
 
-#include <linux/vga_switcheroo.h>
-#include <linux/slab.h>
-#include <linux/pm_runtime.h>
+#include "radeon.h"
+#include "radeon_asic.h"
 
 #if defined(CONFIG_VGA_SWITCHEROO)
 bool radeon_has_atpx(void);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 35a205ae4318..a1985a552794 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -23,13 +23,16 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
-#include <drm/radeon_drm.h>
 #include <drm/drm_fixed.h>
-#include "radeon.h"
+#include <drm/drm_fourcc.h>
+#include <drm/drm_vblank.h>
+#include <drm/radeon_drm.h>
+
 #include "atom.h"
+#include "radeon.h"
 
 static void radeon_overscan_setup(struct drm_crtc *crtc,
 				  struct drm_display_mode *mode)
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
index 7e3257e8fd56..ef100b790463 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c
@@ -23,14 +23,19 @@
  * Authors: Dave Airlie
  *          Alex Deucher
  */
-#include <drm/drmP.h>
-#include <drm/drm_util.h>
+
+#include <linux/backlight.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_util.h>
 #include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "atom.h"
-#include <linux/backlight.h>
 #ifdef CONFIG_PMAC_BACKLIGHT
 #include <asm/backlight.h>
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index 3dae2c4dec71..f132eec737ad 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: MIT
-#include <drm/drmP.h>
+
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_device.h>
+
 #include "radeon.h"
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index c9bd1278f573..8c3871ed23a9 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -31,7 +31,7 @@
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/mmu_notifier.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 833e909706a9..21f73fc86f38 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -29,15 +29,18 @@
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Dave Airlie
  */
+
+#include <linux/io.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
+
 #include <drm/drm_cache.h>
+#include <drm/drm_prime.h>
+#include <drm/radeon_drm.h>
+
 #include "radeon.h"
 #include "radeon_trace.h"
 
-
 int radeon_ttm_init(struct radeon_device *rdev);
 void radeon_ttm_fini(struct radeon_device *rdev);
 static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
@@ -539,7 +542,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
 
 	INIT_LIST_HEAD(&duplicates);
-	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
+	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates, true);
 	if (unlikely(r != 0)) {
 		return r;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 4b6542538ff9..5d10e11a9225 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -20,14 +20,19 @@
  * Authors: Rafał Miłecki <zajec5@gmail.com>
  *          Alex Deucher <alexdeucher@gmail.com>
  */
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "avivod.h"
+
+#include <linux/hwmon-sysfs.h>
+#include <linux/hwmon.h>
+#include <linux/power_supply.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
+
 #include "atom.h"
+#include "avivod.h"
 #include "r600_dpm.h"
-#include <linux/power_supply.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
+#include "radeon.h"
 
 #define RADEON_IDLE_LOOP_MS 100
 #define RADEON_RECLOCK_DELAY_MS 200
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index 7110d403322c..d3a5bea9a2c5 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -23,12 +23,14 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
 
-#include "radeon.h"
-#include <drm/radeon_drm.h>
 #include <linux/dma-buf.h>
 
+#include <drm/drm_prime.h>
+#include <drm/radeon_drm.h>
+
+#include "radeon.h"
+
 struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj)
 {
 	struct radeon_bo *bo = gem_to_radeon_bo(obj);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 84802b201bef..37093cea24c5 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -26,7 +26,11 @@
  *          Jerome Glisse
  *          Christian König
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
 #include "radeon.h"
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 197b157b73d0..310c322c7112 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -41,7 +41,7 @@
  * If we are asked to block we wait on all the oldest fence of all
  * rings. We just wait for any of those fence to complete.
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 
 static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index b0eb28e8fb73..221e59476f64 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -27,7 +27,7 @@
  * Authors:
  *    Christian König <deathsimple@vodafone.de>
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_trace.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index be5d7a38d3aa..8c9780b5a884 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -28,7 +28,6 @@
  *    Christian König <christian.koenig@amd.com>
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_trace.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 701c4a59e3c3..0f6ba81a1669 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -22,7 +22,7 @@
  *
  * Authors: Michel Dänzer
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon_reg.h"
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index 0d84b8aafab3..c93f3ab3c4e3 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -3,10 +3,10 @@
 #define _RADEON_TRACE_H_
 
 #include <linux/stringify.h>
-#include <linux/types.h>
 #include <linux/tracepoint.h>
+#include <linux/types.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_file.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM radeon
diff --git a/drivers/gpu/drm/radeon/radeon_trace_points.c b/drivers/gpu/drm/radeon/radeon_trace_points.c
index 65e92302f974..6806055e3b27 100644
--- a/drivers/gpu/drm/radeon/radeon_trace_points.c
+++ b/drivers/gpu/drm/radeon/radeon_trace_points.c
@@ -2,7 +2,7 @@
 /* Copyright Red Hat Inc 2010.
  * Author : Dave Airlie <airlied@redhat.com>
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 5d42f8d8e68d..1e5e744c16e7 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -29,19 +29,27 @@
  *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
  *    Dave Airlie
  */
+
+#include <linux/dma-mapping.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swiotlb.h>
+
+#include <drm/drm_agpsupport.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_prime.h>
+#include <drm/radeon_drm.h>
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_module.h>
 #include <drm/ttm/ttm_page_alloc.h>
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/swiotlb.h>
-#include <linux/swap.h>
-#include <linux/pagemap.h>
-#include <linux/debugfs.h>
+#include <drm/ttm/ttm_placement.h>
+
 #include "radeon_reg.h"
 #include "radeon.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_ucode.c b/drivers/gpu/drm/radeon/radeon_ucode.c
index 6beec680390c..0d842d01f8e7 100644
--- a/drivers/gpu/drm/radeon/radeon_ucode.c
+++ b/drivers/gpu/drm/radeon/radeon_ucode.c
@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_ucode.h"
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 95f4db70dd22..ff4f794d1c86 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -30,7 +30,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "radeon.h"
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index c1c619facb47..59db54ace428 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -27,7 +27,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
+
 #include <drm/drm.h>
 
 #include "radeon.h"
@@ -771,7 +771,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	for (i = 0; i < rdev->usec_timeout; i++) {
 		if (vce_v1_0_get_rptr(rdev, ring) != rptr)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i < rdev->usec_timeout) {
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 0d374211661c..8512b02e9583 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -25,7 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
 #include <drm/radeon_drm.h>
 #include "radeon.h"
 #include "radeon_trace.h"
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 4121209c183e..117f60af1ee4 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -25,9 +25,14 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "rs400d.h"
@@ -67,7 +72,7 @@ void rs400_gart_tlb_flush(struct radeon_device *rdev)
 		tmp = RREG32_MC(RS480_GART_CACHE_CNTRL);
 		if ((tmp & RS480_GART_CACHE_INVALIDATE) == 0)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 		timeout--;
 	} while (timeout > 0);
 	WREG32_MC(RS480_GART_CACHE_CNTRL, 0);
@@ -245,7 +250,7 @@ int rs400_mc_wait_for_idle(struct radeon_device *rdev)
 		if (tmp & RADEON_MC_IDLE) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index f16af119c688..2f8ff089f7b1 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -35,14 +35,19 @@
  * close to the one of the R600 family (R600 likely being an evolution
  * of the RS600 GART block).
  */
-#include <drm/drmP.h>
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
-#include "atom.h"
-#include "rs600d.h"
-
 #include "rs600_reg_safe.h"
+#include "rs600d.h"
 
 static void rs600_gpu_init(struct radeon_device *rdev);
 int rs600_mc_wait_for_idle(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 1bae33e43f3c..267d8a9134c8 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -25,11 +25,13 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
-#include <drm/drmP.h>
+
+#include <drm/drm_pci.h>
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
-#include "atom.h"
 #include "rs690d.h"
 
 int rs690_mc_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/rs780_dpm.c b/drivers/gpu/drm/radeon/rs780_dpm.c
index 694b7b3e9799..72dbf3251c53 100644
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@@ -22,14 +22,16 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "r600_dpm.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "rs780d.h"
-#include "r600_dpm.h"
 #include "rs780_dpm.h"
-#include "atom.h"
-#include <linux/seq_file.h>
+#include "rs780d.h"
 
 static struct igp_ps *rs780_get_ps(struct radeon_ps *rps)
 {
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index ffbd2c006f60..147e5cf8348d 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -25,14 +25,19 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/seq_file.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
-#include "rv515d.h"
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+
+#include "atom.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "atom.h"
 #include "rv515_reg_safe.h"
+#include "rv515d.h"
 
 /* This files gather functions specifics to: rv515 */
 static int rv515_debugfs_pipes_info_init(struct radeon_device *rdev);
@@ -138,7 +143,7 @@ int rv515_mc_wait_for_idle(struct radeon_device *rdev)
 		if (tmp & MC_STATUS_IDLE) {
 			return 0;
 		}
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 	return -1;
 }
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index 6986051fbb89..69d380fff22a 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "rv6xxd.h"
diff --git a/drivers/gpu/drm/radeon/rv730_dpm.c b/drivers/gpu/drm/radeon/rv730_dpm.c
index 38fdb4152e2a..84a3d6d72486 100644
--- a/drivers/gpu/drm/radeon/rv730_dpm.c
+++ b/drivers/gpu/drm/radeon/rv730_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "rv730d.h"
 #include "r600_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv740_dpm.c b/drivers/gpu/drm/radeon/rv740_dpm.c
index afd597ec5085..327d65a76e1f 100644
--- a/drivers/gpu/drm/radeon/rv740_dpm.c
+++ b/drivers/gpu/drm/radeon/rv740_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "rv740d.h"
 #include "r600_dpm.h"
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 729ae588c970..7a6fc66d6a40 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -25,16 +25,20 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+
 #include <linux/firmware.h>
 #include <linux/slab.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_pci.h>
+#include <drm/radeon_drm.h>
+
+#include "atom.h"
+#include "avivod.h"
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_audio.h"
-#include <drm/radeon_drm.h>
 #include "rv770d.h"
-#include "atom.h"
-#include "avivod.h"
 
 #define R700_PFP_UCODE_SIZE 848
 #define R700_PM4_UCODE_SIZE 1360
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index acff6e09cc40..0866b38ef264 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "rv770d.h"
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index c765ae7ea806..4a0cf597c11c 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -22,7 +22,6 @@
  * Authors: Alex Deucher
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "rv770d.h"
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.h b/drivers/gpu/drm/radeon/rv770_dpm.h
index d12beab7f3e6..d81ccf153c33 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.h
+++ b/drivers/gpu/drm/radeon/rv770_dpm.h
@@ -23,6 +23,7 @@
 #ifndef __RV770_DPM_H__
 #define __RV770_DPM_H__
 
+#include "radeon.h"
 #include "rv770_smc.h"
 
 struct rv770_clock_registers {
diff --git a/drivers/gpu/drm/radeon/rv770_smc.c b/drivers/gpu/drm/radeon/rv770_smc.c
index 2b7ddee3984c..45575c0d0a1d 100644
--- a/drivers/gpu/drm/radeon/rv770_smc.c
+++ b/drivers/gpu/drm/radeon/rv770_smc.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "rv770d.h"
 #include "rv770_dpm.h"
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 841bc8bc333d..05894d198a79 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -21,19 +21,23 @@
  *
  * Authors: Alex Deucher
  */
+
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <drm/drmP.h>
-#include "radeon.h"
-#include "radeon_asic.h"
-#include "radeon_audio.h"
+
+#include <drm/drm_pci.h>
+#include <drm/drm_vblank.h>
 #include <drm/radeon_drm.h>
-#include "sid.h"
+
 #include "atom.h"
-#include "si_blit_shaders.h"
 #include "clearstate_si.h"
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "radeon_audio.h"
 #include "radeon_ucode.h"
+#include "si_blit_shaders.h"
+#include "sid.h"
 
 
 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index 83207929fc62..4773bb7d947e 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -21,7 +21,7 @@
  *
  * Authors: Alex Deucher
  */
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "radeon_trace.h"
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index c9f6cb77e857..460fd98e40a7 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -21,15 +21,17 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/math64.h>
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
+#include "atom.h"
+#include "r600_dpm.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "sid.h"
-#include "r600_dpm.h"
 #include "si_dpm.h"
-#include "atom.h"
-#include <linux/math64.h>
-#include <linux/seq_file.h>
+#include "sid.h"
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
diff --git a/drivers/gpu/drm/radeon/si_smc.c b/drivers/gpu/drm/radeon/si_smc.c
index 51155abda8d8..1573a463593c 100644
--- a/drivers/gpu/drm/radeon/si_smc.c
+++ b/drivers/gpu/drm/radeon/si_smc.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "sid.h"
 #include "ppsmc.h"
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index 1e4975f3374c..b95d5d390caf 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -21,7 +21,6 @@
  *
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "sumod.h"
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.h b/drivers/gpu/drm/radeon/sumo_dpm.h
index 07dda299c784..f1651135a47a 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.h
+++ b/drivers/gpu/drm/radeon/sumo_dpm.h
@@ -24,6 +24,7 @@
 #define __SUMO_DPM_H__
 
 #include "atom.h"
+#include "radeon.h"
 
 #define SUMO_MAX_HARDWARE_POWERLEVELS 5
 #define SUMO_PM_NUMBER_OF_TC 15
diff --git a/drivers/gpu/drm/radeon/sumo_smc.c b/drivers/gpu/drm/radeon/sumo_smc.c
index cc051be42362..d78140705736 100644
--- a/drivers/gpu/drm/radeon/sumo_smc.c
+++ b/drivers/gpu/drm/radeon/sumo_smc.c
@@ -21,7 +21,6 @@
  *
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "sumod.h"
 #include "sumo_dpm.h"
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 5d317f763eea..65302f9d025e 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -21,13 +21,15 @@
  *
  */
 
-#include <drm/drmP.h>
+#include <linux/seq_file.h>
+
+#include <drm/drm_pci.h>
+
+#include "r600_dpm.h"
 #include "radeon.h"
 #include "radeon_asic.h"
-#include "trinityd.h"
-#include "r600_dpm.h"
 #include "trinity_dpm.h"
-#include <linux/seq_file.h>
+#include "trinityd.h"
 
 #define TRINITY_MAX_DEEPSLEEP_DIVIDER_ID 5
 #define TRINITY_MINIMUM_ENGINE_CLOCK 800
diff --git a/drivers/gpu/drm/radeon/trinity_smc.c b/drivers/gpu/drm/radeon/trinity_smc.c
index 0310e36e3159..f1770a5664ea 100644
--- a/drivers/gpu/drm/radeon/trinity_smc.c
+++ b/drivers/gpu/drm/radeon/trinity_smc.c
@@ -21,7 +21,6 @@
  *
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "trinityd.h"
 #include "trinity_dpm.h"
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index 0dbeb504a429..f858d8d06347 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "r600d.h"
@@ -438,7 +438,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 		tmp = RREG32(UVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 	if (i < rdev->usec_timeout) {
diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
index 9071e656a565..23b18edda20e 100644
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "rv770d.h"
diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c
index d722db2cf340..b83d0ecb3b5a 100644
--- a/drivers/gpu/drm/radeon/uvd_v3_1.c
+++ b/drivers/gpu/drm/radeon/uvd_v3_1.c
@@ -22,7 +22,6 @@
  * Authors: Christian König <christian.koenig@amd.com>
  */
 
-#include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "nid.h"
diff --git a/drivers/gpu/drm/radeon/uvd_v4_2.c b/drivers/gpu/drm/radeon/uvd_v4_2.c
index 91613b8a9dc9..dc54fa4aaea8 100644
--- a/drivers/gpu/drm/radeon/uvd_v4_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v4_2.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "cikd.h"
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index f541a4b5ac51..bd75bbcf5bf6 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -26,7 +26,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "sid.h"
diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c
index b0a43b68776d..d6fde3659e65 100644
--- a/drivers/gpu/drm/radeon/vce_v2_0.c
+++ b/drivers/gpu/drm/radeon/vce_v2_0.c
@@ -26,7 +26,7 @@
  */
 
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+
 #include "radeon.h"
 #include "radeon_asic.h"
 #include "cikd.h"
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 75ab17af13a9..6df37c2a9678 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -102,6 +102,35 @@ static const struct rcar_du_device_info rzg1_du_r8a77470_info = {
 	},
 };
 
+static const struct rcar_du_device_info rcar_du_r8a774a1_info = {
+	.gen = 3,
+	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
+		  | RCAR_DU_FEATURE_VSP1_SOURCE
+		  | RCAR_DU_FEATURE_INTERLACED
+		  | RCAR_DU_FEATURE_TVM_SYNC,
+	.channels_mask = BIT(2) | BIT(1) | BIT(0),
+	.routes = {
+		/*
+		 * R8A774A1 has one RGB output, one LVDS output and one HDMI
+		 * output.
+		 */
+		[RCAR_DU_OUTPUT_DPAD0] = {
+			.possible_crtcs = BIT(2),
+			.port = 0,
+		},
+		[RCAR_DU_OUTPUT_HDMI0] = {
+			.possible_crtcs = BIT(1),
+			.port = 1,
+		},
+		[RCAR_DU_OUTPUT_LVDS0] = {
+			.possible_crtcs = BIT(0),
+			.port = 2,
+		},
+	},
+	.num_lvds = 1,
+	.dpll_mask =  BIT(1),
+};
+
 static const struct rcar_du_device_info rcar_du_r8a774c0_info = {
 	.gen = 3,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
@@ -386,6 +415,7 @@ static const struct of_device_id rcar_du_of_table[] = {
 	{ .compatible = "renesas,du-r8a7744", .data = &rzg1_du_r8a7743_info },
 	{ .compatible = "renesas,du-r8a7745", .data = &rzg1_du_r8a7745_info },
 	{ .compatible = "renesas,du-r8a77470", .data = &rzg1_du_r8a77470_info },
+	{ .compatible = "renesas,du-r8a774a1", .data = &rcar_du_r8a774a1_info },
 	{ .compatible = "renesas,du-r8a774c0", .data = &rcar_du_r8a774c0_info },
 	{ .compatible = "renesas,du-r8a7779", .data = &rcar_du_r8a7779_info },
 	{ .compatible = "renesas,du-r8a7790", .data = &rcar_du_r8a7790_info },
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index 6c91753af7bc..0f00bdfe2366 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -16,6 +16,7 @@
 #include "rcar_du_drv.h"
 #include "rcar_du_encoder.h"
 #include "rcar_du_kms.h"
+#include "rcar_lvds.h"
 
 /* -----------------------------------------------------------------------------
  * Encoder
@@ -97,6 +98,17 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
 		}
 	}
 
+	/*
+	 * On Gen3 skip the LVDS1 output if the LVDS1 encoder is used as a
+	 * companion for LVDS0 in dual-link mode.
+	 */
+	if (rcdu->info->gen >= 3 && output == RCAR_DU_OUTPUT_LVDS1) {
+		if (rcar_lvds_dual_link(bridge)) {
+			ret = -ENOLINK;
+			goto done;
+		}
+	}
+
 	ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
 			       DRM_MODE_ENCODER_NONE, NULL);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index f8f7fff34dff..2dc9caee8767 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -124,6 +124,66 @@ static const struct rcar_du_format_info rcar_du_format_infos[] = {
 		.bpp = 16,
 		.planes = 1,
 	}, {
+		.fourcc = DRM_FORMAT_RGBA4444,
+		.v4l2 = V4L2_PIX_FMT_RGBA444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_RGBX4444,
+		.v4l2 = V4L2_PIX_FMT_RGBX444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_ABGR4444,
+		.v4l2 = V4L2_PIX_FMT_ABGR444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_XBGR4444,
+		.v4l2 = V4L2_PIX_FMT_XBGR444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_BGRA4444,
+		.v4l2 = V4L2_PIX_FMT_BGRA444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_BGRX4444,
+		.v4l2 = V4L2_PIX_FMT_BGRX444,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_RGBA5551,
+		.v4l2 = V4L2_PIX_FMT_RGBA555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_RGBX5551,
+		.v4l2 = V4L2_PIX_FMT_RGBX555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_ABGR1555,
+		.v4l2 = V4L2_PIX_FMT_ABGR555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_XBGR1555,
+		.v4l2 = V4L2_PIX_FMT_XBGR555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_BGRA5551,
+		.v4l2 = V4L2_PIX_FMT_BGRA555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_BGRX5551,
+		.v4l2 = V4L2_PIX_FMT_BGRX555,
+		.bpp = 16,
+		.planes = 1,
+	}, {
 		.fourcc = DRM_FORMAT_BGR888,
 		.v4l2 = V4L2_PIX_FMT_RGB24,
 		.bpp = 24,
@@ -134,6 +194,26 @@ static const struct rcar_du_format_info rcar_du_format_infos[] = {
 		.bpp = 24,
 		.planes = 1,
 	}, {
+		.fourcc = DRM_FORMAT_RGBA8888,
+		.v4l2 = V4L2_PIX_FMT_BGRA32,
+		.bpp = 32,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_RGBX8888,
+		.v4l2 = V4L2_PIX_FMT_BGRX32,
+		.bpp = 32,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_ABGR8888,
+		.v4l2 = V4L2_PIX_FMT_RGBA32,
+		.bpp = 32,
+		.planes = 1,
+	}, {
+		.fourcc = DRM_FORMAT_XBGR8888,
+		.v4l2 = V4L2_PIX_FMT_RGBX32,
+		.bpp = 32,
+		.planes = 1,
+	}, {
 		.fourcc = DRM_FORMAT_BGRA8888,
 		.v4l2 = V4L2_PIX_FMT_ARGB32,
 		.bpp = 32,
@@ -378,7 +458,7 @@ static int rcar_du_encoders_init_one(struct rcar_du_device *rcdu,
 	}
 
 	ret = rcar_du_encoder_init(rcdu, output, entity);
-	if (ret && ret != -EPROBE_DEFER)
+	if (ret && ret != -EPROBE_DEFER && ret != -ENOLINK)
 		dev_warn(rcdu->dev,
 			 "failed to initialize encoder %pOF on output %u (%d), skipping\n",
 			 entity, output, ret);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c
index 989a0be94131..ae07290bba6a 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_writeback.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_writeback.c
@@ -14,6 +14,7 @@
 #include "rcar_du_crtc.h"
 #include "rcar_du_drv.h"
 #include "rcar_du_kms.h"
+#include "rcar_du_writeback.h"
 
 /**
  * struct rcar_du_wb_conn_state - Driver-specific writeback connector state
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.c b/drivers/gpu/drm/rcar-du/rcar_lvds.c
index 620b51aab291..1c62578590f4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_lvds.c
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds.c
@@ -63,10 +63,12 @@ struct rcar_lvds {
 		struct clk *extal;		/* External clock */
 		struct clk *dotclkin[2];	/* External DU clocks */
 	} clocks;
-	bool enabled;
 
 	struct drm_display_mode display_mode;
 	enum rcar_lvds_mode mode;
+
+	struct drm_bridge *companion;
+	bool dual_link;
 };
 
 #define bridge_to_rcar_lvds(bridge) \
@@ -92,13 +94,15 @@ static int rcar_lvds_connector_get_modes(struct drm_connector *connector)
 }
 
 static int rcar_lvds_connector_atomic_check(struct drm_connector *connector,
-					    struct drm_connector_state *state)
+					    struct drm_atomic_state *state)
 {
 	struct rcar_lvds *lvds = connector_to_rcar_lvds(connector);
 	const struct drm_display_mode *panel_mode;
+	struct drm_connector_state *conn_state;
 	struct drm_crtc_state *crtc_state;
 
-	if (!state->crtc)
+	conn_state = drm_atomic_get_new_connector_state(state, connector);
+	if (!conn_state->crtc)
 		return 0;
 
 	if (list_empty(&connector->modes)) {
@@ -110,7 +114,7 @@ static int rcar_lvds_connector_atomic_check(struct drm_connector *connector,
 				      struct drm_display_mode, head);
 
 	/* We're not allowed to modify the resolution. */
-	crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc);
+	crtc_state = drm_atomic_get_crtc_state(state, conn_state->crtc);
 	if (IS_ERR(crtc_state))
 		return PTR_ERR(crtc_state);
 
@@ -368,15 +372,12 @@ int rcar_lvds_clk_enable(struct drm_bridge *bridge, unsigned long freq)
 
 	dev_dbg(lvds->dev, "enabling LVDS PLL, freq=%luHz\n", freq);
 
-	WARN_ON(lvds->enabled);
-
 	ret = clk_prepare_enable(lvds->clocks.mod);
 	if (ret < 0)
 		return ret;
 
 	__rcar_lvds_pll_setup_d3_e3(lvds, freq, true);
 
-	lvds->enabled = true;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(rcar_lvds_clk_enable);
@@ -390,13 +391,9 @@ void rcar_lvds_clk_disable(struct drm_bridge *bridge)
 
 	dev_dbg(lvds->dev, "disabling LVDS PLL\n");
 
-	WARN_ON(!lvds->enabled);
-
 	rcar_lvds_write(lvds, LVDPLLCR, 0);
 
 	clk_disable_unprepare(lvds->clocks.mod);
-
-	lvds->enabled = false;
 }
 EXPORT_SYMBOL_GPL(rcar_lvds_clk_disable);
 
@@ -408,21 +405,18 @@ static void rcar_lvds_enable(struct drm_bridge *bridge)
 {
 	struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge);
 	const struct drm_display_mode *mode = &lvds->display_mode;
-	/*
-	 * FIXME: We should really retrieve the CRTC through the state, but how
-	 * do we get a state pointer?
-	 */
-	struct drm_crtc *crtc = lvds->bridge.encoder->crtc;
 	u32 lvdhcr;
 	u32 lvdcr0;
 	int ret;
 
-	WARN_ON(lvds->enabled);
-
 	ret = clk_prepare_enable(lvds->clocks.mod);
 	if (ret < 0)
 		return;
 
+	/* Enable the companion LVDS encoder in dual-link mode. */
+	if (lvds->dual_link && lvds->companion)
+		lvds->companion->funcs->enable(lvds->companion);
+
 	/*
 	 * Hardcode the channels and control signals routing for now.
 	 *
@@ -445,17 +439,33 @@ static void rcar_lvds_enable(struct drm_bridge *bridge)
 	rcar_lvds_write(lvds, LVDCHCR, lvdhcr);
 
 	if (lvds->info->quirks & RCAR_LVDS_QUIRK_DUAL_LINK) {
-		/* Disable dual-link mode. */
-		rcar_lvds_write(lvds, LVDSTRIPE, 0);
+		/*
+		 * Configure vertical stripe based on the mode of operation of
+		 * the connected device.
+		 */
+		rcar_lvds_write(lvds, LVDSTRIPE,
+				lvds->dual_link ? LVDSTRIPE_ST_ON : 0);
 	}
 
-	/* PLL clock configuration. */
-	lvds->info->pll_setup(lvds, mode->clock * 1000);
+	/*
+	 * PLL clock configuration on all instances but the companion in
+	 * dual-link mode.
+	 */
+	if (!lvds->dual_link || lvds->companion)
+		lvds->info->pll_setup(lvds, mode->clock * 1000);
 
 	/* Set the LVDS mode and select the input. */
 	lvdcr0 = lvds->mode << LVDCR0_LVMD_SHIFT;
-	if (drm_crtc_index(crtc) == 2)
-		lvdcr0 |= LVDCR0_DUSEL;
+
+	if (lvds->bridge.encoder) {
+		/*
+		 * FIXME: We should really retrieve the CRTC through the state,
+		 * but how do we get a state pointer?
+		 */
+		if (drm_crtc_index(lvds->bridge.encoder->crtc) == 2)
+			lvdcr0 |= LVDCR0_DUSEL;
+	}
+
 	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
 
 	/* Turn all the channels on. */
@@ -507,16 +517,12 @@ static void rcar_lvds_enable(struct drm_bridge *bridge)
 		drm_panel_prepare(lvds->panel);
 		drm_panel_enable(lvds->panel);
 	}
-
-	lvds->enabled = true;
 }
 
 static void rcar_lvds_disable(struct drm_bridge *bridge)
 {
 	struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge);
 
-	WARN_ON(!lvds->enabled);
-
 	if (lvds->panel) {
 		drm_panel_disable(lvds->panel);
 		drm_panel_unprepare(lvds->panel);
@@ -526,9 +532,11 @@ static void rcar_lvds_disable(struct drm_bridge *bridge)
 	rcar_lvds_write(lvds, LVDCR1, 0);
 	rcar_lvds_write(lvds, LVDPLLCR, 0);
 
-	clk_disable_unprepare(lvds->clocks.mod);
+	/* Disable the companion LVDS encoder in dual-link mode. */
+	if (lvds->dual_link && lvds->companion)
+		lvds->companion->funcs->disable(lvds->companion);
 
-	lvds->enabled = false;
+	clk_disable_unprepare(lvds->clocks.mod);
 }
 
 static bool rcar_lvds_mode_fixup(struct drm_bridge *bridge,
@@ -592,8 +600,6 @@ static void rcar_lvds_mode_set(struct drm_bridge *bridge,
 {
 	struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge);
 
-	WARN_ON(lvds->enabled);
-
 	lvds->display_mode = *adjusted_mode;
 
 	rcar_lvds_get_lvds_mode(lvds);
@@ -646,10 +652,57 @@ static const struct drm_bridge_funcs rcar_lvds_bridge_ops = {
 	.mode_set = rcar_lvds_mode_set,
 };
 
+bool rcar_lvds_dual_link(struct drm_bridge *bridge)
+{
+	struct rcar_lvds *lvds = bridge_to_rcar_lvds(bridge);
+
+	return lvds->dual_link;
+}
+EXPORT_SYMBOL_GPL(rcar_lvds_dual_link);
+
 /* -----------------------------------------------------------------------------
  * Probe & Remove
  */
 
+static int rcar_lvds_parse_dt_companion(struct rcar_lvds *lvds)
+{
+	const struct of_device_id *match;
+	struct device_node *companion;
+	struct device *dev = lvds->dev;
+	int ret = 0;
+
+	/* Locate the companion LVDS encoder for dual-link operation, if any. */
+	companion = of_parse_phandle(dev->of_node, "renesas,companion", 0);
+	if (!companion) {
+		dev_err(dev, "Companion LVDS encoder not found\n");
+		return -ENXIO;
+	}
+
+	/*
+	 * Sanity check: the companion encoder must have the same compatible
+	 * string.
+	 */
+	match = of_match_device(dev->driver->of_match_table, dev);
+	if (!of_device_is_compatible(companion, match->compatible)) {
+		dev_err(dev, "Companion LVDS encoder is invalid\n");
+		ret = -ENXIO;
+		goto done;
+	}
+
+	lvds->companion = of_drm_find_bridge(companion);
+	if (!lvds->companion) {
+		ret = -EPROBE_DEFER;
+		goto done;
+	}
+
+	dev_dbg(dev, "Found companion encoder %pOF\n", companion);
+
+done:
+	of_node_put(companion);
+
+	return ret;
+}
+
 static int rcar_lvds_parse_dt(struct rcar_lvds *lvds)
 {
 	struct device_node *local_output = NULL;
@@ -700,14 +753,26 @@ static int rcar_lvds_parse_dt(struct rcar_lvds *lvds)
 
 	if (is_bridge) {
 		lvds->next_bridge = of_drm_find_bridge(remote);
-		if (!lvds->next_bridge)
+		if (!lvds->next_bridge) {
 			ret = -EPROBE_DEFER;
+			goto done;
+		}
+
+		if (lvds->info->quirks & RCAR_LVDS_QUIRK_DUAL_LINK)
+			lvds->dual_link = lvds->next_bridge->timings
+					? lvds->next_bridge->timings->dual_link
+					: false;
 	} else {
 		lvds->panel = of_drm_find_panel(remote);
-		if (IS_ERR(lvds->panel))
+		if (IS_ERR(lvds->panel)) {
 			ret = PTR_ERR(lvds->panel);
+			goto done;
+		}
 	}
 
+	if (lvds->dual_link)
+		ret = rcar_lvds_parse_dt_companion(lvds);
+
 done:
 	of_node_put(local_output);
 	of_node_put(remote_input);
@@ -793,7 +858,6 @@ static int rcar_lvds_probe(struct platform_device *pdev)
 
 	lvds->dev = &pdev->dev;
 	lvds->info = of_device_get_match_data(&pdev->dev);
-	lvds->enabled = false;
 
 	ret = rcar_lvds_parse_dt(lvds);
 	if (ret < 0)
@@ -866,6 +930,7 @@ static const struct rcar_lvds_device_info rcar_lvds_r8a77995_info = {
 static const struct of_device_id rcar_lvds_of_table[] = {
 	{ .compatible = "renesas,r8a7743-lvds", .data = &rcar_lvds_gen2_info },
 	{ .compatible = "renesas,r8a7744-lvds", .data = &rcar_lvds_gen2_info },
+	{ .compatible = "renesas,r8a774a1-lvds", .data = &rcar_lvds_gen3_info },
 	{ .compatible = "renesas,r8a774c0-lvds", .data = &rcar_lvds_r8a77990_info },
 	{ .compatible = "renesas,r8a7790-lvds", .data = &rcar_lvds_r8a7790_info },
 	{ .compatible = "renesas,r8a7791-lvds", .data = &rcar_lvds_gen2_info },
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds.h b/drivers/gpu/drm/rcar-du/rcar_lvds.h
index a709cae1bc32..222ec0e60785 100644
--- a/drivers/gpu/drm/rcar-du/rcar_lvds.h
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds.h
@@ -15,6 +15,7 @@ struct drm_bridge;
 #if IS_ENABLED(CONFIG_DRM_RCAR_LVDS)
 int rcar_lvds_clk_enable(struct drm_bridge *bridge, unsigned long freq);
 void rcar_lvds_clk_disable(struct drm_bridge *bridge);
+bool rcar_lvds_dual_link(struct drm_bridge *bridge);
 #else
 static inline int rcar_lvds_clk_enable(struct drm_bridge *bridge,
 				       unsigned long freq)
@@ -22,6 +23,10 @@ static inline int rcar_lvds_clk_enable(struct drm_bridge *bridge,
 	return -ENOSYS;
 }
 static inline void rcar_lvds_clk_disable(struct drm_bridge *bridge) { }
+static inline bool rcar_lvds_dual_link(struct drm_bridge *bridge)
+{
+	return false;
+}
 #endif /* CONFIG_DRM_RCAR_LVDS */
 
 #endif /* __RCAR_LVDS_H__ */
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
index 66b2d4466eab..077c87021908 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
@@ -535,7 +535,7 @@ static int cdn_dp_get_training_status(struct cdn_dp_device *dp)
 	if (ret)
 		goto err_get_training_status;
 
-	dp->link.rate = status[0];
+	dp->link.rate = drm_dp_bw_code_to_link_rate(status[0]);
 	dp->link.num_lanes = status[1];
 
 err_get_training_status:
@@ -639,7 +639,7 @@ int cdn_dp_config_video(struct cdn_dp_device *dp)
 	bit_per_pix = (video->color_fmt == YCBCR_4_2_2) ?
 		      (video->color_depth * 2) : (video->color_depth * 3);
 
-	link_rate = drm_dp_bw_code_to_link_rate(dp->link.rate) / 1000;
+	link_rate = dp->link.rate / 1000;
 
 	ret = cdn_dp_reg_write(dp, BND_HSYNC2VSYNC, VIF_BYPASS_INTERLACE);
 	if (ret)
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 77d929e6f6ce..cdc304d4cd02 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -19,6 +19,14 @@
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_vop.h"
 
+#define RK3228_GRF_SOC_CON2		0x0408
+#define RK3228_HDMI_SDAIN_MSK		BIT(14)
+#define RK3228_HDMI_SCLIN_MSK		BIT(13)
+#define RK3228_GRF_SOC_CON6		0x0418
+#define RK3228_HDMI_HPD_VSEL		BIT(6)
+#define RK3228_HDMI_SDA_VSEL		BIT(5)
+#define RK3228_HDMI_SCL_VSEL		BIT(4)
+
 #define RK3288_GRF_SOC_CON6		0x025C
 #define RK3288_HDMI_LCDC_SEL		BIT(4)
 #define RK3328_GRF_SOC_CON2		0x0408
@@ -321,6 +329,25 @@ static void dw_hdmi_rockchip_genphy_disable(struct dw_hdmi *dw_hdmi, void *data)
 	phy_power_off(hdmi->phy);
 }
 
+static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	dw_hdmi_phy_setup_hpd(dw_hdmi, data);
+
+	regmap_write(hdmi->regmap,
+		RK3228_GRF_SOC_CON6,
+		HIWORD_UPDATE(RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL |
+			      RK3228_HDMI_SCL_VSEL,
+			      RK3228_HDMI_HPD_VSEL | RK3228_HDMI_SDA_VSEL |
+			      RK3228_HDMI_SCL_VSEL));
+
+	regmap_write(hdmi->regmap,
+		RK3228_GRF_SOC_CON2,
+		HIWORD_UPDATE(RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK,
+			      RK3228_HDMI_SDAIN_MSK | RK3228_HDMI_SCLIN_MSK));
+}
+
 static enum drm_connector_status
 dw_hdmi_rk3328_read_hpd(struct dw_hdmi *dw_hdmi, void *data)
 {
@@ -366,6 +393,29 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
 			      RK3328_HDMI_HPD_IOE));
 }
 
+static const struct dw_hdmi_phy_ops rk3228_hdmi_phy_ops = {
+	.init		= dw_hdmi_rockchip_genphy_init,
+	.disable	= dw_hdmi_rockchip_genphy_disable,
+	.read_hpd	= dw_hdmi_phy_read_hpd,
+	.update_hpd	= dw_hdmi_phy_update_hpd,
+	.setup_hpd	= dw_hdmi_rk3228_setup_hpd,
+};
+
+static struct rockchip_hdmi_chip_data rk3228_chip_data = {
+	.lcdsel_grf_reg = -1,
+};
+
+static const struct dw_hdmi_plat_data rk3228_hdmi_drv_data = {
+	.mode_valid = dw_hdmi_rockchip_mode_valid,
+	.mpll_cfg = rockchip_mpll_cfg,
+	.cur_ctr = rockchip_cur_ctr,
+	.phy_config = rockchip_phy_config,
+	.phy_data = &rk3228_chip_data,
+	.phy_ops = &rk3228_hdmi_phy_ops,
+	.phy_name = "inno_dw_hdmi_phy2",
+	.phy_force_vendor = true,
+};
+
 static struct rockchip_hdmi_chip_data rk3288_chip_data = {
 	.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
 	.lcdsel_big = HIWORD_UPDATE(0, RK3288_HDMI_LCDC_SEL),
@@ -418,6 +468,9 @@ static const struct dw_hdmi_plat_data rk3399_hdmi_drv_data = {
 };
 
 static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
+	{ .compatible = "rockchip,rk3228-dw-hdmi",
+	  .data = &rk3228_hdmi_drv_data
+	},
 	{ .compatible = "rockchip,rk3288-dw-hdmi",
 	  .data = &rk3288_hdmi_drv_data
 	},
@@ -538,11 +591,25 @@ static int dw_hdmi_rockchip_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused dw_hdmi_rockchip_resume(struct device *dev)
+{
+	struct rockchip_hdmi *hdmi = dev_get_drvdata(dev);
+
+	dw_hdmi_resume(hdmi->hdmi);
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_hdmi_rockchip_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(NULL, dw_hdmi_rockchip_resume)
+};
+
 struct platform_driver dw_hdmi_rockchip_pltfm_driver = {
 	.probe  = dw_hdmi_rockchip_probe,
 	.remove = dw_hdmi_rockchip_remove,
 	.driver = {
 		.name = "dwhdmi-rockchip",
+		.pm = &dw_hdmi_rockchip_pm,
 		.of_match_table = dw_hdmi_rockchip_dt_ids,
 	},
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 1c69066b6894..64ca87cf6d50 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -8,6 +8,7 @@
 #include <drm/drm.h>
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_damage_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_probe_helper.h>
@@ -17,20 +18,10 @@
 #include "rockchip_drm_gem.h"
 #include "rockchip_drm_psr.h"
 
-static int rockchip_drm_fb_dirty(struct drm_framebuffer *fb,
-				 struct drm_file *file,
-				 unsigned int flags, unsigned int color,
-				 struct drm_clip_rect *clips,
-				 unsigned int num_clips)
-{
-	rockchip_drm_psr_flush_all(fb->dev);
-	return 0;
-}
-
 static const struct drm_framebuffer_funcs rockchip_drm_fb_funcs = {
 	.destroy       = drm_gem_fb_destroy,
 	.create_handle = drm_gem_fb_create_handle,
-	.dirty	       = rockchip_drm_fb_dirty,
+	.dirty	       = drm_atomic_helper_dirtyfb,
 };
 
 static struct drm_framebuffer *
@@ -66,23 +57,18 @@ static struct drm_framebuffer *
 rockchip_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 			const struct drm_mode_fb_cmd2 *mode_cmd)
 {
+	const struct drm_format_info *info = drm_get_format_info(dev,
+								 mode_cmd);
 	struct drm_framebuffer *fb;
 	struct drm_gem_object *objs[ROCKCHIP_MAX_FB_BUFFER];
 	struct drm_gem_object *obj;
-	unsigned int hsub;
-	unsigned int vsub;
-	int num_planes;
+	int num_planes = min_t(int, info->num_planes, ROCKCHIP_MAX_FB_BUFFER);
 	int ret;
 	int i;
 
-	hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format);
-	vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format);
-	num_planes = min(drm_format_num_planes(mode_cmd->pixel_format),
-			 ROCKCHIP_MAX_FB_BUFFER);
-
 	for (i = 0; i < num_planes; i++) {
-		unsigned int width = mode_cmd->width / (i ? hsub : 1);
-		unsigned int height = mode_cmd->height / (i ? vsub : 1);
+		unsigned int width = mode_cmd->width / (i ? info->hsub : 1);
+		unsigned int height = mode_cmd->height / (i ? info->vsub : 1);
 		unsigned int min_size;
 
 		obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[i]);
@@ -95,7 +81,7 @@ rockchip_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 
 		min_size = (height - 1) * mode_cmd->pitches[i] +
 			mode_cmd->offsets[i] +
-			width * drm_format_plane_cpp(mode_cmd->pixel_format, i);
+			width * info->cpp[i];
 
 		if (obj->size < min_size) {
 			drm_gem_object_put_unlocked(obj);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 12ed5265a90b..09a790c2f3a1 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -307,24 +307,19 @@ static uint16_t scl_vop_cal_scale(enum scale_mode mode, uint32_t src,
 
 static void scl_vop_cal_scl_fac(struct vop *vop, const struct vop_win_data *win,
 			     uint32_t src_w, uint32_t src_h, uint32_t dst_w,
-			     uint32_t dst_h, uint32_t pixel_format)
+			     uint32_t dst_h, const struct drm_format_info *info)
 {
 	uint16_t yrgb_hor_scl_mode, yrgb_ver_scl_mode;
 	uint16_t cbcr_hor_scl_mode = SCALE_NONE;
 	uint16_t cbcr_ver_scl_mode = SCALE_NONE;
-	int hsub = drm_format_horz_chroma_subsampling(pixel_format);
-	int vsub = drm_format_vert_chroma_subsampling(pixel_format);
-	const struct drm_format_info *info;
 	bool is_yuv = false;
-	uint16_t cbcr_src_w = src_w / hsub;
-	uint16_t cbcr_src_h = src_h / vsub;
+	uint16_t cbcr_src_w = src_w / info->hsub;
+	uint16_t cbcr_src_h = src_h / info->vsub;
 	uint16_t vsu_mode;
 	uint16_t lb_mode;
 	uint32_t val;
 	int vskiplines;
 
-	info = drm_format_info(pixel_format);
-
 	if (info->is_yuv)
 		is_yuv = true;
 
@@ -823,8 +818,8 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 		    (state->rotation & DRM_MODE_REFLECT_X) ? 1 : 0);
 
 	if (is_yuv) {
-		int hsub = drm_format_horz_chroma_subsampling(fb->format->format);
-		int vsub = drm_format_vert_chroma_subsampling(fb->format->format);
+		int hsub = fb->format->hsub;
+		int vsub = fb->format->vsub;
 		int bpp = fb->format->cpp[1];
 
 		uv_obj = fb->obj[1];
@@ -848,7 +843,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
 	if (win->phy->scl)
 		scl_vop_cal_scl_fac(vop, win, actual_w, actual_h,
 				    drm_rect_width(dest), drm_rect_height(dest),
-				    fb->format->format);
+				    fb->format);
 
 	VOP_WIN_SET(vop, win, act_info, act_info);
 	VOP_WIN_SET(vop, win, dsp_info, dsp_info);
@@ -1011,7 +1006,8 @@ static bool vop_crtc_mode_fixup(struct drm_crtc *crtc,
 	struct vop *vop = to_vop(crtc);
 
 	adjusted_mode->clock =
-		clk_round_rate(vop->dclk, mode->clock * 1000) / 1000;
+		DIV_ROUND_UP(clk_round_rate(vop->dclk,
+					    adjusted_mode->clock * 1000), 1000);
 
 	return true;
 }
@@ -1215,17 +1211,6 @@ static void vop_crtc_destroy(struct drm_crtc *crtc)
 	drm_crtc_cleanup(crtc);
 }
 
-static void vop_crtc_reset(struct drm_crtc *crtc)
-{
-	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-	kfree(crtc->state);
-
-	crtc->state = kzalloc(sizeof(struct rockchip_crtc_state), GFP_KERNEL);
-	if (crtc->state)
-		crtc->state->crtc = crtc;
-}
-
 static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
 {
 	struct rockchip_crtc_state *rockchip_state;
@@ -1247,6 +1232,17 @@ static void vop_crtc_destroy_state(struct drm_crtc *crtc,
 	kfree(s);
 }
 
+static void vop_crtc_reset(struct drm_crtc *crtc)
+{
+	struct rockchip_crtc_state *crtc_state =
+		kzalloc(sizeof(*crtc_state), GFP_KERNEL);
+
+	if (crtc->state)
+		vop_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &crtc_state->base);
+}
+
 #ifdef CONFIG_DRM_ANALOGIX_DP
 static struct drm_connector *vop_get_edp_connector(struct vop *vop)
 {
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index 35dc74883f83..6889d6534eba 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -22,8 +22,17 @@
  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
-#include <drm/drmP.h>
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 #include <drm/savage_drm.h>
+
 #include "savage_drv.h"
 
 /* Need a long timeout for shadow status updates can take a while
@@ -53,7 +62,7 @@ savage_bci_wait_fifo_shadow(drm_savage_private_t * dev_priv, unsigned int n)
 		status = dev_priv->status_ptr[0];
 		if ((status & mask) < threshold)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if SAVAGE_BCI_DEBUG
@@ -74,7 +83,7 @@ savage_bci_wait_fifo_s3d(drm_savage_private_t * dev_priv, unsigned int n)
 		status = SAVAGE_READ(SAVAGE_STATUS_WORD0);
 		if ((status & SAVAGE_FIFO_USED_MASK_S3D) <= maxUsed)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if SAVAGE_BCI_DEBUG
@@ -95,7 +104,7 @@ savage_bci_wait_fifo_s4(drm_savage_private_t * dev_priv, unsigned int n)
 		status = SAVAGE_READ(SAVAGE_ALT_STATUS_WORD0);
 		if ((status & SAVAGE_FIFO_USED_MASK_S4) <= maxUsed)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if SAVAGE_BCI_DEBUG
@@ -128,7 +137,7 @@ savage_bci_wait_event_shadow(drm_savage_private_t * dev_priv, uint16_t e)
 		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
 		    (status & 0xffff) == 0)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if SAVAGE_BCI_DEBUG
@@ -150,7 +159,7 @@ savage_bci_wait_event_reg(drm_savage_private_t * dev_priv, uint16_t e)
 		if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff ||
 		    (status & 0xffff) == 0)
 			return 0;
-		DRM_UDELAY(1);
+		udelay(1);
 	}
 
 #if SAVAGE_BCI_DEBUG
@@ -1014,7 +1023,7 @@ int savage_bci_buffers(struct drm_device *dev, void *data, struct drm_file *file
 	 */
 	if (d->send_count != 0) {
 		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
-			  DRM_CURRENTPID, d->send_count);
+			  task_pid_nr(current), d->send_count);
 		return -EINVAL;
 	}
 
@@ -1022,7 +1031,7 @@ int savage_bci_buffers(struct drm_device *dev, void *data, struct drm_file *file
 	 */
 	if (d->request_count < 0 || d->request_count > dma->buf_count) {
 		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
-			  DRM_CURRENTPID, d->request_count, dma->buf_count);
+			  task_pid_nr(current), d->request_count, dma->buf_count);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c
index 2bddeb8bf457..2966fcfd9548 100644
--- a/drivers/gpu/drm/savage/savage_drv.c
+++ b/drivers/gpu/drm/savage/savage_drv.c
@@ -25,12 +25,13 @@
 
 #include <linux/module.h>
 
-#include <drm/drmP.h>
-#include <drm/savage_drm.h>
-#include "savage_drv.h"
-
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
 #include <drm/drm_pciids.h>
 
+#include "savage_drv.h"
+
 static struct pci_device_id pciidlist[] = {
 	savage_PCI_IDS
 };
diff --git a/drivers/gpu/drm/savage/savage_drv.h b/drivers/gpu/drm/savage/savage_drv.h
index 44a1009b6ecb..b0081bb64776 100644
--- a/drivers/gpu/drm/savage/savage_drv.h
+++ b/drivers/gpu/drm/savage/savage_drv.h
@@ -26,7 +26,11 @@
 #ifndef __SAVAGE_DRV_H__
 #define __SAVAGE_DRV_H__
 
+#include <linux/io.h>
+
+#include <drm/drm_ioctl.h>
 #include <drm/drm_legacy.h>
+#include <drm/savage_drm.h>
 
 #define DRIVER_AUTHOR	"Felix Kuehling"
 
@@ -484,8 +488,10 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
 /*
  * access to MMIO
  */
-#define SAVAGE_READ(reg)	DRM_READ32(  dev_priv->mmio, (reg) )
-#define SAVAGE_WRITE(reg)	DRM_WRITE32( dev_priv->mmio, (reg) )
+#define SAVAGE_READ(reg) \
+       readl(((void __iomem *)dev_priv->mmio->handle) + (reg))
+#define SAVAGE_WRITE(reg) \
+	writel(val, ((void __iomem *)dev_priv->mmio->handle) + (reg))
 
 /*
  * access to the burst command interface (BCI)
diff --git a/drivers/gpu/drm/savage/savage_state.c b/drivers/gpu/drm/savage/savage_state.c
index ebb8b7d32b33..a2ac25c11c90 100644
--- a/drivers/gpu/drm/savage/savage_state.c
+++ b/drivers/gpu/drm/savage/savage_state.c
@@ -22,8 +22,15 @@
  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
-#include <drm/drmP.h>
+
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 #include <drm/savage_drm.h>
+
 #include "savage_drv.h"
 
 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index a1bec2779e76..c1058eece16b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -265,32 +265,6 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
 }
 EXPORT_SYMBOL(drm_sched_resume_timeout);
 
-/* job_finish is called after hw fence signaled
- */
-static void drm_sched_job_finish(struct work_struct *work)
-{
-	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
-						   finish_work);
-	struct drm_gpu_scheduler *sched = s_job->sched;
-	unsigned long flags;
-
-	/*
-	 * Canceling the timeout without removing our job from the ring mirror
-	 * list is safe, as we will only end up in this worker if our jobs
-	 * finished fence has been signaled. So even if some another worker
-	 * manages to find this job as the next job in the list, the fence
-	 * signaled check below will prevent the timeout to be restarted.
-	 */
-	cancel_delayed_work_sync(&sched->work_tdr);
-
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* queue TDR for next job */
-	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	sched->ops->free_job(s_job);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
@@ -312,9 +286,19 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
 				       struct drm_sched_job, node);
 
-	if (job)
+	if (job) {
 		job->sched->ops->timedout_job(job);
 
+		/*
+		 * Guilty job did complete and hence needs to be manually removed
+		 * See drm_sched_stop doc.
+		 */
+		if (sched->free_guilty) {
+			job->sched->ops->free_job(job);
+			sched->free_guilty = false;
+		}
+	}
+
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
@@ -369,41 +353,68 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
  * drm_sched_stop - stop the scheduler
  *
  * @sched: scheduler instance
+ * @bad: job which caused the time out
+ *
+ * Stop the scheduler and also removes and frees all completed jobs.
+ * Note: bad job will not be freed as it might be used later and so it's
+ * callers responsibility to release it manually if it's not part of the
+ * mirror list any more.
  *
  */
-void drm_sched_stop(struct drm_gpu_scheduler *sched)
+void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
-	struct drm_sched_job *s_job;
+	struct drm_sched_job *s_job, *tmp;
 	unsigned long flags;
-	struct dma_fence *last_fence =  NULL;
 
 	kthread_park(sched->thread);
 
 	/*
-	 * Verify all the signaled jobs in mirror list are removed from the ring
-	 * by waiting for the latest job to enter the list. This should insure that
-	 * also all the previous jobs that were in flight also already singaled
-	 * and removed from the list.
+	 * Iterate the job list from later to  earlier one and either deactive
+	 * their HW callbacks or remove them from mirror list if they already
+	 * signaled.
+	 * This iteration is thread safe as sched thread is stopped.
 	 */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
 					      &s_job->cb)) {
-			dma_fence_put(s_job->s_fence->parent);
-			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		} else {
-			 last_fence = dma_fence_get(&s_job->s_fence->finished);
-			 break;
+			/*
+			 * remove job from ring_mirror_list.
+			 * Locking here is for concurrent resume timeout
+			 */
+			spin_lock_irqsave(&sched->job_list_lock, flags);
+			list_del_init(&s_job->node);
+			spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+			/*
+			 * Wait for job's HW fence callback to finish using s_job
+			 * before releasing it.
+			 *
+			 * Job is still alive so fence refcount at least 1
+			 */
+			dma_fence_wait(&s_job->s_fence->finished, false);
+
+			/*
+			 * We must keep bad job alive for later use during
+			 * recovery by some of the drivers but leave a hint
+			 * that the guilty job must be released.
+			 */
+			if (bad != s_job)
+				sched->ops->free_job(s_job);
+			else
+				sched->free_guilty = true;
 		}
 	}
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
-	if (last_fence) {
-		dma_fence_wait(last_fence, false);
-		dma_fence_put(last_fence);
-	}
+	/*
+	 * Stop pending timer in flight as we rearm it in  drm_sched_start. This
+	 * avoids the pending timeout work in progress to fire right away after
+	 * this TDR finished and before the newly restarted jobs had a
+	 * chance to complete.
+	 */
+	cancel_delayed_work(&sched->work_tdr);
 }
 
 EXPORT_SYMBOL(drm_sched_stop);
@@ -412,26 +423,28 @@ EXPORT_SYMBOL(drm_sched_stop);
  * drm_sched_job_recovery - recover jobs after a reset
  *
  * @sched: scheduler instance
+ * @full_recovery: proceed with complete sched restart
  *
  */
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
+	unsigned long flags;
 	int r;
 
-	if (!full_recovery)
-		goto unpark;
-
 	/*
 	 * Locking the list is not required here as the sched thread is parked
-	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
-	 * flushed all the jobs who were still in mirror list but who already
-	 * signaled and removed them self from the list. Also concurrent
+	 * so no new jobs are being inserted or removed. Also concurrent
 	 * GPU recovers can't run in parallel.
 	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct dma_fence *fence = s_job->s_fence->parent;
 
+		atomic_inc(&sched->hw_rq_count);
+
+		if (!full_recovery)
+			continue;
+
 		if (fence) {
 			r = dma_fence_add_callback(fence, &s_job->cb,
 						   drm_sched_process_job);
@@ -444,9 +457,12 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 			drm_sched_process_job(NULL, &s_job->cb);
 	}
 
-	drm_sched_start_timeout(sched);
+	if (full_recovery) {
+		spin_lock_irqsave(&sched->job_list_lock, flags);
+		drm_sched_start_timeout(sched);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	}
 
-unpark:
 	kthread_unpark(sched->thread);
 }
 EXPORT_SYMBOL(drm_sched_start);
@@ -463,7 +479,6 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 	uint64_t guilty_context;
 	bool found_guilty = false;
 
-	/*TODO DO we need spinlock here ? */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
@@ -475,8 +490,8 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
+		dma_fence_put(s_job->s_fence->parent);
 		s_job->s_fence->parent = sched->ops->run_job(s_job);
-		atomic_inc(&sched->hw_rq_count);
 	}
 }
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -513,7 +528,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
 
-	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
 
 	return 0;
@@ -596,24 +610,54 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
 	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
-
-	cancel_delayed_work(&sched->work_tdr);
 
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	trace_drm_sched_process_job(s_fence);
 
 	drm_sched_fence_finished(s_fence);
-
-	trace_drm_sched_process_job(s_fence);
 	wake_up_interruptible(&sched->wake_up_worker);
+}
+
+/**
+ * drm_sched_cleanup_jobs - destroy finished jobs
+ *
+ * @sched: scheduler instance
+ *
+ * Remove all finished jobs from the mirror list and destroy them.
+ */
+static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched)
+{
+	unsigned long flags;
+
+	/* Don't destroy jobs while the timeout worker is running */
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+	    !cancel_delayed_work(&sched->work_tdr))
+		return;
+
+
+	while (!list_empty(&sched->ring_mirror_list)) {
+		struct drm_sched_job *job;
+
+		job = list_first_entry(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);
+		if (!dma_fence_is_signaled(&job->s_fence->finished))
+			break;
+
+		spin_lock_irqsave(&sched->job_list_lock, flags);
+		/* remove job from ring_mirror_list */
+		list_del_init(&job->node);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+		sched->ops->free_job(job);
+	}
+
+	/* queue timeout for next job */
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	drm_sched_start_timeout(sched);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
-	schedule_work(&s_job->finish_work);
 }
 
 /**
@@ -655,9 +699,10 @@ static int drm_sched_main(void *param)
 		struct dma_fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
+					 (drm_sched_cleanup_jobs(sched),
 					 (!drm_sched_blocked(sched) &&
 					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+					 kthread_should_stop()));
 
 		if (!entity)
 			continue;
diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile
index 8ec64ecf0e36..aae88f8a016c 100644
--- a/drivers/gpu/drm/selftests/Makefile
+++ b/drivers/gpu/drm/selftests/Makefile
@@ -3,4 +3,4 @@ test-drm_modeset-y := test-drm_modeset_common.o test-drm_plane_helper.o \
                       test-drm_format.o test-drm_framebuffer.o \
 		      test-drm_damage_helper.o
 
-obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm_modeset.o
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm_modeset.o test-drm_cmdline_parser.o
diff --git a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h
new file mode 100644
index 000000000000..b45824ec7c8f
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* List each unit test as selftest(function)
+ *
+ * The name is used as both an enum and expanded as igt__name to create
+ * a module parameter. It must be unique and legal for a C identifier.
+ *
+ * Tests are executed in order by igt/drm_mm
+ */
+
+#define cmdline_test(test)	selftest(test, test)
+
+cmdline_test(drm_cmdline_test_res)
+cmdline_test(drm_cmdline_test_res_missing_x)
+cmdline_test(drm_cmdline_test_res_missing_y)
+cmdline_test(drm_cmdline_test_res_bad_y)
+cmdline_test(drm_cmdline_test_res_missing_y_bpp)
+cmdline_test(drm_cmdline_test_res_vesa)
+cmdline_test(drm_cmdline_test_res_vesa_rblank)
+cmdline_test(drm_cmdline_test_res_rblank)
+cmdline_test(drm_cmdline_test_res_bpp)
+cmdline_test(drm_cmdline_test_res_bad_bpp)
+cmdline_test(drm_cmdline_test_res_refresh)
+cmdline_test(drm_cmdline_test_res_bad_refresh)
+cmdline_test(drm_cmdline_test_res_bpp_refresh)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_interlaced)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_margins)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_force_off)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_force_on_off)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_force_on)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_force_on_analog)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_force_on_digital)
+cmdline_test(drm_cmdline_test_res_bpp_refresh_interlaced_margins_force_on)
+cmdline_test(drm_cmdline_test_res_margins_force_on)
+cmdline_test(drm_cmdline_test_res_vesa_margins)
+cmdline_test(drm_cmdline_test_res_invalid_mode)
+cmdline_test(drm_cmdline_test_res_bpp_wrong_place_mode)
+cmdline_test(drm_cmdline_test_name)
+cmdline_test(drm_cmdline_test_name_bpp)
+cmdline_test(drm_cmdline_test_name_refresh)
+cmdline_test(drm_cmdline_test_name_bpp_refresh)
+cmdline_test(drm_cmdline_test_name_refresh_wrong_mode)
+cmdline_test(drm_cmdline_test_name_refresh_invalid_mode)
+cmdline_test(drm_cmdline_test_name_option)
+cmdline_test(drm_cmdline_test_name_bpp_option)
+cmdline_test(drm_cmdline_test_rotate_0)
+cmdline_test(drm_cmdline_test_rotate_90)
+cmdline_test(drm_cmdline_test_rotate_180)
+cmdline_test(drm_cmdline_test_rotate_270)
+cmdline_test(drm_cmdline_test_rotate_invalid_val)
+cmdline_test(drm_cmdline_test_rotate_truncated)
+cmdline_test(drm_cmdline_test_hmirror)
+cmdline_test(drm_cmdline_test_vmirror)
+cmdline_test(drm_cmdline_test_margin_options)
+cmdline_test(drm_cmdline_test_multiple_options)
+cmdline_test(drm_cmdline_test_invalid_option)
diff --git a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c
new file mode 100644
index 000000000000..bef4edde6f9f
--- /dev/null
+++ b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c
@@ -0,0 +1,918 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Bootlin
+ */
+
+#define pr_fmt(fmt) "drm_cmdline: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <drm/drm_connector.h>
+#include <drm/drm_modes.h>
+
+#define TESTS "drm_cmdline_selftests.h"
+#include "drm_selftest.h"
+#include "test-drm_modeset_common.h"
+
+static int drm_cmdline_test_res(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_missing_x(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("x480",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_missing_y(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("1024x",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bad_y(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("1024xtest",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_missing_y_bpp(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("1024x-24",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_vesa(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480M",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(!mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_vesa_rblank(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480MR",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(!mode.rb);
+	FAIL_ON(!mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_rblank(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480R",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(!mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bad_bpp(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480-test",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_refresh(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480@60",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bad_refresh(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480@refresh",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_interlaced(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60i",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(!mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_margins(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60m",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(!mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_force_off(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60d",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_OFF);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_force_on_off(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480-24@60de",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_force_on(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60e",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_ON);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_force_on_analog(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60D",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_ON);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_force_on_digital(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	connector.connector_type = DRM_MODE_CONNECTOR_DVII;
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60D",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_ON_DIGITAL);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_refresh_interlaced_margins_force_on(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480-24@60ime",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(!mode.refresh_specified);
+	FAIL_ON(mode.refresh != 60);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(!mode.interlace);
+	FAIL_ON(!mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_ON);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_margins_force_on(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480me",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(!mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_ON);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_vesa_margins(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480Mm",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(!mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(!mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_invalid_mode(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480f",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_res_bpp_wrong_place_mode(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480e-24",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_name(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("NTSC",
+							   &connector,
+							   &mode));
+	FAIL_ON(strcmp(mode.name, "NTSC"));
+	FAIL_ON(mode.refresh_specified);
+	FAIL_ON(mode.bpp_specified);
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_bpp(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("NTSC-24",
+							   &connector,
+							   &mode));
+	FAIL_ON(strcmp(mode.name, "NTSC"));
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_bpp_refresh(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("NTSC-24@60",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_refresh(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("NTSC@60",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_refresh_wrong_mode(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("NTSC@60m",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_refresh_invalid_mode(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("NTSC@60f",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_option(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("NTSC,rotate=180",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(strcmp(mode.name, "NTSC"));
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_180);
+
+	return 0;
+}
+
+static int drm_cmdline_test_name_bpp_option(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("NTSC-24,rotate=180",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(strcmp(mode.name, "NTSC"));
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_180);
+	FAIL_ON(!mode.bpp_specified);
+	FAIL_ON(mode.bpp != 24);
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_0(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,rotate=0",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_0);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_90(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,rotate=90",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_90);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_180(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,rotate=180",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_180);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_270(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,rotate=270",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_ROTATE_270);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_invalid_val(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,rotate=42",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_rotate_truncated(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,rotate=",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+static int drm_cmdline_test_hmirror(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,reflect_x",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_X);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_vmirror(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,reflect_y",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_Y);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_margin_options(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,margin_right=14,margin_left=24,margin_bottom=36,margin_top=42",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.tv_margins.right != 14);
+	FAIL_ON(mode.tv_margins.left != 24);
+	FAIL_ON(mode.tv_margins.bottom != 36);
+	FAIL_ON(mode.tv_margins.top != 42);
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_multiple_options(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(!drm_mode_parse_command_line_for_connector("720x480,rotate=270,reflect_x",
+							   &connector,
+							   &mode));
+	FAIL_ON(!mode.specified);
+	FAIL_ON(mode.xres != 720);
+	FAIL_ON(mode.yres != 480);
+	FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X));
+
+	FAIL_ON(mode.refresh_specified);
+
+	FAIL_ON(mode.bpp_specified);
+
+	FAIL_ON(mode.rb);
+	FAIL_ON(mode.cvt);
+	FAIL_ON(mode.interlace);
+	FAIL_ON(mode.margins);
+	FAIL_ON(mode.force != DRM_FORCE_UNSPECIFIED);
+
+	return 0;
+}
+
+static int drm_cmdline_test_invalid_option(void *ignored)
+{
+	struct drm_connector connector = { };
+	struct drm_cmdline_mode mode = { };
+
+	FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,test=42",
+							  &connector,
+							  &mode));
+
+	return 0;
+}
+
+#include "drm_selftest.c"
+
+static int __init test_drm_cmdline_init(void)
+{
+	int err;
+
+	err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL);
+
+	return err > 0 ? 0 : err;
+}
+module_init(test_drm_cmdline_init);
+
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@bootlin.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index e04a92658cd7..ee3801201ecc 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -27,11 +27,13 @@
 
 #include <linux/module.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_pci.h>
+#include <drm/drm_pciids.h>
 #include <drm/sis_drm.h>
-#include "sis_drv.h"
 
-#include <drm/drm_pciids.h>
+#include "sis_drv.h"
 
 static struct pci_device_id pciidlist[] = {
 	sisdrv_PCI_IDS
diff --git a/drivers/gpu/drm/sis/sis_drv.h b/drivers/gpu/drm/sis/sis_drv.h
index 328f8a750976..81339443b3b1 100644
--- a/drivers/gpu/drm/sis/sis_drv.h
+++ b/drivers/gpu/drm/sis/sis_drv.h
@@ -28,7 +28,9 @@
 #ifndef _SIS_DRV_H_
 #define _SIS_DRV_H_
 
+#include <drm/drm_ioctl.h>
 #include <drm/drm_legacy.h>
+#include <drm/drm_mm.h>
 
 /* General customization:
  */
@@ -46,12 +48,8 @@ enum sis_family {
 	SIS_CHIP_315 = 1,
 };
 
-#include <drm/drm_mm.h>
-
-
-#define SIS_BASE (dev_priv->mmio)
-#define SIS_READ(reg)         DRM_READ32(SIS_BASE, reg)
-#define SIS_WRITE(reg, val)   DRM_WRITE32(SIS_BASE, reg, val)
+#define SIS_READ(reg)         readl(((void __iomem *)dev_priv->mmio->handle) + (reg))
+#define SIS_WRITE(reg, val)   writel(val, ((void __iomem *)dev_priv->mmio->handle) + (reg))
 
 typedef struct drm_sis_private {
 	drm_local_map_t *mmio;
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 1622db24cd39..e51d4289a3d0 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -31,11 +31,14 @@
  *    Thomas Hellström <thomas-at-tungstengraphics-dot-com>
  */
 
-#include <drm/drmP.h>
+#include <video/sisfb.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
 #include <drm/sis_drm.h>
+
 #include "sis_drv.h"
 
-#include <video/sisfb.h>
 
 #define VIDEO_TYPE 0
 #define AGP_TYPE 1
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index 7c5a7830b6e8..5ff87a4a1c4c 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -4,6 +4,8 @@
  * Author: Vincent Abriou <vincent.abriou@st.com> for STMicroelectronics.
  */
 
+#include <drm/drm_print.h>
+
 #include "sti_awg_utils.h"
 
 #define AWG_DELAY (-5)
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.h b/drivers/gpu/drm/sti/sti_awg_utils.h
index 258a568f050b..8ddfdc049b10 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.h
+++ b/drivers/gpu/drm/sti/sti_awg_utils.h
@@ -7,7 +7,7 @@
 #ifndef _STI_AWG_UTILS_H_
 #define _STI_AWG_UTILS_H_
 
-#include <drm/drmP.h>
+#include <linux/types.h>
 
 #define AWG_MAX_INST 64
 
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index 021b8fcaa0b9..c7652584255d 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -7,11 +7,14 @@
  */
 
 #include <linux/component.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_device.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "sti_compositor.h"
 #include "sti_crtc.h"
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 387f0bed6c1c..dc64fbfc4e61 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -8,11 +8,13 @@
 
 #include <linux/clk.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_device.h>
 #include <drm/drm_plane_helper.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_vblank.h>
 
 #include "sti_compositor.h"
 #include "sti_crtc.h"
diff --git a/drivers/gpu/drm/sti/sti_crtc.h b/drivers/gpu/drm/sti/sti_crtc.h
index d87c488212d6..df489ab14e2b 100644
--- a/drivers/gpu/drm/sti/sti_crtc.h
+++ b/drivers/gpu/drm/sti/sti_crtc.h
@@ -7,8 +7,10 @@
 #ifndef _STI_CRTC_H_
 #define _STI_CRTC_H_
 
-#include <drm/drmP.h>
-
+struct drm_crtc;
+struct drm_device;
+struct drm_plane;
+struct notifier_block;
 struct sti_mixer;
 
 int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer,
diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c
index bc908453ffb3..0bf7c332cf0b 100644
--- a/drivers/gpu/drm/sti/sti_cursor.c
+++ b/drivers/gpu/drm/sti/sti_cursor.c
@@ -6,9 +6,11 @@
  *          for STMicroelectronics.
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/seq_file.h>
 
 #include <drm/drm_atomic.h>
+#include <drm/drm_device.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
diff --git a/drivers/gpu/drm/sti/sti_cursor.h b/drivers/gpu/drm/sti/sti_cursor.h
index 067feda5226c..25ebeb3f6bbc 100644
--- a/drivers/gpu/drm/sti/sti_cursor.h
+++ b/drivers/gpu/drm/sti/sti_cursor.h
@@ -7,6 +7,9 @@
 #ifndef _STI_CURSOR_H_
 #define _STI_CURSOR_H_
 
+struct drm_device;
+struct device;
+
 struct drm_plane *sti_cursor_create(struct drm_device *drm_dev,
 				    struct device *dev, int desc,
 				    void __iomem *baseaddr,
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index a525fd899f68..bb6ae6dd66c9 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -4,25 +4,26 @@
  * Author: Benjamin Gaignard <benjamin.gaignard@st.com> for STMicroelectronics.
  */
 
-#include <drm/drmP.h>
-
 #include <linux/component.h>
-#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_of.h>
 #include <drm/drm_probe_helper.h>
 
 #include "sti_crtc.h"
 #include "sti_drv.h"
+#include "sti_drv.h"
 #include "sti_plane.h"
 
 #define DRIVER_NAME	"sti"
@@ -95,7 +96,6 @@ static struct drm_info_list sti_drm_dbg_list[] = {
 
 static int sti_drm_dbg_init(struct drm_minor *minor)
 {
-	struct dentry *dentry;
 	int ret;
 
 	ret = drm_debugfs_create_files(sti_drm_dbg_list,
@@ -104,13 +104,8 @@ static int sti_drm_dbg_init(struct drm_minor *minor)
 	if (ret)
 		goto err;
 
-	dentry = debugfs_create_file("fps_show", S_IRUGO | S_IWUSR,
-				     minor->debugfs_root, minor->dev,
-				     &sti_drm_fps_fops);
-	if (!dentry) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	debugfs_create_file("fps_show", S_IRUGO | S_IWUSR, minor->debugfs_root,
+			    minor->dev, &sti_drm_fps_fops);
 
 	DRM_INFO("%s: debugfs installed\n", DRIVER_NAME);
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_drv.h b/drivers/gpu/drm/sti/sti_drv.h
index 4b41142a22e4..b5b2dd560bae 100644
--- a/drivers/gpu/drm/sti/sti_drv.h
+++ b/drivers/gpu/drm/sti/sti_drv.h
@@ -7,10 +7,11 @@
 #ifndef _STI_DRV_H_
 #define _STI_DRV_H_
 
-#include <drm/drmP.h>
+#include <linux/platform_device.h>
 
+struct drm_device;
+struct drm_property;
 struct sti_compositor;
-struct sti_tvout;
 
 /**
  * STI drm private structure
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index b31cc2672d36..9e6d5d8b7030 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -11,9 +11,10 @@
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_device.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 #include "sti_awg_utils.h"
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index cff7b2b5ee9e..8e926cd6a1c8 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -5,10 +5,14 @@
  *          Fabien Dessenne <fabien.dessenne@st.com>
  *          for STMicroelectronics.
  */
+
+#include <linux/dma-mapping.h>
 #include <linux/seq_file.h>
 
 #include <drm/drm_atomic.h>
+#include <drm/drm_device.h>
 #include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_gem_cma_helper.h>
 
 #include "sti_compositor.h"
diff --git a/drivers/gpu/drm/sti/sti_gdp.h b/drivers/gpu/drm/sti/sti_gdp.h
index d3e8ebfe2e66..deb07e34173d 100644
--- a/drivers/gpu/drm/sti/sti_gdp.h
+++ b/drivers/gpu/drm/sti/sti_gdp.h
@@ -11,6 +11,11 @@
 
 #include <linux/types.h>
 
+#include <drm/drm_plane.h>
+
+struct drm_device;
+struct device;
+
 struct drm_plane *sti_gdp_create(struct drm_device *drm_dev,
 				 struct device *dev, int desc,
 				 void __iomem *baseaddr,
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index ff9256673fc8..94e404f13234 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -6,12 +6,16 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/seq_file.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 /* HDformatter registers */
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 6000df624980..f03d617edc4c 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -13,9 +13,12 @@
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 #include <sound/hdmi-codec.h>
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index 63a24941db3b..1f6dc90b5d83 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -10,9 +10,11 @@
 #include <linux/hdmi.h>
 #include <linux/platform_device.h>
 
-#include <drm/drmP.h>
 #include <media/cec-notifier.h>
 
+#include <drm/drm_modes.h>
+#include <drm/drm_property.h>
+
 #define HDMI_STA           0x0010
 #define HDMI_STA_DLL_LCK   BIT(5)
 #define HDMI_STA_HOT_PLUG  BIT(4)
diff --git a/drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c b/drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c
index 01699af6a768..d5f94dca0d32 100644
--- a/drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c
+++ b/drivers/gpu/drm/sti/sti_hdmi_tx3g4c28phy.c
@@ -4,6 +4,8 @@
  * Author: Vincent Abriou <vincent.abriou@st.com> for STMicroelectronics.
  */
 
+#include <drm/drm_print.h>
+
 #include "sti_hdmi_tx3g4c28phy.h"
 
 #define HDMI_SRZ_CFG                             0x504
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 23565f52dd71..1015abe0ce08 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -5,19 +5,25 @@
  */
 
 #include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/firmware.h>
+#include <linux/io.h>
+#include <linux/module.h>
 #include <linux/reset.h>
 #include <linux/seq_file.h>
 
 #include <drm/drm_atomic.h>
+#include <drm/drm_device.h>
 #include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_gem_cma_helper.h>
 
 #include "sti_compositor.h"
+#include "sti_drv.h"
 #include "sti_hqvdp_lut.h"
 #include "sti_plane.h"
 #include "sti_vtg.h"
-#include "sti_drv.h"
 
 /* Firmware name */
 #define HQVDP_FMW_NAME          "hqvdp-stih407.bin"
diff --git a/drivers/gpu/drm/sti/sti_mixer.c b/drivers/gpu/drm/sti/sti_mixer.c
index a4f45c74d678..c3a3e1e5fc8a 100644
--- a/drivers/gpu/drm/sti/sti_mixer.c
+++ b/drivers/gpu/drm/sti/sti_mixer.c
@@ -5,8 +5,12 @@
  *          Fabien Dessenne <fabien.dessenne@st.com>
  *          for STMicroelectronics.
  */
+
+#include <linux/moduleparam.h>
 #include <linux/seq_file.h>
 
+#include <drm/drm_print.h>
+
 #include "sti_compositor.h"
 #include "sti_mixer.h"
 #include "sti_vtg.h"
diff --git a/drivers/gpu/drm/sti/sti_mixer.h b/drivers/gpu/drm/sti/sti_mixer.h
index 4cb3cfddc03a..d9544246913a 100644
--- a/drivers/gpu/drm/sti/sti_mixer.h
+++ b/drivers/gpu/drm/sti/sti_mixer.h
@@ -9,10 +9,15 @@
 #ifndef _STI_MIXER_H_
 #define _STI_MIXER_H_
 
-#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
 
 #include "sti_plane.h"
 
+struct device;
+
 #define to_sti_mixer(x) container_of(x, struct sti_mixer, drm_crtc)
 
 enum sti_mixer_status {
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index b48cd86e0250..3da4a46df2f2 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -6,8 +6,10 @@
  *          for STMicroelectronics.
  */
 
-#include <drm/drmP.h>
+#include <linux/types.h>
+
 #include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fourcc.h>
 #include <drm/drm_gem_cma_helper.h>
 
 #include "sti_compositor.h"
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index b8d7fae2a014..065ffffbfb4a 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -7,7 +7,6 @@
 #ifndef _STI_PLANE_H_
 #define _STI_PLANE_H_
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c
index c42f2fa7053c..e1b3c8cb7287 100644
--- a/drivers/gpu/drm/sti/sti_tvout.c
+++ b/drivers/gpu/drm/sti/sti_tvout.c
@@ -8,14 +8,18 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/seq_file.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 
 #include "sti_crtc.h"
 #include "sti_drv.h"
diff --git a/drivers/gpu/drm/sti/sti_vid.c b/drivers/gpu/drm/sti/sti_vid.c
index 2aac36c95835..2d4230410464 100644
--- a/drivers/gpu/drm/sti/sti_vid.c
+++ b/drivers/gpu/drm/sti/sti_vid.c
@@ -5,7 +5,9 @@
  */
 #include <linux/seq_file.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/drm_print.h>
 
 #include "sti_plane.h"
 #include "sti_vid.h"
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index 6c421644de18..ef4009f11396 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -8,11 +8,13 @@
  */
 
 #include <linux/module.h>
+#include <linux/io.h>
 #include <linux/notifier.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 
-#include <drm/drmP.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_print.h>
 
 #include "sti_drv.h"
 #include "sti_vtg.h"
diff --git a/drivers/gpu/drm/sti/sti_vtg.h b/drivers/gpu/drm/sti/sti_vtg.h
index d177129e5bcb..46faf141b2d9 100644
--- a/drivers/gpu/drm/sti/sti_vtg.h
+++ b/drivers/gpu/drm/sti/sti_vtg.h
@@ -16,6 +16,7 @@
 #define VTG_SYNC_ID_DVO         4
 
 struct sti_vtg;
+struct drm_crtc;
 struct drm_display_mode;
 struct notifier_block;
 
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index 5834ef56fbaa..9dee4e430de5 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -12,6 +12,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -135,14 +136,14 @@ static __maybe_unused int drv_suspend(struct device *dev)
 	struct ltdc_device *ldev = ddev->dev_private;
 	struct drm_atomic_state *state;
 
-	drm_kms_helper_poll_disable(ddev);
+	WARN_ON(ldev->suspend_state);
+
 	state = drm_atomic_helper_suspend(ddev);
-	if (IS_ERR(state)) {
-		drm_kms_helper_poll_enable(ddev);
+	if (IS_ERR(state))
 		return PTR_ERR(state);
-	}
+
 	ldev->suspend_state = state;
-	ltdc_suspend(ddev);
+	pm_runtime_force_suspend(dev);
 
 	return 0;
 }
@@ -151,16 +152,43 @@ static __maybe_unused int drv_resume(struct device *dev)
 {
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct ltdc_device *ldev = ddev->dev_private;
+	int ret;
+
+	if (WARN_ON(!ldev->suspend_state))
+		return -ENOENT;
+
+	pm_runtime_force_resume(dev);
+	ret = drm_atomic_helper_resume(ddev, ldev->suspend_state);
+	if (ret)
+		pm_runtime_force_suspend(dev);
+
+	ldev->suspend_state = NULL;
+
+	return ret;
+}
 
-	ltdc_resume(ddev);
-	drm_atomic_helper_resume(ddev, ldev->suspend_state);
-	drm_kms_helper_poll_enable(ddev);
+static __maybe_unused int drv_runtime_suspend(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+
+	DRM_DEBUG_DRIVER("\n");
+	ltdc_suspend(ddev);
 
 	return 0;
 }
 
+static __maybe_unused int drv_runtime_resume(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+
+	DRM_DEBUG_DRIVER("\n");
+	return ltdc_resume(ddev);
+}
+
 static const struct dev_pm_ops drv_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(drv_suspend, drv_resume)
+	SET_RUNTIME_PM_OPS(drv_runtime_suspend,
+			   drv_runtime_resume, NULL)
 };
 
 static int stm_drm_platform_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
index 1bef73e8c8fe..0ab32fee6c1b 100644
--- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
+++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c
@@ -9,6 +9,7 @@
 #include <linux/clk.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
+#include <linux/regulator/consumer.h>
 #include <drm/drmP.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/bridge/dw_mipi_dsi.h>
@@ -76,6 +77,7 @@ struct dw_mipi_dsi_stm {
 	u32 hw_version;
 	int lane_min_kbps;
 	int lane_max_kbps;
+	struct regulator *vdd_supply;
 };
 
 static inline void dsi_write(struct dw_mipi_dsi_stm *dsi, u32 reg, u32 val)
@@ -208,10 +210,27 @@ static int dw_mipi_dsi_phy_init(void *priv_data)
 	if (ret)
 		DRM_DEBUG_DRIVER("!TIMEOUT! waiting PLL, let's continue\n");
 
+	return 0;
+}
+
+static void dw_mipi_dsi_phy_power_on(void *priv_data)
+{
+	struct dw_mipi_dsi_stm *dsi = priv_data;
+
+	DRM_DEBUG_DRIVER("\n");
+
 	/* Enable the DSI wrapper */
 	dsi_set(dsi, DSI_WCR, WCR_DSIEN);
+}
 
-	return 0;
+static void dw_mipi_dsi_phy_power_off(void *priv_data)
+{
+	struct dw_mipi_dsi_stm *dsi = priv_data;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	/* Disable the DSI wrapper */
+	dsi_clear(dsi, DSI_WCR, WCR_DSIEN);
 }
 
 static int
@@ -225,7 +244,6 @@ dw_mipi_dsi_get_lane_mbps(void *priv_data, const struct drm_display_mode *mode,
 	u32 val;
 
 	/* Update lane capabilities according to hw version */
-	dsi->hw_version = dsi_read(dsi, DSI_VERSION) & VERSION;
 	dsi->lane_min_kbps = LANE_MIN_KBPS;
 	dsi->lane_max_kbps = LANE_MAX_KBPS;
 	if (dsi->hw_version == HWVER_131) {
@@ -286,6 +304,8 @@ dw_mipi_dsi_get_lane_mbps(void *priv_data, const struct drm_display_mode *mode,
 
 static const struct dw_mipi_dsi_phy_ops dw_mipi_dsi_stm_phy_ops = {
 	.init = dw_mipi_dsi_phy_init,
+	.power_on = dw_mipi_dsi_phy_power_on,
+	.power_off = dw_mipi_dsi_phy_power_off,
 	.get_lane_mbps = dw_mipi_dsi_get_lane_mbps,
 };
 
@@ -304,6 +324,7 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct dw_mipi_dsi_stm *dsi;
+	struct clk *pclk;
 	struct resource *res;
 	int ret;
 
@@ -314,21 +335,58 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(dsi->base)) {
-		DRM_ERROR("Unable to get dsi registers\n");
-		return PTR_ERR(dsi->base);
+		ret = PTR_ERR(dsi->base);
+		DRM_ERROR("Unable to get dsi registers %d\n", ret);
+		return ret;
+	}
+
+	dsi->vdd_supply = devm_regulator_get(dev, "phy-dsi");
+	if (IS_ERR(dsi->vdd_supply)) {
+		ret = PTR_ERR(dsi->vdd_supply);
+		if (ret != -EPROBE_DEFER)
+			DRM_ERROR("Failed to request regulator: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_enable(dsi->vdd_supply);
+	if (ret) {
+		DRM_ERROR("Failed to enable regulator: %d\n", ret);
+		return ret;
 	}
 
 	dsi->pllref_clk = devm_clk_get(dev, "ref");
 	if (IS_ERR(dsi->pllref_clk)) {
 		ret = PTR_ERR(dsi->pllref_clk);
-		dev_err(dev, "Unable to get pll reference clock: %d\n", ret);
-		return ret;
+		DRM_ERROR("Unable to get pll reference clock: %d\n", ret);
+		goto err_clk_get;
 	}
 
 	ret = clk_prepare_enable(dsi->pllref_clk);
 	if (ret) {
-		dev_err(dev, "%s: Failed to enable pllref_clk\n", __func__);
-		return ret;
+		DRM_ERROR("Failed to enable pllref_clk: %d\n", ret);
+		goto err_clk_get;
+	}
+
+	pclk = devm_clk_get(dev, "pclk");
+	if (IS_ERR(pclk)) {
+		ret = PTR_ERR(pclk);
+		DRM_ERROR("Unable to get peripheral clock: %d\n", ret);
+		goto err_dsi_probe;
+	}
+
+	ret = clk_prepare_enable(pclk);
+	if (ret) {
+		DRM_ERROR("%s: Failed to enable peripheral clk\n", __func__);
+		goto err_dsi_probe;
+	}
+
+	dsi->hw_version = dsi_read(dsi, DSI_VERSION) & VERSION;
+	clk_disable_unprepare(pclk);
+
+	if (dsi->hw_version != HWVER_130 && dsi->hw_version != HWVER_131) {
+		ret = -ENODEV;
+		DRM_ERROR("bad dsi hardware version\n");
+		goto err_dsi_probe;
 	}
 
 	dw_mipi_dsi_stm_plat_data.base = dsi->base;
@@ -338,20 +396,28 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev)
 
 	dsi->dsi = dw_mipi_dsi_probe(pdev, &dw_mipi_dsi_stm_plat_data);
 	if (IS_ERR(dsi->dsi)) {
-		DRM_ERROR("Failed to initialize mipi dsi host\n");
-		clk_disable_unprepare(dsi->pllref_clk);
-		return PTR_ERR(dsi->dsi);
+		ret = PTR_ERR(dsi->dsi);
+		DRM_ERROR("Failed to initialize mipi dsi host: %d\n", ret);
+		goto err_dsi_probe;
 	}
 
 	return 0;
+
+err_dsi_probe:
+	clk_disable_unprepare(dsi->pllref_clk);
+err_clk_get:
+	regulator_disable(dsi->vdd_supply);
+
+	return ret;
 }
 
 static int dw_mipi_dsi_stm_remove(struct platform_device *pdev)
 {
 	struct dw_mipi_dsi_stm *dsi = platform_get_drvdata(pdev);
 
-	clk_disable_unprepare(dsi->pllref_clk);
 	dw_mipi_dsi_remove(dsi->dsi);
+	clk_disable_unprepare(dsi->pllref_clk);
+	regulator_disable(dsi->vdd_supply);
 
 	return 0;
 }
@@ -363,6 +429,7 @@ static int __maybe_unused dw_mipi_dsi_stm_suspend(struct device *dev)
 	DRM_DEBUG_DRIVER("\n");
 
 	clk_disable_unprepare(dsi->pllref_clk);
+	regulator_disable(dsi->vdd_supply);
 
 	return 0;
 }
@@ -370,10 +437,22 @@ static int __maybe_unused dw_mipi_dsi_stm_suspend(struct device *dev)
 static int __maybe_unused dw_mipi_dsi_stm_resume(struct device *dev)
 {
 	struct dw_mipi_dsi_stm *dsi = dw_mipi_dsi_stm_plat_data.priv_data;
+	int ret;
 
 	DRM_DEBUG_DRIVER("\n");
 
-	clk_prepare_enable(dsi->pllref_clk);
+	ret = regulator_enable(dsi->vdd_supply);
+	if (ret) {
+		DRM_ERROR("Failed to enable regulator: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(dsi->pllref_clk);
+	if (ret) {
+		regulator_disable(dsi->vdd_supply);
+		DRM_ERROR("Failed to enable pllref_clk: %d\n", ret);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 32fd6a3b37fb..2fe6c4a8d915 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -16,6 +16,7 @@
 #include <linux/of_address.h>
 #include <linux/of_graph.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
 #include <drm/drm_atomic.h>
@@ -232,6 +233,11 @@ static const enum ltdc_pix_fmt ltdc_pix_fmt_a1[NB_PF] = {
 	PF_ARGB4444		/* 0x07 */
 };
 
+static const u64 ltdc_format_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
 static inline u32 reg_read(void __iomem *base, u32 reg)
 {
 	return readl_relaxed(base + reg);
@@ -426,8 +432,8 @@ static void ltdc_crtc_atomic_enable(struct drm_crtc *crtc,
 	/* Enable IRQ */
 	reg_set(ldev->regs, LTDC_IER, IER_RRIE | IER_FUIE | IER_TERRIE);
 
-	/* Immediately commit the planes */
-	reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR);
+	/* Commit shadow registers = update planes at next vblank */
+	reg_set(ldev->regs, LTDC_SRCR, SRCR_VBR);
 
 	/* Enable LTDC */
 	reg_set(ldev->regs, LTDC_GCR, GCR_LTDCEN);
@@ -439,6 +445,7 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc,
 				     struct drm_crtc_state *old_state)
 {
 	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+	struct drm_device *ddev = crtc->dev;
 
 	DRM_DEBUG_DRIVER("\n");
 
@@ -452,6 +459,8 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	/* immediately commit disable of layers before switching off LTDC */
 	reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR);
+
+	pm_runtime_put_sync(ddev->dev);
 }
 
 #define CLK_TOLERANCE_HZ 50
@@ -500,33 +509,55 @@ static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc,
 				 struct drm_display_mode *adjusted_mode)
 {
 	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+	struct drm_device *ddev = crtc->dev;
 	int rate = mode->clock * 1000;
+	bool runtime_active;
+	int ret;
 
-	/*
-	 * TODO clk_round_rate() does not work yet. When ready, it can
-	 * be used instead of clk_set_rate() then clk_get_rate().
-	 */
+	runtime_active = pm_runtime_active(ddev->dev);
+
+	if (runtime_active)
+		pm_runtime_put_sync(ddev->dev);
 
-	clk_disable(ldev->pixel_clk);
 	if (clk_set_rate(ldev->pixel_clk, rate) < 0) {
 		DRM_ERROR("Cannot set rate (%dHz) for pixel clk\n", rate);
 		return false;
 	}
-	clk_enable(ldev->pixel_clk);
 
 	adjusted_mode->clock = clk_get_rate(ldev->pixel_clk) / 1000;
 
+	if (runtime_active) {
+		ret = pm_runtime_get_sync(ddev->dev);
+		if (ret) {
+			DRM_ERROR("Failed to fixup mode, cannot get sync\n");
+			return false;
+		}
+	}
+
+	DRM_DEBUG_DRIVER("requested clock %dkHz, adjusted clock %dkHz\n",
+			 mode->clock, adjusted_mode->clock);
+
 	return true;
 }
 
 static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+	struct drm_device *ddev = crtc->dev;
 	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
 	struct videomode vm;
 	u32 hsync, vsync, accum_hbp, accum_vbp, accum_act_w, accum_act_h;
 	u32 total_width, total_height;
 	u32 val;
+	int ret;
+
+	if (!pm_runtime_active(ddev->dev)) {
+		ret = pm_runtime_get_sync(ddev->dev);
+		if (ret) {
+			DRM_ERROR("Failed to set mode, cannot get sync\n");
+			return;
+		}
+	}
 
 	drm_display_mode_to_videomode(mode, &vm);
 
@@ -555,7 +586,7 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	if (vm.flags & DISPLAY_FLAGS_VSYNC_HIGH)
 		val |= GCR_VSPOL;
 
-	if (vm.flags & DISPLAY_FLAGS_DE_HIGH)
+	if (vm.flags & DISPLAY_FLAGS_DE_LOW)
 		val |= GCR_DEPOL;
 
 	if (vm.flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE)
@@ -587,6 +618,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc,
 				   struct drm_crtc_state *old_crtc_state)
 {
 	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+	struct drm_device *ddev = crtc->dev;
 	struct drm_pending_vblank_event *event = crtc->state->event;
 
 	DRM_DEBUG_ATOMIC("\n");
@@ -599,12 +631,12 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc,
 	if (event) {
 		crtc->state->event = NULL;
 
-		spin_lock_irq(&crtc->dev->event_lock);
+		spin_lock_irq(&ddev->event_lock);
 		if (drm_crtc_vblank_get(crtc) == 0)
 			drm_crtc_arm_vblank_event(crtc, event);
 		else
 			drm_crtc_send_vblank_event(crtc, event);
-		spin_unlock_irq(&crtc->dev->event_lock);
+		spin_unlock_irq(&ddev->event_lock);
 	}
 }
 
@@ -660,15 +692,19 @@ bool ltdc_crtc_scanoutpos(struct drm_device *ddev, unsigned int pipe,
 	 * Computation for the two first cases are identical so we can
 	 * simplify the code and only test if line > vactive_end
 	 */
-	line = reg_read(ldev->regs, LTDC_CPSR) & CPSR_CYPOS;
-	vactive_start = reg_read(ldev->regs, LTDC_BPCR) & BPCR_AVBP;
-	vactive_end = reg_read(ldev->regs, LTDC_AWCR) & AWCR_AAH;
-	vtotal = reg_read(ldev->regs, LTDC_TWCR) & TWCR_TOTALH;
-
-	if (line > vactive_end)
-		*vpos = line - vtotal - vactive_start;
-	else
-		*vpos = line - vactive_start;
+	if (pm_runtime_active(ddev->dev)) {
+		line = reg_read(ldev->regs, LTDC_CPSR) & CPSR_CYPOS;
+		vactive_start = reg_read(ldev->regs, LTDC_BPCR) & BPCR_AVBP;
+		vactive_end = reg_read(ldev->regs, LTDC_AWCR) & AWCR_AAH;
+		vtotal = reg_read(ldev->regs, LTDC_TWCR) & TWCR_TOTALH;
+
+		if (line > vactive_end)
+			*vpos = line - vtotal - vactive_start;
+		else
+			*vpos = line - vactive_start;
+	} else {
+		*vpos = 0;
+	}
 
 	*hpos = 0;
 
@@ -779,7 +815,7 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane,
 
 	/* Configures the color frame buffer pitch in bytes & line length */
 	pitch_in_bytes = fb->pitches[0];
-	line_length = drm_format_plane_cpp(fb->format->format, 0) *
+	line_length = fb->format->cpp[0] *
 		      (x1 - x0 + 1) + (ldev->caps.bus_width >> 3) - 1;
 	val = ((pitch_in_bytes << 16) | line_length);
 	reg_update_bits(ldev->regs, LTDC_L1CFBLR + lofs,
@@ -822,11 +858,11 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane,
 
 	mutex_lock(&ldev->err_lock);
 	if (ldev->error_status & ISR_FUIF) {
-		DRM_DEBUG_DRIVER("Fifo underrun\n");
+		DRM_WARN("ltdc fifo underrun: please verify display mode\n");
 		ldev->error_status &= ~ISR_FUIF;
 	}
 	if (ldev->error_status & ISR_TERRIF) {
-		DRM_DEBUG_DRIVER("Transfer error\n");
+		DRM_WARN("ltdc transfer error\n");
 		ldev->error_status &= ~ISR_TERRIF;
 	}
 	mutex_unlock(&ldev->err_lock);
@@ -864,6 +900,16 @@ static void ltdc_plane_atomic_print_state(struct drm_printer *p,
 	fpsi->counter = 0;
 }
 
+static bool ltdc_plane_format_mod_supported(struct drm_plane *plane,
+					    u32 format,
+					    u64 modifier)
+{
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return true;
+
+	return false;
+}
+
 static const struct drm_plane_funcs ltdc_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -872,6 +918,7 @@ static const struct drm_plane_funcs ltdc_plane_funcs = {
 	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
 	.atomic_print_state = ltdc_plane_atomic_print_state,
+	.format_mod_supported = ltdc_plane_format_mod_supported,
 };
 
 static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = {
@@ -890,6 +937,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev,
 	unsigned int i, nb_fmt = 0;
 	u32 formats[NB_PF * 2];
 	u32 drm_fmt, drm_fmt_no_alpha;
+	const u64 *modifiers = ltdc_format_modifiers;
 	int ret;
 
 	/* Get supported pixel formats */
@@ -918,7 +966,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev,
 
 	ret = drm_universal_plane_init(ddev, plane, possible_crtcs,
 				       &ltdc_plane_funcs, formats, nb_fmt,
-				       NULL, type, NULL);
+				       modifiers, type, NULL);
 	if (ret < 0)
 		return NULL;
 
@@ -1021,10 +1069,13 @@ static int ltdc_get_caps(struct drm_device *ddev)
 	struct ltdc_device *ldev = ddev->dev_private;
 	u32 bus_width_log2, lcr, gc2r;
 
-	/* at least 1 layer must be managed */
+	/*
+	 * at least 1 layer must be managed & the number of layers
+	 * must not exceed LTDC_MAX_LAYER
+	 */
 	lcr = reg_read(ldev->regs, LTDC_LCR);
 
-	ldev->caps.nb_layers = max_t(int, lcr, 1);
+	ldev->caps.nb_layers = clamp((int)lcr, 1, LTDC_MAX_LAYER);
 
 	/* set data bus width */
 	gc2r = reg_read(ldev->regs, LTDC_GC2R);
@@ -1125,8 +1176,9 @@ int ltdc_load(struct drm_device *ddev)
 
 	ldev->pixel_clk = devm_clk_get(dev, "lcd");
 	if (IS_ERR(ldev->pixel_clk)) {
-		DRM_ERROR("Unable to get lcd clock\n");
-		return -ENODEV;
+		if (PTR_ERR(ldev->pixel_clk) != -EPROBE_DEFER)
+			DRM_ERROR("Unable to get lcd clock\n");
+		return PTR_ERR(ldev->pixel_clk);
 	}
 
 	if (clk_prepare_enable(ldev->pixel_clk)) {
@@ -1134,6 +1186,12 @@ int ltdc_load(struct drm_device *ddev)
 		return -ENODEV;
 	}
 
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		usleep_range(10, 20);
+		reset_control_deassert(rstc);
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ldev->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ldev->regs)) {
@@ -1142,8 +1200,15 @@ int ltdc_load(struct drm_device *ddev)
 		goto err;
 	}
 
+	/* Disable interrupts */
+	reg_clear(ldev->regs, LTDC_IER,
+		  IER_LIE | IER_RRIE | IER_FUIE | IER_TERRIE);
+
 	for (i = 0; i < MAX_IRQ; i++) {
 		irq = platform_get_irq(pdev, i);
+		if (irq == -EPROBE_DEFER)
+			goto err;
+
 		if (irq < 0)
 			continue;
 
@@ -1156,15 +1221,6 @@ int ltdc_load(struct drm_device *ddev)
 		}
 	}
 
-	if (!IS_ERR(rstc)) {
-		reset_control_assert(rstc);
-		usleep_range(10, 20);
-		reset_control_deassert(rstc);
-	}
-
-	/* Disable interrupts */
-	reg_clear(ldev->regs, LTDC_IER,
-		  IER_LIE | IER_RRIE | IER_FUIE | IER_TERRIE);
 
 	ret = ltdc_get_caps(ddev);
 	if (ret) {
@@ -1173,7 +1229,7 @@ int ltdc_load(struct drm_device *ddev)
 		goto err;
 	}
 
-	DRM_INFO("ltdc hw version 0x%08x - ready\n", ldev->caps.hw_version);
+	DRM_DEBUG_DRIVER("ltdc hw version 0x%08x\n", ldev->caps.hw_version);
 
 	/* Add endpoints panels or bridges if any */
 	for (i = 0; i < MAX_ENDPOINTS; i++) {
@@ -1203,6 +1259,8 @@ int ltdc_load(struct drm_device *ddev)
 		goto err;
 	}
 
+	ddev->mode_config.allow_fb_modifiers = true;
+
 	ret = ltdc_crtc_init(ddev, crtc);
 	if (ret) {
 		DRM_ERROR("Failed to init crtc\n");
@@ -1218,8 +1276,11 @@ int ltdc_load(struct drm_device *ddev)
 	/* Allow usage of vblank without having to call drm_irq_install */
 	ddev->irq_enabled = 1;
 
-	return 0;
+	clk_disable_unprepare(ldev->pixel_clk);
+
+	pm_runtime_enable(ddev->dev);
 
+	return 0;
 err:
 	for (i = 0; i < MAX_ENDPOINTS; i++)
 		drm_panel_bridge_remove(bridge[i]);
@@ -1231,7 +1292,6 @@ err:
 
 void ltdc_unload(struct drm_device *ddev)
 {
-	struct ltdc_device *ldev = ddev->dev_private;
 	int i;
 
 	DRM_DEBUG_DRIVER("\n");
@@ -1239,7 +1299,7 @@ void ltdc_unload(struct drm_device *ddev)
 	for (i = 0; i < MAX_ENDPOINTS; i++)
 		drm_of_panel_bridge_remove(ddev->dev->of_node, 0, i);
 
-	clk_disable_unprepare(ldev->pixel_clk);
+	pm_runtime_disable(ddev->dev);
 }
 
 MODULE_AUTHOR("Philippe Cornu <philippe.cornu@st.com>");
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 3ff73998d841..1a1b52e6f73e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -49,22 +49,8 @@ static struct drm_driver sun4i_drv_driver = {
 	.minor			= 0,
 
 	/* GEM Operations */
+	DRM_GEM_CMA_VMAP_DRIVER_OPS,
 	.dumb_create		= drm_sun4i_gem_dumb_create,
-	.gem_free_object_unlocked = drm_gem_cma_free_object,
-	.gem_vm_ops		= &drm_gem_cma_vm_ops,
-
-	/* PRIME Operations */
-	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
-	.gem_prime_import	= drm_gem_prime_import,
-	.gem_prime_export	= drm_gem_prime_export,
-	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
-	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap		= drm_gem_cma_prime_vmap,
-	.gem_prime_vunmap	= drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
-
-	/* Frame Buffer Operations */
 };
 
 static int sun4i_drv_bind(struct device *dev)
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index bfa7e2b146df..a1fc8b520985 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -980,6 +980,7 @@ static ssize_t sun6i_dsi_transfer(struct mipi_dsi_host *host,
 	switch (msg->type) {
 	case MIPI_DSI_DCS_SHORT_WRITE:
 	case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
+	case MIPI_DSI_GENERIC_SHORT_WRITE_2_PARAM:
 		ret = sun6i_dsi_dcs_write_short(dsi, msg);
 		break;
 
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 607a6ea17ecc..079250c85733 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -26,6 +26,9 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
+static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state);
+
 static void tegra_dc_stats_reset(struct tegra_dc_stats *stats)
 {
 	stats->frames = 0;
@@ -1155,20 +1158,12 @@ static void tegra_dc_destroy(struct drm_crtc *crtc)
 
 static void tegra_crtc_reset(struct drm_crtc *crtc)
 {
-	struct tegra_dc_state *state;
+	struct tegra_dc_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
 	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-
-	kfree(crtc->state);
-	crtc->state = NULL;
-
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
+		tegra_crtc_atomic_destroy_state(crtc, crtc->state);
 
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
 	drm_crtc_vblank_reset(crtc);
 }
 
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index ee4180d8db14..65c389d9c85d 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -485,11 +485,16 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	dpaux->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	dpaux->vdd = devm_regulator_get_optional(&pdev->dev, "vdd");
 	if (IS_ERR(dpaux->vdd)) {
-		dev_err(&pdev->dev, "failed to get VDD supply: %ld\n",
-			PTR_ERR(dpaux->vdd));
-		return PTR_ERR(dpaux->vdd);
+		if (PTR_ERR(dpaux->vdd) != -ENODEV) {
+			if (PTR_ERR(dpaux->vdd) != -EPROBE_DEFER)
+				dev_err(&pdev->dev,
+					"failed to get VDD supply: %ld\n",
+					PTR_ERR(dpaux->vdd));
+
+			return PTR_ERR(dpaux->vdd);
+		}
 	}
 
 	platform_set_drvdata(pdev, dpaux);
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 70154c253d45..488f36f00bd8 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -127,8 +127,7 @@ struct tegra_output {
 	const struct edid *edid;
 	struct cec_notifier *cec;
 	unsigned int hpd_irq;
-	int hpd_gpio;
-	enum of_gpio_flags hpd_gpio_flags;
+	struct gpio_desc *hpd_gpio;
 
 	struct drm_encoder encoder;
 	struct drm_connector connector;
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 1dd83a757dba..57cc26e1da01 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -131,18 +131,16 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 					struct drm_file *file,
 					const struct drm_mode_fb_cmd2 *cmd)
 {
-	unsigned int hsub, vsub, i;
+	const struct drm_format_info *info = drm_get_format_info(drm, cmd);
 	struct tegra_bo *planes[4];
 	struct drm_gem_object *gem;
 	struct drm_framebuffer *fb;
+	unsigned int i;
 	int err;
 
-	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
-	vsub = drm_format_vert_chroma_subsampling(cmd->pixel_format);
-
-	for (i = 0; i < drm_format_num_planes(cmd->pixel_format); i++) {
-		unsigned int width = cmd->width / (i ? hsub : 1);
-		unsigned int height = cmd->height / (i ? vsub : 1);
+	for (i = 0; i < info->num_planes; i++) {
+		unsigned int width = cmd->width / (i ? info->hsub : 1);
+		unsigned int height = cmd->height / (i ? info->vsub : 1);
 		unsigned int size, bpp;
 
 		gem = drm_gem_object_lookup(file, cmd->handles[i]);
@@ -151,7 +149,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 			goto unreference;
 		}
 
-		bpp = drm_format_plane_cpp(cmd->pixel_format, i);
+		bpp = info->cpp[i];
 
 		size = (height - 1) * cmd->pitches[i] +
 		       width * bpp + cmd->offsets[i];
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 9c2b9dad55c3..e4d242ca27b8 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -53,18 +53,11 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force)
 	struct tegra_output *output = connector_to_output(connector);
 	enum drm_connector_status status = connector_status_unknown;
 
-	if (gpio_is_valid(output->hpd_gpio)) {
-		if (output->hpd_gpio_flags & OF_GPIO_ACTIVE_LOW) {
-			if (gpio_get_value(output->hpd_gpio) != 0)
-				status = connector_status_disconnected;
-			else
-				status = connector_status_connected;
-		} else {
-			if (gpio_get_value(output->hpd_gpio) == 0)
-				status = connector_status_disconnected;
-			else
-				status = connector_status_connected;
-		}
+	if (output->hpd_gpio) {
+		if (gpiod_get_value(output->hpd_gpio) == 0)
+			status = connector_status_disconnected;
+		else
+			status = connector_status_connected;
 	} else {
 		if (!output->panel)
 			status = connector_status_disconnected;
@@ -102,6 +95,7 @@ static irqreturn_t hpd_irq(int irq, void *data)
 int tegra_output_probe(struct tegra_output *output)
 {
 	struct device_node *ddc, *panel;
+	unsigned long flags;
 	int err, size;
 
 	if (!output->of_node)
@@ -130,23 +124,18 @@ int tegra_output_probe(struct tegra_output *output)
 		of_node_put(ddc);
 	}
 
-	output->hpd_gpio = of_get_named_gpio_flags(output->of_node,
-						   "nvidia,hpd-gpio", 0,
-						   &output->hpd_gpio_flags);
-	if (gpio_is_valid(output->hpd_gpio)) {
-		unsigned long flags;
+	output->hpd_gpio = devm_gpiod_get_from_of_node(output->dev,
+						       output->of_node,
+						       "nvidia,hpd-gpio", 0,
+						       GPIOD_IN,
+						       "HDMI hotplug detect");
+	if (IS_ERR(output->hpd_gpio))
+		return PTR_ERR(output->hpd_gpio);
 
-		err = gpio_request_one(output->hpd_gpio, GPIOF_DIR_IN,
-				       "HDMI hotplug detect");
+	if (output->hpd_gpio) {
+		err = gpiod_to_irq(output->hpd_gpio);
 		if (err < 0) {
-			dev_err(output->dev, "gpio_request_one(): %d\n", err);
-			return err;
-		}
-
-		err = gpio_to_irq(output->hpd_gpio);
-		if (err < 0) {
-			dev_err(output->dev, "gpio_to_irq(): %d\n", err);
-			gpio_free(output->hpd_gpio);
+			dev_err(output->dev, "gpiod_to_irq(): %d\n", err);
 			return err;
 		}
 
@@ -160,7 +149,6 @@ int tegra_output_probe(struct tegra_output *output)
 		if (err < 0) {
 			dev_err(output->dev, "failed to request IRQ#%u: %d\n",
 				output->hpd_irq, err);
-			gpio_free(output->hpd_gpio);
 			return err;
 		}
 
@@ -186,10 +174,8 @@ void tegra_output_remove(struct tegra_output *output)
 	if (output->cec)
 		cec_notifier_put(output->cec);
 
-	if (gpio_is_valid(output->hpd_gpio)) {
+	if (output->hpd_gpio)
 		free_irq(output->hpd_irq, output);
-		gpio_free(output->hpd_gpio);
-	}
 
 	if (output->ddc)
 		put_device(&output->ddc->dev);
@@ -209,7 +195,7 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
 	 * The connector is now registered and ready to receive hotplug events
 	 * so the hotplug interrupt can be enabled.
 	 */
-	if (gpio_is_valid(output->hpd_gpio))
+	if (output->hpd_gpio)
 		enable_irq(output->hpd_irq);
 
 	return 0;
@@ -221,7 +207,7 @@ void tegra_output_exit(struct tegra_output *output)
 	 * The connector is going away, so the interrupt must be disabled to
 	 * prevent the hotplug interrupt handler from potentially crashing.
 	 */
-	if (gpio_is_valid(output->hpd_gpio))
+	if (output->hpd_gpio)
 		disable_irq(output->hpd_irq);
 
 	if (output->panel)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..6953dd264172 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -166,28 +166,35 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	ttm_mem_global_free(bdev->glob->mem_glob, acc_size);
 }
 
-void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
+				  struct ttm_mem_reg *mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
 
 	reservation_object_assert_held(bo->resv);
 
-	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
-		BUG_ON(!list_empty(&bo->lru));
+	if (!list_empty(&bo->lru))
+		return;
 
-		man = &bdev->man[bo->mem.mem_type];
-		list_add_tail(&bo->lru, &man->lru[bo->priority]);
-		kref_get(&bo->list_kref);
+	if (mem->placement & TTM_PL_FLAG_NO_EVICT)
+		return;
 
-		if (bo->ttm && !(bo->ttm->page_flags &
-				 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
-			list_add_tail(&bo->swap,
-				      &bdev->glob->swap_lru[bo->priority]);
-			kref_get(&bo->list_kref);
-		}
+	man = &bdev->man[mem->mem_type];
+	list_add_tail(&bo->lru, &man->lru[bo->priority]);
+	kref_get(&bo->list_kref);
+
+	if (bo->ttm && !(bo->ttm->page_flags &
+			 (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
+		list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]);
+		kref_get(&bo->list_kref);
 	}
 }
+
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+{
+	ttm_bo_add_mem_to_lru(bo, &bo->mem);
+}
 EXPORT_SYMBOL(ttm_bo_add_to_lru);
 
 static void ttm_bo_ref_bug(struct kref *list_kref)
@@ -766,32 +773,72 @@ EXPORT_SYMBOL(ttm_bo_eviction_valuable);
  * b. Otherwise, trylock it.
  */
 static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
-			struct ttm_operation_ctx *ctx, bool *locked)
+			struct ttm_operation_ctx *ctx, bool *locked, bool *busy)
 {
 	bool ret = false;
 
-	*locked = false;
 	if (bo->resv == ctx->resv) {
 		reservation_object_assert_held(bo->resv);
 		if (ctx->flags & TTM_OPT_FLAG_ALLOW_RES_EVICT
 		    || !list_empty(&bo->ddestroy))
 			ret = true;
+		*locked = false;
+		if (busy)
+			*busy = false;
 	} else {
-		*locked = reservation_object_trylock(bo->resv);
-		ret = *locked;
+		ret = reservation_object_trylock(bo->resv);
+		*locked = ret;
+		if (busy)
+			*busy = !ret;
 	}
 
 	return ret;
 }
 
+/**
+ * ttm_mem_evict_wait_busy - wait for a busy BO to become available
+ *
+ * @busy_bo: BO which couldn't be locked with trylock
+ * @ctx: operation context
+ * @ticket: acquire ticket
+ *
+ * Try to lock a busy buffer object to avoid failing eviction.
+ */
+static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo,
+				   struct ttm_operation_ctx *ctx,
+				   struct ww_acquire_ctx *ticket)
+{
+	int r;
+
+	if (!busy_bo || !ticket)
+		return -EBUSY;
+
+	if (ctx->interruptible)
+		r = reservation_object_lock_interruptible(busy_bo->resv,
+							  ticket);
+	else
+		r = reservation_object_lock(busy_bo->resv, ticket);
+
+	/*
+	 * TODO: It would be better to keep the BO locked until allocation is at
+	 * least tried one more time, but that would mean a much larger rework
+	 * of TTM.
+	 */
+	if (!r)
+		reservation_object_unlock(busy_bo->resv);
+
+	return r == -EDEADLK ? -EAGAIN : r;
+}
+
 static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 			       uint32_t mem_type,
 			       const struct ttm_place *place,
-			       struct ttm_operation_ctx *ctx)
+			       struct ttm_operation_ctx *ctx,
+			       struct ww_acquire_ctx *ticket)
 {
+	struct ttm_buffer_object *bo = NULL, *busy_bo = NULL;
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
-	struct ttm_buffer_object *bo = NULL;
 	bool locked = false;
 	unsigned i;
 	int ret;
@@ -799,8 +846,15 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &man->lru[i], lru) {
-			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked))
+			bool busy;
+
+			if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							    &busy)) {
+				if (busy && !busy_bo &&
+				    bo->resv->lock.ctx != ticket)
+					busy_bo = bo;
 				continue;
+			}
 
 			if (place && !bdev->driver->eviction_valuable(bo,
 								      place)) {
@@ -819,8 +873,13 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	}
 
 	if (!bo) {
+		if (busy_bo)
+			ttm_bo_get(busy_bo);
 		spin_unlock(&glob->lru_lock);
-		return -EBUSY;
+		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
+		if (busy_bo)
+			ttm_bo_put(busy_bo);
+		return ret;
 	}
 
 	kref_get(&bo->list_kref);
@@ -892,13 +951,12 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
  * space, or we've evicted everything and there isn't enough space.
  */
 static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
-					uint32_t mem_type,
-					const struct ttm_place *place,
-					struct ttm_mem_reg *mem,
-					struct ttm_operation_ctx *ctx)
+				  const struct ttm_place *place,
+				  struct ttm_mem_reg *mem,
+				  struct ttm_operation_ctx *ctx)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	int ret;
 
 	do {
@@ -907,11 +965,12 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 			return ret;
 		if (mem->mm_node)
 			break;
-		ret = ttm_mem_evict_first(bdev, mem_type, place, ctx);
+		ret = ttm_mem_evict_first(bdev, mem->mem_type, place, ctx,
+					  bo->resv->lock.ctx);
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	mem->mem_type = mem_type;
+
 	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
@@ -960,6 +1019,59 @@ static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
 }
 
 /**
+ * ttm_bo_mem_placement - check if placement is compatible
+ * @bo: BO to find memory for
+ * @place: where to search
+ * @mem: the memory object to fill in
+ * @ctx: operation context
+ *
+ * Check if placement is compatible and fill in mem structure.
+ * Returns -EBUSY if placement won't work or negative error code.
+ * 0 when placement can be used.
+ */
+static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
+				const struct ttm_place *place,
+				struct ttm_mem_reg *mem,
+				struct ttm_operation_ctx *ctx)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	uint32_t mem_type = TTM_PL_SYSTEM;
+	struct ttm_mem_type_manager *man;
+	uint32_t cur_flags = 0;
+	int ret;
+
+	ret = ttm_mem_type_from_place(place, &mem_type);
+	if (ret)
+		return ret;
+
+	man = &bdev->man[mem_type];
+	if (!man->has_type || !man->use_type)
+		return -EBUSY;
+
+	if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags))
+		return -EBUSY;
+
+	cur_flags = ttm_bo_select_caching(man, bo->mem.placement, cur_flags);
+	/*
+	 * Use the access and other non-mapping-related flag bits from
+	 * the memory placement flags to the current flags
+	 */
+	ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE);
+
+	mem->mem_type = mem_type;
+	mem->placement = cur_flags;
+
+	if (bo->mem.mem_type < mem_type && !list_empty(&bo->lru)) {
+		spin_lock(&bo->bdev->glob->lru_lock);
+		ttm_bo_del_from_lru(bo);
+		ttm_bo_add_mem_to_lru(bo, mem);
+		spin_unlock(&bo->bdev->glob->lru_lock);
+	}
+
+	return 0;
+}
+
+/**
  * Creates space for memory region @mem according to its type.
  *
  * This function first searches for free space in compatible memory types in
@@ -973,12 +1085,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			struct ttm_operation_ctx *ctx)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_mem_type_manager *man;
-	uint32_t mem_type = TTM_PL_SYSTEM;
-	uint32_t cur_flags = 0;
 	bool type_found = false;
-	bool type_ok = false;
-	bool has_erestartsys = false;
 	int i, ret;
 
 	ret = reservation_object_reserve_shared(bo->resv, 1);
@@ -988,97 +1095,70 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
+		struct ttm_mem_type_manager *man;
 
-		ret = ttm_mem_type_from_place(place, &mem_type);
-		if (ret)
-			return ret;
-		man = &bdev->man[mem_type];
-		if (!man->has_type || !man->use_type)
-			continue;
-
-		type_ok = ttm_bo_mt_compatible(man, mem_type, place,
-						&cur_flags);
-
-		if (!type_ok)
+		ret = ttm_bo_mem_placement(bo, place, mem, ctx);
+		if (ret == -EBUSY)
 			continue;
+		if (ret)
+			goto error;
 
 		type_found = true;
-		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
-						  cur_flags);
-		/*
-		 * Use the access and other non-mapping-related flag bits from
-		 * the memory placement flags to the current flags
-		 */
-		ttm_flag_masked(&cur_flags, place->flags,
-				~TTM_PL_MASK_MEMTYPE);
-
-		if (mem_type == TTM_PL_SYSTEM)
-			break;
+		mem->mm_node = NULL;
+		if (mem->mem_type == TTM_PL_SYSTEM)
+			return 0;
 
+		man = &bdev->man[mem->mem_type];
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
-			return ret;
+			goto error;
 
 		if (mem->mm_node) {
 			ret = ttm_bo_add_move_fence(bo, man, mem);
 			if (unlikely(ret)) {
 				(*man->func->put_node)(man, mem);
-				return ret;
+				goto error;
 			}
-			break;
+			return 0;
 		}
 	}
 
-	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
-		mem->mem_type = mem_type;
-		mem->placement = cur_flags;
-		return 0;
-	}
-
 	for (i = 0; i < placement->num_busy_placement; ++i) {
 		const struct ttm_place *place = &placement->busy_placement[i];
 
-		ret = ttm_mem_type_from_place(place, &mem_type);
-		if (ret)
-			return ret;
-		man = &bdev->man[mem_type];
-		if (!man->has_type || !man->use_type)
-			continue;
-		if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags))
+		ret = ttm_bo_mem_placement(bo, place, mem, ctx);
+		if (ret == -EBUSY)
 			continue;
+		if (ret)
+			goto error;
 
 		type_found = true;
-		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
-						  cur_flags);
-		/*
-		 * Use the access and other non-mapping-related flag bits from
-		 * the memory placement flags to the current flags
-		 */
-		ttm_flag_masked(&cur_flags, place->flags,
-				~TTM_PL_MASK_MEMTYPE);
-
-		if (mem_type == TTM_PL_SYSTEM) {
-			mem->mem_type = mem_type;
-			mem->placement = cur_flags;
-			mem->mm_node = NULL;
+		mem->mm_node = NULL;
+		if (mem->mem_type == TTM_PL_SYSTEM)
 			return 0;
-		}
 
-		ret = ttm_bo_mem_force_space(bo, mem_type, place, mem, ctx);
-		if (ret == 0 && mem->mm_node) {
-			mem->placement = cur_flags;
+		ret = ttm_bo_mem_force_space(bo, place, mem, ctx);
+		if (ret == 0 && mem->mm_node)
 			return 0;
-		}
-		if (ret == -ERESTARTSYS)
-			has_erestartsys = true;
+
+		if (ret && ret != -EBUSY)
+			goto error;
 	}
 
+	ret = -ENOMEM;
 	if (!type_found) {
 		pr_err(TTM_PFX "No compatible memory type found\n");
-		return -EINVAL;
+		ret = -EINVAL;
+	}
+
+error:
+	if (bo->mem.mem_type == TTM_PL_SYSTEM && !list_empty(&bo->lru)) {
+		spin_lock(&bo->bdev->glob->lru_lock);
+		ttm_bo_move_to_lru_tail(bo, NULL);
+		spin_unlock(&bo->bdev->glob->lru_lock);
 	}
 
-	return (has_erestartsys) ? -ERESTARTSYS : -ENOMEM;
+	return ret;
 }
 EXPORT_SYMBOL(ttm_bo_mem_space);
 
@@ -1401,7 +1481,8 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		while (!list_empty(&man->lru[i])) {
 			spin_unlock(&glob->lru_lock);
-			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+			ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx,
+						  NULL);
 			if (ret)
 				return ret;
 			spin_lock(&glob->lru_lock);
@@ -1658,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	mutex_lock(&ttm_global_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&ttm_global_mutex);
+	bdev->vm_ops = &ttm_bo_vm_ops;
 
 	return 0;
 out_no_sys:
@@ -1772,7 +1854,8 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx)
 	spin_lock(&glob->lru_lock);
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
 		list_for_each_entry(bo, &glob->swap_lru[i], swap) {
-			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked)) {
+			if (ttm_bo_evict_swapout_allowable(bo, ctx, &locked,
+							   NULL)) {
 				ret = 0;
 				break;
 			}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..9f918b992f7e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -539,13 +539,13 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp)
 		tmp = pgprot_noncached(tmp);
 #endif
 #if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
-    defined(__powerpc__)
+    defined(__powerpc__) || defined(__mips__)
 	if (caching_flags & TTM_PL_FLAG_WC)
 		tmp = pgprot_writecombine(tmp);
 	else
 		tmp = pgprot_noncached(tmp);
 #endif
-#if defined(__sparc__) || defined(__mips__)
+#if defined(__sparc__)
 	tmp = pgprot_noncached(tmp);
 #endif
 	return tmp;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 6dacff49c1cc..0c4576cbafcf 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
 #include <linux/uaccess.h>
 #include <linux/mem_encrypt.h>
 
-#define TTM_BO_VM_NUM_PREFAULT 16
-
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 				struct vm_fault *vmf)
 {
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
 		+ page_offset;
 }
 
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ *    0 on success and the bo was reserved.
+ *    VM_FAULT_RETRY if blocking wait.
+ *    VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+			     struct vm_fault *vmf)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
-	    vma->vm_private_data;
-	struct ttm_bo_device *bdev = bo->bdev;
-	unsigned long page_offset;
-	unsigned long page_last;
-	unsigned long pfn;
-	struct ttm_tt *ttm = NULL;
-	struct page *page;
-	int err;
-	int i;
-	vm_fault_t ret = VM_FAULT_NOPAGE;
-	unsigned long address = vmf->address;
-	struct ttm_mem_type_manager *man =
-		&bdev->man[bo->mem.mem_type];
-	struct vm_area_struct cvma;
-
 	/*
 	 * Work around locking order reversal in fault / nopfn
 	 * between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		return VM_FAULT_NOPAGE;
 	}
 
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ *   VM_FAULT_NOPAGE on success or pending signal
+ *   VM_FAULT_SIGBUS on unspecified error
+ *   VM_FAULT_OOM on out-of-memory
+ *   VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+				    pgprot_t prot,
+				    pgoff_t num_prefault)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct vm_area_struct cvma = *vma;
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	struct ttm_bo_device *bdev = bo->bdev;
+	unsigned long page_offset;
+	unsigned long page_last;
+	unsigned long pfn;
+	struct ttm_tt *ttm = NULL;
+	struct page *page;
+	int err;
+	pgoff_t i;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	unsigned long address = vmf->address;
+	struct ttm_mem_type_manager *man =
+		&bdev->man[bo->mem.mem_type];
+
 	/*
 	 * Refuse to fault imported pages. This should be handled
 	 * (if at all) by redirecting mmap to the exporter.
 	 */
-	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out_unlock;
-	}
+	if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+		return VM_FAULT_SIGBUS;
 
 	if (bdev->driver->fault_reserve_notify) {
 		struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +213,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 			break;
 		case -EBUSY:
 		case -ERESTARTSYS:
-			ret = VM_FAULT_NOPAGE;
-			goto out_unlock;
+			return VM_FAULT_NOPAGE;
 		default:
-			ret = VM_FAULT_SIGBUS;
-			goto out_unlock;
+			return VM_FAULT_SIGBUS;
 		}
 
 		if (bo->moving != moving) {
@@ -189,21 +231,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 	 * move.
 	 */
 	ret = ttm_bo_vm_fault_idle(bo, vmf);
-	if (unlikely(ret != 0)) {
-		if (ret == VM_FAULT_RETRY &&
-		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
-			/* The BO has already been unreserved. */
-			return ret;
-		}
-
-		goto out_unlock;
-	}
+	if (unlikely(ret != 0))
+		return ret;
 
 	err = ttm_mem_io_lock(man, true);
-	if (unlikely(err != 0)) {
-		ret = VM_FAULT_NOPAGE;
-		goto out_unlock;
-	}
+	if (unlikely(err != 0))
+		return VM_FAULT_NOPAGE;
 	err = ttm_mem_io_reserve_vm(bo);
 	if (unlikely(err != 0)) {
 		ret = VM_FAULT_SIGBUS;
@@ -220,18 +253,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		goto out_io_unlock;
 	}
 
-	/*
-	 * Make a local vma copy to modify the page_prot member
-	 * and vm_flags if necessary. The vma parameter is protected
-	 * by mmap_sem in write mode.
-	 */
-	cvma = *vma;
-	cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
-
-	if (bo->mem.bus.is_iomem) {
-		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
-						cvma.vm_page_prot);
-	} else {
+	cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+	if (!bo->mem.bus.is_iomem) {
 		struct ttm_operation_ctx ctx = {
 			.interruptible = false,
 			.no_wait_gpu = false,
@@ -240,24 +263,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 		};
 
 		ttm = bo->ttm;
-		cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
-						cvma.vm_page_prot);
-
-		/* Allocate all page at once, most common usage */
-		if (ttm_tt_populate(ttm, &ctx)) {
+		if (ttm_tt_populate(bo->ttm, &ctx)) {
 			ret = VM_FAULT_OOM;
 			goto out_io_unlock;
 		}
+	} else {
+		/* Iomem should not be marked encrypted */
+		cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
 	}
 
 	/*
 	 * Speculatively prefault a number of pages. Only error on
 	 * first page.
 	 */
-	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+	for (i = 0; i < num_prefault; ++i) {
 		if (bo->mem.bus.is_iomem) {
-			/* Iomem should not be marked encrypted */
-			cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
 			pfn = ttm_bo_io_mem_pfn(bo, page_offset);
 		} else {
 			page = ttm->pages[page_offset];
@@ -295,7 +315,26 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 	ret = VM_FAULT_NOPAGE;
 out_io_unlock:
 	ttm_mem_io_unlock(man);
-out_unlock:
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	pgprot_t prot;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	vm_fault_t ret;
+
+	ret = ttm_bo_vm_reserve(bo, vmf);
+	if (ret)
+		return ret;
+
+	prot = vm_get_page_prot(vma->vm_flags);
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
 	reservation_object_unlock(bo->resv);
 	return ret;
 }
@@ -395,7 +434,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 	return ret;
 }
 
-static const struct vm_operations_struct ttm_bo_vm_ops = {
+const struct vm_operations_struct ttm_bo_vm_ops = {
 	.fault = ttm_bo_vm_fault,
 	.open = ttm_bo_vm_open,
 	.close = ttm_bo_vm_close,
@@ -448,7 +487,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 	if (unlikely(ret != 0))
 		goto out_unref;
 
-	vma->vm_ops = &ttm_bo_vm_ops;
+	vma->vm_ops = bdev->vm_ops;
 
 	/*
 	 * Note: We're transferring the bo reference to
@@ -480,7 +519,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 
 	ttm_bo_get(bo);
 
-	vma->vm_ops = &ttm_bo_vm_ops;
+	vma->vm_ops = bo->bdev->vm_ops;
 	vma->vm_private_data = bo;
 	vma->vm_flags |= VM_MIXEDMAP;
 	vma->vm_flags |= VM_IO | VM_DONTEXPAND;
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 0075eb9a0b52..957ec375a4ba 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -69,7 +69,8 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		ttm_bo_add_to_lru(bo);
+		if (list_empty(&bo->lru))
+			ttm_bo_add_to_lru(bo);
 		reservation_object_unlock(bo->resv);
 	}
 	spin_unlock(&glob->lru_lock);
@@ -93,7 +94,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 
 int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 			   struct list_head *list, bool intr,
-			   struct list_head *dups)
+			   struct list_head *dups, bool del_lru)
 {
 	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
@@ -172,11 +173,11 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 		list_add(&entry->head, list);
 	}
 
-	if (ticket)
-		ww_acquire_done(ticket);
-	spin_lock(&glob->lru_lock);
-	ttm_eu_del_from_lru_locked(list);
-	spin_unlock(&glob->lru_lock);
+	if (del_lru) {
+		spin_lock(&glob->lru_lock);
+		ttm_eu_del_from_lru_locked(list);
+		spin_unlock(&glob->lru_lock);
+	}
 	return 0;
 }
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
@@ -203,7 +204,10 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 			reservation_object_add_shared_fence(bo->resv, fence);
 		else
 			reservation_object_add_excl_fence(bo->resv, fence);
-		ttm_bo_add_to_lru(bo);
+		if (list_empty(&bo->lru))
+			ttm_bo_add_to_lru(bo);
+		else
+			ttm_bo_move_to_lru_tail(bo, NULL);
 		reservation_object_unlock(bo->resv);
 	}
 	spin_unlock(&glob->lru_lock);
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index a24af2d2f574..78a78938e81f 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -26,6 +26,11 @@ static const struct v3d_reg_def v3d_hub_reg_defs[] = {
 	REGDEF(V3D_HUB_IDENT3),
 	REGDEF(V3D_HUB_INT_STS),
 	REGDEF(V3D_HUB_INT_MSK_STS),
+
+	REGDEF(V3D_MMU_CTL),
+	REGDEF(V3D_MMU_VIO_ADDR),
+	REGDEF(V3D_MMU_VIO_ID),
+	REGDEF(V3D_MMU_DEBUG_INFO),
 };
 
 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
@@ -50,12 +55,25 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = {
 	REGDEF(V3D_PTB_BPCA),
 	REGDEF(V3D_PTB_BPCS),
 
-	REGDEF(V3D_MMU_CTL),
-	REGDEF(V3D_MMU_VIO_ADDR),
-
 	REGDEF(V3D_GMP_STATUS),
 	REGDEF(V3D_GMP_CFG),
 	REGDEF(V3D_GMP_VIO_ADDR),
+
+	REGDEF(V3D_ERR_FDBGO),
+	REGDEF(V3D_ERR_FDBGB),
+	REGDEF(V3D_ERR_FDBGS),
+	REGDEF(V3D_ERR_STAT),
+};
+
+static const struct v3d_reg_def v3d_csd_reg_defs[] = {
+	REGDEF(V3D_CSD_STATUS),
+	REGDEF(V3D_CSD_CURRENT_CFG0),
+	REGDEF(V3D_CSD_CURRENT_CFG1),
+	REGDEF(V3D_CSD_CURRENT_CFG2),
+	REGDEF(V3D_CSD_CURRENT_CFG3),
+	REGDEF(V3D_CSD_CURRENT_CFG4),
+	REGDEF(V3D_CSD_CURRENT_CFG5),
+	REGDEF(V3D_CSD_CURRENT_CFG6),
 };
 
 static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
@@ -89,6 +107,17 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
 				   V3D_CORE_READ(core,
 						 v3d_core_reg_defs[i].reg));
 		}
+
+		if (v3d_has_csd(v3d)) {
+			for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
+				seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+					   core,
+					   v3d_csd_reg_defs[i].name,
+					   v3d_csd_reg_defs[i].reg,
+					   V3D_CORE_READ(core,
+							 v3d_csd_reg_defs[i].reg));
+			}
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index a06b05f714a5..fea597f4db8a 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -7,9 +7,9 @@
  * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
  * For V3D 2.x support, see the VC4 driver.
  *
- * Currently only single-core rendering using the binner and renderer,
- * along with TFU (texture formatting unit) rendering is supported.
- * V3D 4.x's CSD (compute shader dispatch) is not yet supported.
+ * The V3D GPU includes a tiled render (composed of a bin and render
+ * pipelines), the TFU (texture formatting unit), and the CSD (compute
+ * shader dispatch).
  */
 
 #include <linux/clk.h>
@@ -120,6 +120,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_V3D_PARAM_SUPPORTS_TFU:
 		args->value = 1;
 		return 0;
+	case DRM_V3D_PARAM_SUPPORTS_CSD:
+		args->value = v3d_has_csd(v3d);
+		return 0;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", args->param);
 		return -EINVAL;
@@ -179,6 +182,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 };
 
 static struct drm_driver v3d_drm_driver = {
@@ -235,9 +239,9 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
 	struct drm_device *drm;
 	struct v3d_dev *v3d;
 	int ret;
+	u32 mmu_debug;
 	u32 ident1;
 
-	dev->coherent_dma_mask = DMA_BIT_MASK(36);
 
 	v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
 	if (!v3d)
@@ -254,6 +258,11 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
 	if (ret)
 		goto dev_free;
 
+	mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
+	dev->coherent_dma_mask =
+		DMA_BIT_MASK(30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_PA_WIDTH));
+	v3d->va_width = 30 + V3D_GET_FIELD(mmu_debug, V3D_MMU_VA_WIDTH);
+
 	ident1 = V3D_READ(V3D_HUB_IDENT1);
 	v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
 		    V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index e9d4a2fdcf44..9aad9da1eb11 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -16,9 +16,11 @@ enum v3d_queue {
 	V3D_BIN,
 	V3D_RENDER,
 	V3D_TFU,
+	V3D_CSD,
+	V3D_CACHE_CLEAN,
 };
 
-#define V3D_MAX_QUEUES (V3D_TFU + 1)
+#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
@@ -55,6 +57,8 @@ struct v3d_dev {
 	 */
 	void *mmu_scratch;
 	dma_addr_t mmu_scratch_paddr;
+	/* virtual address bits from V3D to the MMU. */
+	int va_width;
 
 	/* Number of V3D cores. */
 	u32 cores;
@@ -67,9 +71,10 @@ struct v3d_dev {
 
 	struct work_struct overflow_mem_work;
 
-	struct v3d_exec_info *bin_job;
-	struct v3d_exec_info *render_job;
+	struct v3d_bin_job *bin_job;
+	struct v3d_render_job *render_job;
 	struct v3d_tfu_job *tfu_job;
+	struct v3d_csd_job *csd_job;
 
 	struct v3d_queue_state queue[V3D_MAX_QUEUES];
 
@@ -92,6 +97,12 @@ struct v3d_dev {
 	 */
 	struct mutex sched_lock;
 
+	/* Lock taken during a cache clean and when initiating an L2
+	 * flush, to keep L2 flushes from interfering with the
+	 * synchronous L2 cleans.
+	 */
+	struct mutex cache_clean_lock;
+
 	struct {
 		u32 num_allocated;
 		u32 pages_allocated;
@@ -104,6 +115,12 @@ to_v3d_dev(struct drm_device *dev)
 	return (struct v3d_dev *)dev->dev_private;
 }
 
+static inline bool
+v3d_has_csd(struct v3d_dev *v3d)
+{
+	return v3d->ver >= 41;
+}
+
 /* The per-fd struct, which tracks the MMU mappings. */
 struct v3d_file_priv {
 	struct v3d_dev *v3d;
@@ -117,7 +134,7 @@ struct v3d_bo {
 	struct drm_mm_node node;
 
 	/* List entry for the BO's position in
-	 * v3d_exec_info->unref_list
+	 * v3d_render_job->unref_list
 	 */
 	struct list_head unref_head;
 };
@@ -157,67 +174,74 @@ to_v3d_fence(struct dma_fence *fence)
 struct v3d_job {
 	struct drm_sched_job base;
 
-	struct v3d_exec_info *exec;
+	struct kref refcount;
 
-	/* An optional fence userspace can pass in for the job to depend on. */
-	struct dma_fence *in_fence;
+	struct v3d_dev *v3d;
+
+	/* This is the array of BOs that were looked up at the start
+	 * of submission.
+	 */
+	struct drm_gem_object **bo;
+	u32 bo_count;
+
+	/* Array of struct dma_fence * to block on before submitting this job.
+	 */
+	struct xarray deps;
+	unsigned long last_dep;
 
 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
 	struct dma_fence *irq_fence;
 
+	/* scheduler fence for when the job is considered complete and
+	 * the BO reservations can be released.
+	 */
+	struct dma_fence *done_fence;
+
+	/* Callback for the freeing of the job on refcount going to 0. */
+	void (*free)(struct kref *ref);
+};
+
+struct v3d_bin_job {
+	struct v3d_job base;
+
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
 
 	u32 timedout_ctca, timedout_ctra;
-};
 
-struct v3d_exec_info {
-	struct v3d_dev *v3d;
+	/* Corresponding render job, for attaching our overflow memory. */
+	struct v3d_render_job *render;
 
-	struct v3d_job bin, render;
-
-	/* Fence for when the scheduler considers the binner to be
-	 * done, for render to depend on.
-	 */
-	struct dma_fence *bin_done_fence;
+	/* Submitted tile memory allocation start/size, tile state. */
+	u32 qma, qms, qts;
+};
 
-	/* Fence for when the scheduler considers the render to be
-	 * done, for when the BOs reservations should be complete.
-	 */
-	struct dma_fence *render_done_fence;
+struct v3d_render_job {
+	struct v3d_job base;
 
-	struct kref refcount;
+	/* GPU virtual addresses of the start/end of the CL job. */
+	u32 start, end;
 
-	/* This is the array of BOs that were looked up at the start of exec. */
-	struct v3d_bo **bo;
-	u32 bo_count;
+	u32 timedout_ctca, timedout_ctra;
 
 	/* List of overflow BOs used in the job that need to be
 	 * released once the job is complete.
 	 */
 	struct list_head unref_list;
-
-	/* Submitted tile memory allocation start/size, tile state. */
-	u32 qma, qms, qts;
 };
 
 struct v3d_tfu_job {
-	struct drm_sched_job base;
+	struct v3d_job base;
 
 	struct drm_v3d_submit_tfu args;
+};
 
-	/* An optional fence userspace can pass in for the job to depend on. */
-	struct dma_fence *in_fence;
-
-	/* v3d fence to be signaled by IRQ handler when the job is complete. */
-	struct dma_fence *irq_fence;
-
-	struct v3d_dev *v3d;
+struct v3d_csd_job {
+	struct v3d_job base;
 
-	struct kref refcount;
+	u32 timedout_batches;
 
-	/* This is the array of BOs that were looked up at the start of exec. */
-	struct v3d_bo *bo[4];
+	struct drm_v3d_submit_csd args;
 };
 
 /**
@@ -281,12 +305,14 @@ int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
+int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
 int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
-void v3d_exec_put(struct v3d_exec_info *exec);
-void v3d_tfu_job_put(struct v3d_tfu_job *exec);
+void v3d_job_put(struct v3d_job *job);
 void v3d_reset(struct v3d_dev *v3d);
 void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_clean_caches(struct v3d_dev *v3d);
 
 /* v3d_irq.c */
 int v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index b0a2a1ae2eb1..89840ed212c0 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -36,6 +36,8 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 		return "v3d-render";
 	case V3D_TFU:
 		return "v3d-tfu";
+	case V3D_CSD:
+		return "v3d-csd";
 	default:
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 93ff8fcbe475..27e0f87075d9 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -109,7 +109,9 @@ v3d_reset(struct v3d_dev *v3d)
 {
 	struct drm_device *dev = &v3d->drm;
 
-	DRM_ERROR("Resetting GPU.\n");
+	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
+	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
+		      V3D_CORE_READ(0, V3D_ERR_STAT));
 	trace_v3d_reset_begin(dev);
 
 	/* XXX: only needed for safe powerdown, not reset. */
@@ -162,10 +164,52 @@ v3d_flush_l2t(struct v3d_dev *v3d, int core)
 	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
 	 * need to wait for completion before dispatching the job --
 	 * L2T accesses will be stalled until the flush has completed.
+	 * However, we do need to make sure we don't try to trigger a
+	 * new flush while the L2_CLEAN queue is trying to
+	 * synchronously clean after a job.
 	 */
+	mutex_lock(&v3d->cache_clean_lock);
 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 		       V3D_L2TCACTL_L2TFLS |
 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
+	mutex_unlock(&v3d->cache_clean_lock);
+}
+
+/* Cleans texture L1 and L2 cachelines (writing back dirty data).
+ *
+ * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
+ * executed, we need to make sure that the clean is done before
+ * signaling job completion.  So, we synchronously wait before
+ * returning, and we make sure that L2 invalidates don't happen in the
+ * meantime to confuse our are-we-done checks.
+ */
+void
+v3d_clean_caches(struct v3d_dev *v3d)
+{
+	struct drm_device *dev = &v3d->drm;
+	int core = 0;
+
+	trace_v3d_cache_clean_begin(dev);
+
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+	}
+
+	mutex_lock(&v3d->cache_clean_lock);
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
+		       V3D_L2TCACTL_L2TFLS |
+		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));
+
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L2T clean\n");
+	}
+
+	mutex_unlock(&v3d->cache_clean_lock);
+
+	trace_v3d_cache_clean_end(dev);
 }
 
 /* Invalidates the slice caches.  These are read-only caches. */
@@ -193,28 +237,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
 	v3d_invalidate_slices(v3d, 0);
 }
 
-static void
-v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
-			 struct dma_fence *fence)
-{
-	int i;
-
-	for (i = 0; i < bo_count; i++) {
-		/* XXX: Use shared fences for read-only objects. */
-		reservation_object_add_excl_fence(bos[i]->base.base.resv,
-						  fence);
-	}
-}
-
-static void
-v3d_unlock_bo_reservations(struct v3d_bo **bos,
-			   int bo_count,
-			   struct ww_acquire_ctx *acquire_ctx)
-{
-	drm_gem_unlock_reservations((struct drm_gem_object **)bos, bo_count,
-				    acquire_ctx);
-}
-
 /* Takes the reservation lock on all the BOs being referenced, so that
  * at queue submit time we can update the reservations.
  *
@@ -223,26 +245,21 @@ v3d_unlock_bo_reservations(struct v3d_bo **bos,
  * to v3d, so we don't attach dma-buf fences to them.
  */
 static int
-v3d_lock_bo_reservations(struct v3d_bo **bos,
-			 int bo_count,
+v3d_lock_bo_reservations(struct v3d_job *job,
 			 struct ww_acquire_ctx *acquire_ctx)
 {
 	int i, ret;
 
-	ret = drm_gem_lock_reservations((struct drm_gem_object **)bos,
-					bo_count, acquire_ctx);
+	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
 	if (ret)
 		return ret;
 
-	/* Reserve space for our shared (read-only) fence references,
-	 * before we commit the CL to the hardware.
-	 */
-	for (i = 0; i < bo_count; i++) {
-		ret = reservation_object_reserve_shared(bos[i]->base.base.resv,
-							1);
+	for (i = 0; i < job->bo_count; i++) {
+		ret = drm_gem_fence_array_add_implicit(&job->deps,
+						       job->bo[i], true);
 		if (ret) {
-			v3d_unlock_bo_reservations(bos, bo_count,
-						   acquire_ctx);
+			drm_gem_unlock_reservations(job->bo, job->bo_count,
+						    acquire_ctx);
 			return ret;
 		}
 	}
@@ -251,11 +268,11 @@ v3d_lock_bo_reservations(struct v3d_bo **bos,
 }
 
 /**
- * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
  * referenced by the job.
  * @dev: DRM device
  * @file_priv: DRM file for this fd
- * @exec: V3D job being set up
+ * @job: V3D job being set up
  *
  * The command validator needs to reference BOs by their index within
  * the submitted job's BO list.  This does the validation of the job's
@@ -265,18 +282,19 @@ v3d_lock_bo_reservations(struct v3d_bo **bos,
  * failure, because that will happen at v3d_exec_cleanup() time.
  */
 static int
-v3d_cl_lookup_bos(struct drm_device *dev,
-		  struct drm_file *file_priv,
-		  struct drm_v3d_submit_cl *args,
-		  struct v3d_exec_info *exec)
+v3d_lookup_bos(struct drm_device *dev,
+	       struct drm_file *file_priv,
+	       struct v3d_job *job,
+	       u64 bo_handles,
+	       u32 bo_count)
 {
 	u32 *handles;
 	int ret = 0;
 	int i;
 
-	exec->bo_count = args->bo_handle_count;
+	job->bo_count = bo_count;
 
-	if (!exec->bo_count) {
+	if (!job->bo_count) {
 		/* See comment on bo_index for why we have to check
 		 * this.
 		 */
@@ -284,15 +302,15 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	exec->bo = kvmalloc_array(exec->bo_count,
-				  sizeof(struct drm_gem_cma_object *),
-				  GFP_KERNEL | __GFP_ZERO);
-	if (!exec->bo) {
+	job->bo = kvmalloc_array(job->bo_count,
+				 sizeof(struct drm_gem_cma_object *),
+				 GFP_KERNEL | __GFP_ZERO);
+	if (!job->bo) {
 		DRM_DEBUG("Failed to allocate validated BO pointers\n");
 		return -ENOMEM;
 	}
 
-	handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
+	handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
 	if (!handles) {
 		ret = -ENOMEM;
 		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
@@ -300,15 +318,15 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 	}
 
 	if (copy_from_user(handles,
-			   (void __user *)(uintptr_t)args->bo_handles,
-			   exec->bo_count * sizeof(u32))) {
+			   (void __user *)(uintptr_t)bo_handles,
+			   job->bo_count * sizeof(u32))) {
 		ret = -EFAULT;
 		DRM_DEBUG("Failed to copy in GEM handles\n");
 		goto fail;
 	}
 
 	spin_lock(&file_priv->table_lock);
-	for (i = 0; i < exec->bo_count; i++) {
+	for (i = 0; i < job->bo_count; i++) {
 		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 						     handles[i]);
 		if (!bo) {
@@ -319,7 +337,7 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 			goto fail;
 		}
 		drm_gem_object_get(bo);
-		exec->bo[i] = to_v3d_bo(bo);
+		job->bo[i] = bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
@@ -329,67 +347,50 @@ fail:
 }
 
 static void
-v3d_exec_cleanup(struct kref *ref)
+v3d_job_free(struct kref *ref)
 {
-	struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
-						  refcount);
-	struct v3d_dev *v3d = exec->v3d;
-	unsigned int i;
-	struct v3d_bo *bo, *save;
-
-	dma_fence_put(exec->bin.in_fence);
-	dma_fence_put(exec->render.in_fence);
-
-	dma_fence_put(exec->bin.irq_fence);
-	dma_fence_put(exec->render.irq_fence);
-
-	dma_fence_put(exec->bin_done_fence);
-	dma_fence_put(exec->render_done_fence);
+	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
+	unsigned long index;
+	struct dma_fence *fence;
+	int i;
 
-	for (i = 0; i < exec->bo_count; i++)
-		drm_gem_object_put_unlocked(&exec->bo[i]->base.base);
-	kvfree(exec->bo);
+	for (i = 0; i < job->bo_count; i++) {
+		if (job->bo[i])
+			drm_gem_object_put_unlocked(job->bo[i]);
+	}
+	kvfree(job->bo);
 
-	list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
-		drm_gem_object_put_unlocked(&bo->base.base);
+	xa_for_each(&job->deps, index, fence) {
+		dma_fence_put(fence);
 	}
+	xa_destroy(&job->deps);
 
-	pm_runtime_mark_last_busy(v3d->dev);
-	pm_runtime_put_autosuspend(v3d->dev);
+	dma_fence_put(job->irq_fence);
+	dma_fence_put(job->done_fence);
 
-	kfree(exec);
-}
+	pm_runtime_mark_last_busy(job->v3d->dev);
+	pm_runtime_put_autosuspend(job->v3d->dev);
 
-void v3d_exec_put(struct v3d_exec_info *exec)
-{
-	kref_put(&exec->refcount, v3d_exec_cleanup);
+	kfree(job);
 }
 
 static void
-v3d_tfu_job_cleanup(struct kref *ref)
+v3d_render_job_free(struct kref *ref)
 {
-	struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
-					       refcount);
-	struct v3d_dev *v3d = job->v3d;
-	unsigned int i;
-
-	dma_fence_put(job->in_fence);
-	dma_fence_put(job->irq_fence);
+	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
+						  base.refcount);
+	struct v3d_bo *bo, *save;
 
-	for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
-		if (job->bo[i])
-			drm_gem_object_put_unlocked(&job->bo[i]->base.base);
+	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
+		drm_gem_object_put_unlocked(&bo->base.base);
 	}
 
-	pm_runtime_mark_last_busy(v3d->dev);
-	pm_runtime_put_autosuspend(v3d->dev);
-
-	kfree(job);
+	v3d_job_free(ref);
 }
 
-void v3d_tfu_job_put(struct v3d_tfu_job *job)
+void v3d_job_put(struct v3d_job *job)
 {
-	kref_put(&job->refcount, v3d_tfu_job_cleanup);
+	kref_put(&job->refcount, job->free);
 }
 
 int
@@ -425,6 +426,87 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int
+v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
+	     struct v3d_job *job, void (*free)(struct kref *ref),
+	     u32 in_sync)
+{
+	struct dma_fence *in_fence = NULL;
+	int ret;
+
+	job->v3d = v3d;
+	job->free = free;
+
+	ret = pm_runtime_get_sync(v3d->dev);
+	if (ret < 0)
+		return ret;
+
+	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
+
+	ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence);
+	if (ret == -EINVAL)
+		goto fail;
+
+	ret = drm_gem_fence_array_add(&job->deps, in_fence);
+	if (ret)
+		goto fail;
+
+	kref_init(&job->refcount);
+
+	return 0;
+fail:
+	xa_destroy(&job->deps);
+	pm_runtime_put_autosuspend(v3d->dev);
+	return ret;
+}
+
+static int
+v3d_push_job(struct v3d_file_priv *v3d_priv,
+	     struct v3d_job *job, enum v3d_queue queue)
+{
+	int ret;
+
+	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
+				 v3d_priv);
+	if (ret)
+		return ret;
+
+	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+	/* put by scheduler job completion */
+	kref_get(&job->refcount);
+
+	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
+
+	return 0;
+}
+
+static void
+v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
+					 struct v3d_job *job,
+					 struct ww_acquire_ctx *acquire_ctx,
+					 u32 out_sync,
+					 struct dma_fence *done_fence)
+{
+	struct drm_syncobj *sync_out;
+	int i;
+
+	for (i = 0; i < job->bo_count; i++) {
+		/* XXX: Use shared fences for read-only objects. */
+		reservation_object_add_excl_fence(job->bo[i]->resv,
+						  job->done_fence);
+	}
+
+	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+
+	/* Update the return sync object for the job */
+	sync_out = drm_syncobj_find(file_priv, out_sync);
+	if (sync_out) {
+		drm_syncobj_replace_fence(sync_out, done_fence);
+		drm_syncobj_put(sync_out);
+	}
+}
+
 /**
  * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
  * @dev: DRM device
@@ -444,9 +526,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
 	struct drm_v3d_submit_cl *args = data;
-	struct v3d_exec_info *exec;
+	struct v3d_bin_job *bin = NULL;
+	struct v3d_render_job *render;
 	struct ww_acquire_ctx acquire_ctx;
-	struct drm_syncobj *sync_out;
 	int ret = 0;
 
 	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
@@ -456,100 +538,87 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
-	if (!exec)
+	render = kcalloc(1, sizeof(*render), GFP_KERNEL);
+	if (!render)
 		return -ENOMEM;
 
-	ret = pm_runtime_get_sync(v3d->dev);
-	if (ret < 0) {
-		kfree(exec);
+	render->start = args->rcl_start;
+	render->end = args->rcl_end;
+	INIT_LIST_HEAD(&render->unref_list);
+
+	ret = v3d_job_init(v3d, file_priv, &render->base,
+			   v3d_render_job_free, args->in_sync_rcl);
+	if (ret) {
+		kfree(render);
 		return ret;
 	}
 
-	kref_init(&exec->refcount);
+	if (args->bcl_start != args->bcl_end) {
+		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
+		if (!bin)
+			return -ENOMEM;
 
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
-				     0, 0, &exec->bin.in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+		ret = v3d_job_init(v3d, file_priv, &bin->base,
+				   v3d_job_free, args->in_sync_bcl);
+		if (ret) {
+			v3d_job_put(&render->base);
+			return ret;
+		}
 
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
-				     0, 0, &exec->render.in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+		bin->start = args->bcl_start;
+		bin->end = args->bcl_end;
+		bin->qma = args->qma;
+		bin->qms = args->qms;
+		bin->qts = args->qts;
+		bin->render = render;
+	}
 
-	exec->qma = args->qma;
-	exec->qms = args->qms;
-	exec->qts = args->qts;
-	exec->bin.exec = exec;
-	exec->bin.start = args->bcl_start;
-	exec->bin.end = args->bcl_end;
-	exec->render.exec = exec;
-	exec->render.start = args->rcl_start;
-	exec->render.end = args->rcl_end;
-	exec->v3d = v3d;
-	INIT_LIST_HEAD(&exec->unref_list);
-
-	ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
+	ret = v3d_lookup_bos(dev, file_priv, &render->base,
+			     args->bo_handles, args->bo_handle_count);
 	if (ret)
 		goto fail;
 
-	ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
-				       &acquire_ctx);
+	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
 	if (ret)
 		goto fail;
 
 	mutex_lock(&v3d->sched_lock);
-	if (exec->bin.start != exec->bin.end) {
-		ret = drm_sched_job_init(&exec->bin.base,
-					 &v3d_priv->sched_entity[V3D_BIN],
-					 v3d_priv);
+	if (bin) {
+		ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
 		if (ret)
 			goto fail_unreserve;
 
-		exec->bin_done_fence =
-			dma_fence_get(&exec->bin.base.s_fence->finished);
-
-		kref_get(&exec->refcount); /* put by scheduler job completion */
-		drm_sched_entity_push_job(&exec->bin.base,
-					  &v3d_priv->sched_entity[V3D_BIN]);
+		ret = drm_gem_fence_array_add(&render->base.deps,
+					      dma_fence_get(bin->base.done_fence));
+		if (ret)
+			goto fail_unreserve;
 	}
 
-	ret = drm_sched_job_init(&exec->render.base,
-				 &v3d_priv->sched_entity[V3D_RENDER],
-				 v3d_priv);
+	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
 	if (ret)
 		goto fail_unreserve;
-
-	exec->render_done_fence =
-		dma_fence_get(&exec->render.base.s_fence->finished);
-
-	kref_get(&exec->refcount); /* put by scheduler job completion */
-	drm_sched_entity_push_job(&exec->render.base,
-				  &v3d_priv->sched_entity[V3D_RENDER]);
 	mutex_unlock(&v3d->sched_lock);
 
-	v3d_attach_object_fences(exec->bo, exec->bo_count,
-				 exec->render_done_fence);
-
-	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
-
-	/* Update the return sync object for the */
-	sync_out = drm_syncobj_find(file_priv, args->out_sync);
-	if (sync_out) {
-		drm_syncobj_replace_fence(sync_out, exec->render_done_fence);
-		drm_syncobj_put(sync_out);
-	}
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &render->base,
+						 &acquire_ctx,
+						 args->out_sync,
+						 render->base.done_fence);
 
-	v3d_exec_put(exec);
+	if (bin)
+		v3d_job_put(&bin->base);
+	v3d_job_put(&render->base);
 
 	return 0;
 
 fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
-	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
+	drm_gem_unlock_reservations(render->base.bo,
+				    render->base.bo_count, &acquire_ctx);
 fail:
-	v3d_exec_put(exec);
+	if (bin)
+		v3d_job_put(&bin->base);
+	v3d_job_put(&render->base);
 
 	return ret;
 }
@@ -572,10 +641,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	struct drm_v3d_submit_tfu *args = data;
 	struct v3d_tfu_job *job;
 	struct ww_acquire_ctx acquire_ctx;
-	struct drm_syncobj *sync_out;
-	struct dma_fence *sched_done_fence;
 	int ret = 0;
-	int bo_count;
 
 	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
 
@@ -583,81 +649,172 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	if (!job)
 		return -ENOMEM;
 
-	ret = pm_runtime_get_sync(v3d->dev);
-	if (ret < 0) {
+	ret = v3d_job_init(v3d, file_priv, &job->base,
+			   v3d_job_free, args->in_sync);
+	if (ret) {
 		kfree(job);
 		return ret;
 	}
 
-	kref_init(&job->refcount);
-
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync,
-				     0, 0, &job->in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
+			       sizeof(*job->base.bo), GFP_KERNEL);
+	if (!job->base.bo) {
+		v3d_job_put(&job->base);
+		return -ENOMEM;
+	}
 
 	job->args = *args;
-	job->v3d = v3d;
 
 	spin_lock(&file_priv->table_lock);
-	for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
+	for (job->base.bo_count = 0;
+	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
+	     job->base.bo_count++) {
 		struct drm_gem_object *bo;
 
-		if (!args->bo_handles[bo_count])
+		if (!args->bo_handles[job->base.bo_count])
 			break;
 
 		bo = idr_find(&file_priv->object_idr,
-			      args->bo_handles[bo_count]);
+			      args->bo_handles[job->base.bo_count]);
 		if (!bo) {
 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
-				  bo_count, args->bo_handles[bo_count]);
+				  job->base.bo_count,
+				  args->bo_handles[job->base.bo_count]);
 			ret = -ENOENT;
 			spin_unlock(&file_priv->table_lock);
 			goto fail;
 		}
 		drm_gem_object_get(bo);
-		job->bo[bo_count] = to_v3d_bo(bo);
+		job->base.bo[job->base.bo_count] = bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
-	ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
 	if (ret)
 		goto fail;
 
 	mutex_lock(&v3d->sched_lock);
-	ret = drm_sched_job_init(&job->base,
-				 &v3d_priv->sched_entity[V3D_TFU],
-				 v3d_priv);
+	ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
 	if (ret)
 		goto fail_unreserve;
+	mutex_unlock(&v3d->sched_lock);
 
-	sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &job->base, &acquire_ctx,
+						 args->out_sync,
+						 job->base.done_fence);
 
-	kref_get(&job->refcount); /* put by scheduler job completion */
-	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
+	v3d_job_put(&job->base);
+
+	return 0;
+
+fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
+	drm_gem_unlock_reservations(job->base.bo, job->base.bo_count,
+				    &acquire_ctx);
+fail:
+	v3d_job_put(&job->base);
 
-	v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
+	return ret;
+}
 
-	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+/**
+ * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace provides the register setup for the CSD, which we don't
+ * need to validate since the CSD is behind the MMU.
+ */
+int
+v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_submit_csd *args = data;
+	struct v3d_csd_job *job;
+	struct v3d_job *clean_job;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
 
-	/* Update the return sync object */
-	sync_out = drm_syncobj_find(file_priv, args->out_sync);
-	if (sync_out) {
-		drm_syncobj_replace_fence(sync_out, sched_done_fence);
-		drm_syncobj_put(sync_out);
+	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
+
+	if (!v3d_has_csd(v3d)) {
+		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
+		return -EINVAL;
+	}
+
+	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	ret = v3d_job_init(v3d, file_priv, &job->base,
+			   v3d_job_free, args->in_sync);
+	if (ret) {
+		kfree(job);
+		return ret;
+	}
+
+	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+	if (!clean_job) {
+		v3d_job_put(&job->base);
+		kfree(job);
+		return -ENOMEM;
 	}
-	dma_fence_put(sched_done_fence);
 
-	v3d_tfu_job_put(job);
+	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+	if (ret) {
+		v3d_job_put(&job->base);
+		kfree(clean_job);
+		return ret;
+	}
+
+	job->args = *args;
+
+	ret = v3d_lookup_bos(dev, file_priv, clean_job,
+			     args->bo_handles, args->bo_handle_count);
+	if (ret)
+		goto fail;
+
+	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	mutex_lock(&v3d->sched_lock);
+	ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
+	if (ret)
+		goto fail_unreserve;
+
+	ret = drm_gem_fence_array_add(&clean_job->deps,
+				      dma_fence_get(job->base.done_fence));
+	if (ret)
+		goto fail_unreserve;
+
+	ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+	if (ret)
+		goto fail_unreserve;
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 clean_job,
+						 &acquire_ctx,
+						 args->out_sync,
+						 clean_job->done_fence);
+
+	v3d_job_put(&job->base);
+	v3d_job_put(clean_job);
 
 	return 0;
 
 fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
-	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
+				    &acquire_ctx);
 fail:
-	v3d_tfu_job_put(job);
+	v3d_job_put(&job->base);
+	v3d_job_put(clean_job);
 
 	return ret;
 }
@@ -677,6 +834,7 @@ v3d_gem_init(struct drm_device *dev)
 	mutex_init(&v3d->bo_lock);
 	mutex_init(&v3d->reset_lock);
 	mutex_init(&v3d->sched_lock);
+	mutex_init(&v3d->cache_clean_lock);
 
 	/* Note: We don't allocate address 0.  Various bits of HW
 	 * treat 0 as special, such as the occlusion query counters
@@ -715,7 +873,7 @@ v3d_gem_destroy(struct drm_device *dev)
 
 	v3d_sched_fini(v3d);
 
-	/* Waiting for exec to finish would need to be done before
+	/* Waiting for jobs to finish would need to be done before
 	 * unregistering V3D.
 	 */
 	WARN_ON(v3d->bin_job);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index aa0a180ae700..268d8a889ac5 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -4,9 +4,9 @@
 /**
  * DOC: Interrupt management for the V3D engine
  *
- * When we take a bin, render, or TFU done interrupt, we need to
- * signal the fence for that job so that the scheduler can queue up
- * the next one and unblock any waiters.
+ * When we take a bin, render, TFU done, or CSD done interrupt, we
+ * need to signal the fence for that job so that the scheduler can
+ * queue up the next one and unblock any waiters.
  *
  * When we take the binner out of memory interrupt, we need to
  * allocate some new memory and pass it to the binner so that the
@@ -20,6 +20,7 @@
 #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
 			     V3D_INT_FLDONE |	\
 			     V3D_INT_FRDONE |	\
+			     V3D_INT_CSDDONE |	\
 			     V3D_INT_GMPV))
 
 #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV |	\
@@ -62,7 +63,7 @@ v3d_overflow_mem_work(struct work_struct *work)
 	}
 
 	drm_gem_object_get(obj);
-	list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
+	list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
 	V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
@@ -96,7 +97,7 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->bin_job->bin.irq_fence);
+			to_v3d_fence(v3d->bin_job->base.irq_fence);
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -105,13 +106,22 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->render_job->render.irq_fence);
+			to_v3d_fence(v3d->render_job->base.irq_fence);
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
+	if (intsts & V3D_INT_CSDDONE) {
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->csd_job->base.irq_fence);
+
+		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
+		status = IRQ_HANDLED;
+	}
+
 	/* We shouldn't be triggering these if we have GMP in
 	 * always-allowed mode.
 	 */
@@ -141,7 +151,7 @@ v3d_hub_irq(int irq, void *arg)
 
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->tfu_job->irq_fence);
+			to_v3d_fence(v3d->tfu_job->base.irq_fence);
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -152,10 +162,33 @@ v3d_hub_irq(int irq, void *arg)
 		      V3D_HUB_INT_MMU_PTI |
 		      V3D_HUB_INT_MMU_CAP)) {
 		u32 axi_id = V3D_READ(V3D_MMU_VIO_ID);
-		u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8;
-
-		dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n",
-			axi_id, (long long)vio_addr,
+		u64 vio_addr = ((u64)V3D_READ(V3D_MMU_VIO_ADDR) <<
+				(v3d->va_width - 32));
+		static const char *const v3d41_axi_ids[] = {
+			"L2T",
+			"PTB",
+			"PSE",
+			"TLB",
+			"CLE",
+			"TFU",
+			"MMU",
+			"GMP",
+		};
+		const char *client = "?";
+
+		V3D_WRITE(V3D_MMU_CTL,
+			  V3D_READ(V3D_MMU_CTL) & (V3D_MMU_CTL_CAP_EXCEEDED |
+						   V3D_MMU_CTL_PT_INVALID |
+						   V3D_MMU_CTL_WRITE_VIOLATION));
+
+		if (v3d->ver >= 41) {
+			axi_id = axi_id >> 5;
+			if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
+				client = v3d41_axi_ids[axi_id];
+		}
+
+		dev_err(v3d->dev, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
+			client, axi_id, (long long)vio_addr,
 			((intsts & V3D_HUB_INT_MMU_WRV) ?
 			 ", write violation" : ""),
 			((intsts & V3D_HUB_INT_MMU_PTI) ?
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
index 7a21f1787ab1..395e81d97163 100644
--- a/drivers/gpu/drm/v3d/v3d_mmu.c
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -69,10 +69,13 @@ int v3d_mmu_set_page_table(struct v3d_dev *v3d)
 	V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT);
 	V3D_WRITE(V3D_MMU_CTL,
 		  V3D_MMU_CTL_ENABLE |
-		  V3D_MMU_CTL_PT_INVALID |
+		  V3D_MMU_CTL_PT_INVALID_ENABLE |
 		  V3D_MMU_CTL_PT_INVALID_ABORT |
+		  V3D_MMU_CTL_PT_INVALID_INT |
 		  V3D_MMU_CTL_WRITE_VIOLATION_ABORT |
-		  V3D_MMU_CTL_CAP_EXCEEDED_ABORT);
+		  V3D_MMU_CTL_WRITE_VIOLATION_INT |
+		  V3D_MMU_CTL_CAP_EXCEEDED_ABORT |
+		  V3D_MMU_CTL_CAP_EXCEEDED_INT);
 	V3D_WRITE(V3D_MMU_ILLEGAL_ADDR,
 		  (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) |
 		  V3D_MMU_ILLEGAL_ADDR_ENABLE);
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index 8e88af237610..9bcb57781d31 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -152,7 +152,8 @@
 # define V3D_MMU_CTL_PT_INVALID_ABORT                  BIT(19)
 # define V3D_MMU_CTL_PT_INVALID_INT                    BIT(18)
 # define V3D_MMU_CTL_PT_INVALID_EXCEPTION              BIT(17)
-# define V3D_MMU_CTL_WRITE_VIOLATION                   BIT(16)
+# define V3D_MMU_CTL_PT_INVALID_ENABLE                 BIT(16)
+# define V3D_MMU_CTL_WRITE_VIOLATION                   BIT(12)
 # define V3D_MMU_CTL_WRITE_VIOLATION_ABORT             BIT(11)
 # define V3D_MMU_CTL_WRITE_VIOLATION_INT               BIT(10)
 # define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION         BIT(9)
@@ -191,6 +192,14 @@
 /* Address that faulted */
 #define V3D_MMU_VIO_ADDR                               0x01234
 
+#define V3D_MMU_DEBUG_INFO                             0x01238
+# define V3D_MMU_PA_WIDTH_MASK                         V3D_MASK(11, 8)
+# define V3D_MMU_PA_WIDTH_SHIFT                        8
+# define V3D_MMU_VA_WIDTH_MASK                         V3D_MASK(7, 4)
+# define V3D_MMU_VA_WIDTH_SHIFT                        4
+# define V3D_MMU_VERSION_MASK                          V3D_MASK(3, 0)
+# define V3D_MMU_VERSION_SHIFT                         0
+
 /* Per-V3D-core registers */
 
 #define V3D_CTL_IDENT0                                 0x00000
@@ -238,8 +247,11 @@
 #define V3D_CTL_L2TCACTL                               0x00030
 # define V3D_L2TCACTL_TMUWCF                           BIT(8)
 # define V3D_L2TCACTL_L2T_NO_WM                        BIT(4)
+/* Invalidates cache lines. */
 # define V3D_L2TCACTL_FLM_FLUSH                        0
+/* Removes cachelines without writing dirty lines back. */
 # define V3D_L2TCACTL_FLM_CLEAR                        1
+/* Writes out dirty cachelines and marks them clean, but doesn't invalidate. */
 # define V3D_L2TCACTL_FLM_CLEAN                        2
 # define V3D_L2TCACTL_FLM_MASK                         V3D_MASK(2, 1)
 # define V3D_L2TCACTL_FLM_SHIFT                        1
@@ -255,6 +267,8 @@
 #define V3D_CTL_INT_MSK_CLR                            0x00064
 # define V3D_INT_QPU_MASK                              V3D_MASK(27, 16)
 # define V3D_INT_QPU_SHIFT                             16
+# define V3D_INT_CSDDONE                               BIT(7)
+# define V3D_INT_PCTR                                  BIT(6)
 # define V3D_INT_GMPV                                  BIT(5)
 # define V3D_INT_TRFB                                  BIT(4)
 # define V3D_INT_SPILLUSE                              BIT(3)
@@ -374,4 +388,110 @@
 #define V3D_GMP_PRESERVE_LOAD                          0x00818
 #define V3D_GMP_VALID_LINES                            0x00820
 
+#define V3D_CSD_STATUS                                 0x00900
+# define V3D_CSD_STATUS_NUM_COMPLETED_MASK             V3D_MASK(11, 4)
+# define V3D_CSD_STATUS_NUM_COMPLETED_SHIFT            4
+# define V3D_CSD_STATUS_NUM_ACTIVE_MASK                V3D_MASK(3, 2)
+# define V3D_CSD_STATUS_NUM_ACTIVE_SHIFT               2
+# define V3D_CSD_STATUS_HAVE_CURRENT_DISPATCH          BIT(1)
+# define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH           BIT(0)
+
+#define V3D_CSD_QUEUED_CFG0                            0x00904
+# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT           16
+# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG1                            0x00908
+# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT           16
+# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG2                            0x0090c
+# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT           16
+# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG3                            0x00910
+# define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV         BIT(26)
+# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK            V3D_MASK(25, 20)
+# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT           20
+# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_MASK    V3D_MASK(19, 12)
+# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_SHIFT   12
+# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_MASK           V3D_MASK(11, 8)
+# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_SHIFT          8
+# define V3D_CSD_QUEUED_CFG3_WG_SIZE_MASK              V3D_MASK(7, 0)
+# define V3D_CSD_QUEUED_CFG3_WG_SIZE_SHIFT             0
+
+/* Number of batches, minus 1 */
+#define V3D_CSD_QUEUED_CFG4                            0x00914
+
+/* Shader address, pnan, singleseg, threading, like a shader record. */
+#define V3D_CSD_QUEUED_CFG5                            0x00918
+
+/* Uniforms address (4 byte aligned) */
+#define V3D_CSD_QUEUED_CFG6                            0x0091c
+
+#define V3D_CSD_CURRENT_CFG0                          0x00920
+#define V3D_CSD_CURRENT_CFG1                          0x00924
+#define V3D_CSD_CURRENT_CFG2                          0x00928
+#define V3D_CSD_CURRENT_CFG3                          0x0092c
+#define V3D_CSD_CURRENT_CFG4                          0x00930
+#define V3D_CSD_CURRENT_CFG5                          0x00934
+#define V3D_CSD_CURRENT_CFG6                          0x00938
+
+#define V3D_CSD_CURRENT_ID0                            0x0093c
+# define V3D_CSD_CURRENT_ID0_WG_X_MASK                 V3D_MASK(31, 16)
+# define V3D_CSD_CURRENT_ID0_WG_X_SHIFT                16
+# define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK             V3D_MASK(11, 8)
+# define V3D_CSD_CURRENT_ID0_WG_IN_SG_SHIFT            8
+# define V3D_CSD_CURRENT_ID0_L_IDX_MASK                V3D_MASK(7, 0)
+# define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT               0
+
+#define V3D_CSD_CURRENT_ID1                            0x00940
+# define V3D_CSD_CURRENT_ID0_WG_Z_MASK                 V3D_MASK(31, 16)
+# define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT                16
+# define V3D_CSD_CURRENT_ID0_WG_Y_MASK                 V3D_MASK(15, 0)
+# define V3D_CSD_CURRENT_ID0_WG_Y_SHIFT                0
+
+#define V3D_ERR_FDBGO                                  0x00f04
+#define V3D_ERR_FDBGB                                  0x00f08
+#define V3D_ERR_FDBGR                                  0x00f0c
+
+#define V3D_ERR_FDBGS                                  0x00f10
+# define V3D_ERR_FDBGS_INTERPZ_IP_STALL                BIT(17)
+# define V3D_ERR_FDBGS_DEPTHO_FIFO_IP_STALL            BIT(16)
+# define V3D_ERR_FDBGS_XYNRM_IP_STALL                  BIT(14)
+# define V3D_ERR_FDBGS_EZREQ_FIFO_OP_VALID             BIT(13)
+# define V3D_ERR_FDBGS_QXYF_FIFO_OP_VALID              BIT(12)
+# define V3D_ERR_FDBGS_QXYF_FIFO_OP_LAST               BIT(11)
+# define V3D_ERR_FDBGS_EZTEST_ANYQVALID                BIT(7)
+# define V3D_ERR_FDBGS_EZTEST_PASS                     BIT(6)
+# define V3D_ERR_FDBGS_EZTEST_QREADY                   BIT(5)
+# define V3D_ERR_FDBGS_EZTEST_VLF_OKNOVALID            BIT(4)
+# define V3D_ERR_FDBGS_EZTEST_QSTALL                   BIT(3)
+# define V3D_ERR_FDBGS_EZTEST_IP_VLFSTALL              BIT(2)
+# define V3D_ERR_FDBGS_EZTEST_IP_PRSTALL               BIT(1)
+# define V3D_ERR_FDBGS_EZTEST_IP_QSTALL                BIT(0)
+
+#define V3D_ERR_STAT                                   0x00f20
+# define V3D_ERR_L2CARE                                BIT(15)
+# define V3D_ERR_VCMBE                                 BIT(14)
+# define V3D_ERR_VCMRE                                 BIT(13)
+# define V3D_ERR_VCDI                                  BIT(12)
+# define V3D_ERR_VCDE                                  BIT(11)
+# define V3D_ERR_VDWE                                  BIT(10)
+# define V3D_ERR_VPMEAS                                BIT(9)
+# define V3D_ERR_VPMEFNA                               BIT(8)
+# define V3D_ERR_VPMEWNA                               BIT(7)
+# define V3D_ERR_VPMERNA                               BIT(6)
+# define V3D_ERR_VPMERR                                BIT(5)
+# define V3D_ERR_VPMEWR                                BIT(4)
+# define V3D_ERR_VPAERRGL                              BIT(3)
+# define V3D_ERR_VPAEBRGL                              BIT(2)
+# define V3D_ERR_VPAERGS                               BIT(1)
+# define V3D_ERR_VPAEABB                               BIT(0)
+
 #endif /* V3D_REGS_H */
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index e740f3b99aa5..8c2df6d95283 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -30,158 +30,152 @@ to_v3d_job(struct drm_sched_job *sched_job)
 	return container_of(sched_job, struct v3d_job, base);
 }
 
-static struct v3d_tfu_job *
-to_tfu_job(struct drm_sched_job *sched_job)
+static struct v3d_bin_job *
+to_bin_job(struct drm_sched_job *sched_job)
 {
-	return container_of(sched_job, struct v3d_tfu_job, base);
+	return container_of(sched_job, struct v3d_bin_job, base.base);
 }
 
-static void
-v3d_job_free(struct drm_sched_job *sched_job)
+static struct v3d_render_job *
+to_render_job(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
+	return container_of(sched_job, struct v3d_render_job, base.base);
+}
 
-	drm_sched_job_cleanup(sched_job);
+static struct v3d_tfu_job *
+to_tfu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_tfu_job, base.base);
+}
 
-	v3d_exec_put(job->exec);
+static struct v3d_csd_job *
+to_csd_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_csd_job, base.base);
 }
 
 static void
-v3d_tfu_job_free(struct drm_sched_job *sched_job)
+v3d_job_free(struct drm_sched_job *sched_job)
 {
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_job *job = to_v3d_job(sched_job);
 
 	drm_sched_job_cleanup(sched_job);
-
-	v3d_tfu_job_put(job);
+	v3d_job_put(job);
 }
 
 /**
- * Returns the fences that the bin or render job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
+ * Returns the fences that the job depends on, one by one.
+ *
+ * If placed in the scheduler's .dependency method, the corresponding
+ * .run_job won't be called until all of them have been signaled.
  */
 static struct dma_fence *
 v3d_job_dependency(struct drm_sched_job *sched_job,
 		   struct drm_sched_entity *s_entity)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
-
-	if (q == V3D_RENDER) {
-		/* If we had a bin job, the render job definitely depends on
-		 * it. We first have to wait for bin to be scheduled, so that
-		 * its done_fence is created.
-		 */
-		fence = exec->bin_done_fence;
-		if (fence) {
-			exec->bin_done_fence = NULL;
-			return fence;
-		}
-	}
 
 	/* XXX: Wait on a fence for switching the GMP if necessary,
 	 * and then do so.
 	 */
 
-	return fence;
-}
-
-/**
- * Returns the fences that the TFU job depends on, one by one.
- * v3d_tfu_job_run() won't be called until all of them have been
- * signaled.
- */
-static struct dma_fence *
-v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
-		       struct drm_sched_entity *s_entity)
-{
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
+	if (!xa_empty(&job->deps))
+		return xa_erase(&job->deps, job->last_dep++);
 
 	return NULL;
 }
 
-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct v3d_dev *v3d = exec->v3d;
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	unsigned long irqflags;
 
-	if (unlikely(job->base.s_fence->finished.error))
+	if (unlikely(job->base.base.s_fence->finished.error))
 		return NULL;
 
 	/* Lock required around bin_job update vs
 	 * v3d_overflow_mem_work().
 	 */
 	spin_lock_irqsave(&v3d->job_lock, irqflags);
-	if (q == V3D_BIN) {
-		v3d->bin_job = job->exec;
-
-		/* Clear out the overflow allocation, so we don't
-		 * reuse the overflow attached to a previous job.
-		 */
-		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
-	} else {
-		v3d->render_job = job->exec;
-	}
+	v3d->bin_job = job;
+	/* Clear out the overflow allocation, so we don't
+	 * reuse the overflow attached to a previous job.
+	 */
+	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
-	/* Can we avoid this flush when q==RENDER?  We need to be
-	 * careful of scheduling, though -- imagine job0 rendering to
-	 * texture and job1 reading, and them being executed as bin0,
-	 * bin1, render0, render1, so that render1's flush at bin time
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_BIN);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
+			    job->start, job->end);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	if (job->qma) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
+	}
+	if (job->qts) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+			       V3D_CLE_CT0QTS_ENABLE |
+			       job->qts);
+	}
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
+
+	return fence;
+}
+
+static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	if (unlikely(job->base.base.s_fence->finished.error))
+		return NULL;
+
+	v3d->render_job = job;
+
+	/* Can we avoid this flush?  We need to be careful of
+	 * scheduling, though -- imagine job0 rendering to texture and
+	 * job1 reading, and them being executed as bin0, bin1,
+	 * render0, render1, so that render1's flush at bin time
 	 * wasn't enough.
 	 */
 	v3d_invalidate_caches(v3d);
 
-	fence = v3d_fence_create(v3d, q);
+	fence = v3d_fence_create(v3d, V3D_RENDER);
 	if (IS_ERR(fence))
 		return NULL;
 
-	if (job->irq_fence)
-		dma_fence_put(job->irq_fence);
-	job->irq_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
-	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
-	if (q == V3D_BIN) {
-		if (exec->qma) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
-		}
-		if (exec->qts) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
-				       V3D_CLE_CT0QTS_ENABLE |
-				       exec->qts);
-		}
-	} else {
-		/* XXX: Set the QCFG */
-	}
+	/* XXX: Set the QCFG */
 
 	/* Set the current and end address of the control list.
 	 * Writing the end register is what starts the job.
 	 */
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
 
 	return fence;
 }
@@ -190,7 +184,7 @@ static struct dma_fence *
 v3d_tfu_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_tfu_job *job = to_tfu_job(sched_job);
-	struct v3d_dev *v3d = job->v3d;
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 
@@ -199,9 +193,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 		return NULL;
 
 	v3d->tfu_job = job;
-	if (job->irq_fence)
-		dma_fence_put(job->irq_fence);
-	job->irq_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
@@ -223,6 +217,48 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 	return fence;
 }
 
+static struct dma_fence *
+v3d_csd_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+	int i;
+
+	v3d->csd_job = job;
+
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_CSD);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+
+	for (i = 1; i <= 6; i++)
+		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
+	/* CFG0 write kicks off the job. */
+	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_dev *v3d = job->v3d;
+
+	v3d_clean_caches(v3d);
+
+	return NULL;
+}
+
 static void
 v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 {
@@ -232,7 +268,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 
 	/* block scheduler */
 	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_stop(&v3d->queue[q].sched);
+		drm_sched_stop(&v3d->queue[q].sched, sched_job);
 
 	if (sched_job)
 		drm_sched_increase_karma(sched_job);
@@ -251,25 +287,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 	mutex_unlock(&v3d->reset_lock);
 }
 
+/* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset.  This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
 static void
-v3d_job_timedout(struct drm_sched_job *sched_job)
+v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
+		    u32 *timedout_ctca, u32 *timedout_ctra)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	struct v3d_dev *v3d = exec->v3d;
-	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
-	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
-
-	/* If the current address or return address have changed, then
-	 * the GPU has probably made progress and we should delay the
-	 * reset.  This could fail if the GPU got in an infinite loop
-	 * in the CL, but that is pretty unlikely outside of an i-g-t
-	 * testcase.
-	 */
-	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
-		job->timedout_ctca = ctca;
-		job->timedout_ctra = ctra;
+	struct v3d_dev *v3d = job->v3d;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+
+	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
+		*timedout_ctca = ctca;
+		*timedout_ctra = ctra;
 		return;
 	}
 
@@ -277,25 +311,82 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 }
 
 static void
-v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
+v3d_bin_job_timedout(struct drm_sched_job *sched_job)
 {
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_BIN,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_render_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_RENDER,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_generic_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
 
 	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
 }
 
-static const struct drm_sched_backend_ops v3d_sched_ops = {
+static void
+v3d_csd_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
+
+	/* If we've made progress, skip reset and let the timer get
+	 * rearmed.
+	 */
+	if (job->timedout_batches != batches) {
+		job->timedout_batches = batches;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
 	.dependency = v3d_job_dependency,
-	.run_job = v3d_job_run,
-	.timedout_job = v3d_job_timedout,
-	.free_job = v3d_job_free
+	.run_job = v3d_bin_job_run,
+	.timedout_job = v3d_bin_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_render_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_render_job_run,
+	.timedout_job = v3d_render_job_timedout,
+	.free_job = v3d_job_free,
 };
 
 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
-	.dependency = v3d_tfu_job_dependency,
+	.dependency = v3d_job_dependency,
 	.run_job = v3d_tfu_job_run,
-	.timedout_job = v3d_tfu_job_timedout,
-	.free_job = v3d_tfu_job_free
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_csd_job_run,
+	.timedout_job = v3d_csd_job_timedout,
+	.free_job = v3d_job_free
+};
+
+static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_cache_clean_job_run,
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free
 };
 
 int
@@ -307,7 +398,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_sched_ops,
+			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_bin");
@@ -317,14 +408,14 @@ v3d_sched_init(struct v3d_dev *v3d)
 	}
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_sched_ops,
+			     &v3d_render_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_render");
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
 	}
 
@@ -336,11 +427,36 @@ v3d_sched_init(struct v3d_dev *v3d)
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
 	}
 
+	if (v3d_has_csd(v3d)) {
+		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
+				     &v3d_csd_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_csd");
+		if (ret) {
+			dev_err(v3d->dev, "Failed to create CSD scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+
+		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
+				     &v3d_cache_clean_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_cache_clean");
+		if (ret) {
+			dev_err(v3d->dev, "Failed to create CACHE_CLEAN scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
@@ -349,6 +465,8 @@ v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
 
-	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_fini(&v3d->queue[q].sched);
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		if (v3d->queue[q].sched.ready)
+			drm_sched_fini(&v3d->queue[q].sched);
+	}
 }
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index edd984afa33f..7aa8dc356e54 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -124,6 +124,26 @@ TRACE_EVENT(v3d_tfu_irq,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(v3d_csd_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
 TRACE_EVENT(v3d_submit_tfu_ioctl,
 	    TP_PROTO(struct drm_device *dev, u32 iia),
 	    TP_ARGS(dev, iia),
@@ -163,6 +183,80 @@ TRACE_EVENT(v3d_submit_tfu,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(v3d_submit_csd_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 cfg5, u32 cfg6),
+	    TP_ARGS(dev, cfg5, cfg6),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, cfg5)
+			     __field(u32, cfg6)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->cfg5 = cfg5;
+			   __entry->cfg6 = cfg6;
+			   ),
+
+	    TP_printk("dev=%u, CFG5 0x%08x, CFG6 0x%08x",
+		      __entry->dev,
+		      __entry->cfg5,
+		      __entry->cfg6)
+);
+
+TRACE_EVENT(v3d_submit_csd,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_cache_clean_begin,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
+TRACE_EVENT(v3d_cache_clean_end,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
 TRACE_EVENT(v3d_reset_begin,
 	    TP_PROTO(struct drm_device *dev),
 	    TP_ARGS(dev),
diff --git a/drivers/gpu/drm/vboxvideo/Kconfig b/drivers/gpu/drm/vboxvideo/Kconfig
index d6ab955c0768..56ba510f21a2 100644
--- a/drivers/gpu/drm/vboxvideo/Kconfig
+++ b/drivers/gpu/drm/vboxvideo/Kconfig
@@ -3,7 +3,7 @@ config DRM_VBOXVIDEO
 	tristate "Virtual Box Graphics Card"
 	depends on DRM && X86 && PCI
 	select DRM_KMS_HELPER
-	select DRM_TTM
+	select DRM_VRAM_HELPER
 	select GENERIC_ALLOCATOR
 	help
 	  This is a KMS driver for the virtual Graphics Card used in
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c
index fb6a0f0b8167..02537ab9cc08 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c
@@ -191,13 +191,7 @@ static struct pci_driver vbox_pci_driver = {
 
 static const struct file_operations vbox_fops = {
 	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.compat_ioctl = drm_compat_ioctl,
-	.mmap = vbox_mmap,
-	.poll = drm_poll,
-	.read = drm_read,
+	DRM_VRAM_MM_FILE_OPERATIONS
 };
 
 static struct drm_driver driver = {
@@ -215,9 +209,7 @@ static struct drm_driver driver = {
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
 
-	.gem_free_object_unlocked = vbox_gem_free_object,
-	.dumb_create = vbox_dumb_create,
-	.dumb_map_offset = vbox_dumb_mmap_offset,
+	DRM_GEM_VRAM_DRIVER,
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = drm_gem_prime_export,
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.h b/drivers/gpu/drm/vboxvideo/vbox_drv.h
index ece31f395540..9028f946bc06 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.h
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.h
@@ -18,12 +18,9 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
 
-#include <drm/ttm/ttm_bo_api.h>
-#include <drm/ttm/ttm_bo_driver.h>
-#include <drm/ttm/ttm_placement.h>
-#include <drm/ttm/ttm_memory.h>
-#include <drm/ttm/ttm_module.h>
+#include <drm/drm_vram_mm_helper.h>
 
 #include "vboxvideo_guest.h"
 #include "vboxvideo_vbe.h"
@@ -77,10 +74,6 @@ struct vbox_private {
 
 	int fb_mtrr;
 
-	struct {
-		struct ttm_bo_device bdev;
-	} ttm;
-
 	struct mutex hw_mutex; /* protects modeset and accel/vbva accesses */
 	struct work_struct hotplug_work;
 	u32 input_mapping_width;
@@ -96,8 +89,6 @@ struct vbox_private {
 #undef CURSOR_PIXEL_COUNT
 #undef CURSOR_DATA_SIZE
 
-struct vbox_gem_object;
-
 struct vbox_connector {
 	struct drm_connector base;
 	char name[32];
@@ -170,74 +161,12 @@ int vboxfb_create(struct drm_fb_helper *helper,
 		  struct drm_fb_helper_surface_size *sizes);
 void vbox_fbdev_fini(struct vbox_private *vbox);
 
-struct vbox_bo {
-	struct ttm_buffer_object bo;
-	struct ttm_placement placement;
-	struct ttm_bo_kmap_obj kmap;
-	struct drm_gem_object gem;
-	struct ttm_place placements[3];
-	int pin_count;
-};
-
-#define gem_to_vbox_bo(gobj) container_of((gobj), struct vbox_bo, gem)
-
-static inline struct vbox_bo *vbox_bo(struct ttm_buffer_object *bo)
-{
-	return container_of(bo, struct vbox_bo, bo);
-}
-
-#define to_vbox_obj(x) container_of(x, struct vbox_gem_object, base)
-
-static inline u64 vbox_bo_gpu_offset(struct vbox_bo *bo)
-{
-	return bo->bo.offset;
-}
-
-int vbox_dumb_create(struct drm_file *file,
-		     struct drm_device *dev,
-		     struct drm_mode_create_dumb *args);
-
-void vbox_gem_free_object(struct drm_gem_object *obj);
-int vbox_dumb_mmap_offset(struct drm_file *file,
-			  struct drm_device *dev,
-			  u32 handle, u64 *offset);
-
 int vbox_mm_init(struct vbox_private *vbox);
 void vbox_mm_fini(struct vbox_private *vbox);
 
-int vbox_bo_create(struct vbox_private *vbox, int size, int align,
-		   u32 flags, struct vbox_bo **pvboxbo);
-
 int vbox_gem_create(struct vbox_private *vbox,
 		    u32 size, bool iskernel, struct drm_gem_object **obj);
 
-int vbox_bo_pin(struct vbox_bo *bo, u32 pl_flag);
-int vbox_bo_unpin(struct vbox_bo *bo);
-
-static inline int vbox_bo_reserve(struct vbox_bo *bo, bool no_wait)
-{
-	int ret;
-
-	ret = ttm_bo_reserve(&bo->bo, true, no_wait, NULL);
-	if (ret) {
-		if (ret != -ERESTARTSYS && ret != -EBUSY)
-			DRM_ERROR("reserve failed %p\n", bo);
-		return ret;
-	}
-	return 0;
-}
-
-static inline void vbox_bo_unreserve(struct vbox_bo *bo)
-{
-	ttm_bo_unreserve(&bo->bo);
-}
-
-void vbox_ttm_placement(struct vbox_bo *bo, int domain);
-int vbox_bo_push_sysram(struct vbox_bo *bo);
-int vbox_mmap(struct file *filp, struct vm_area_struct *vma);
-void *vbox_bo_kmap(struct vbox_bo *bo);
-void vbox_bo_kunmap(struct vbox_bo *bo);
-
 /* vbox_prime.c */
 int vbox_gem_prime_pin(struct drm_gem_object *obj);
 void vbox_gem_prime_unpin(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/vboxvideo/vbox_fb.c b/drivers/gpu/drm/vboxvideo/vbox_fb.c
index b724fe7c0c30..8f74bcffc034 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_fb.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_fb.c
@@ -51,9 +51,9 @@ int vboxfb_create(struct drm_fb_helper *helper,
 	struct drm_framebuffer *fb;
 	struct fb_info *info;
 	struct drm_gem_object *gobj;
-	struct vbox_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int size, ret;
-	u64 gpu_addr;
+	s64 gpu_addr;
 	u32 pitch;
 
 	mode_cmd.width = sizes->surface_width;
@@ -75,9 +75,9 @@ int vboxfb_create(struct drm_fb_helper *helper,
 	if (ret)
 		return ret;
 
-	bo = gem_to_vbox_bo(gobj);
+	gbo = drm_gem_vram_of_gem(gobj);
 
-	ret = vbox_bo_pin(bo, TTM_PL_FLAG_VRAM);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret)
 		return ret;
 
@@ -86,7 +86,7 @@ int vboxfb_create(struct drm_fb_helper *helper,
 		return PTR_ERR(info);
 
 	info->screen_size = size;
-	info->screen_base = (char __iomem *)vbox_bo_kmap(bo);
+	info->screen_base = (char __iomem *)drm_gem_vram_kmap(gbo, true, NULL);
 	if (IS_ERR(info->screen_base))
 		return PTR_ERR(info->screen_base);
 
@@ -104,7 +104,9 @@ int vboxfb_create(struct drm_fb_helper *helper,
 
 	drm_fb_helper_fill_info(info, helper, sizes);
 
-	gpu_addr = vbox_bo_gpu_offset(bo);
+	gpu_addr = drm_gem_vram_offset(gbo);
+	if (gpu_addr < 0)
+		return (int)gpu_addr;
 	info->fix.smem_start = info->apertures->ranges[0].base + gpu_addr;
 	info->fix.smem_len = vbox->available_vram_size - gpu_addr;
 
@@ -132,12 +134,10 @@ void vbox_fbdev_fini(struct vbox_private *vbox)
 	drm_fb_helper_unregister_fbi(&vbox->fb_helper);
 
 	if (afb->obj) {
-		struct vbox_bo *bo = gem_to_vbox_bo(afb->obj);
+		struct drm_gem_vram_object *gbo = drm_gem_vram_of_gem(afb->obj);
 
-		vbox_bo_kunmap(bo);
-
-		if (bo->pin_count)
-			vbox_bo_unpin(bo);
+		drm_gem_vram_kunmap(gbo);
+		drm_gem_vram_unpin(gbo);
 
 		drm_gem_object_put_unlocked(afb->obj);
 		afb->obj = NULL;
diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c
index f4d02de5518a..18693e2bf72a 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_main.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_main.c
@@ -274,7 +274,7 @@ void vbox_hw_fini(struct vbox_private *vbox)
 int vbox_gem_create(struct vbox_private *vbox,
 		    u32 size, bool iskernel, struct drm_gem_object **obj)
 {
-	struct vbox_bo *vboxbo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
 
 	*obj = NULL;
@@ -283,79 +283,16 @@ int vbox_gem_create(struct vbox_private *vbox,
 	if (size == 0)
 		return -EINVAL;
 
-	ret = vbox_bo_create(vbox, size, 0, 0, &vboxbo);
-	if (ret) {
+	gbo = drm_gem_vram_create(&vbox->ddev, &vbox->ddev.vram_mm->bdev,
+				  size, 0, false);
+	if (IS_ERR(gbo)) {
+		ret = PTR_ERR(gbo);
 		if (ret != -ERESTARTSYS)
 			DRM_ERROR("failed to allocate GEM object\n");
 		return ret;
 	}
 
-	*obj = &vboxbo->gem;
-
-	return 0;
-}
-
-int vbox_dumb_create(struct drm_file *file,
-		     struct drm_device *dev, struct drm_mode_create_dumb *args)
-{
-	struct vbox_private *vbox =
-		container_of(dev, struct vbox_private, ddev);
-	struct drm_gem_object *gobj;
-	u32 handle;
-	int ret;
-
-	args->pitch = args->width * ((args->bpp + 7) / 8);
-	args->size = args->pitch * args->height;
-
-	ret = vbox_gem_create(vbox, args->size, false, &gobj);
-	if (ret)
-		return ret;
-
-	ret = drm_gem_handle_create(file, gobj, &handle);
-	drm_gem_object_put_unlocked(gobj);
-	if (ret)
-		return ret;
-
-	args->handle = handle;
+	*obj = &gbo->gem;
 
 	return 0;
 }
-
-void vbox_gem_free_object(struct drm_gem_object *obj)
-{
-	struct vbox_bo *vbox_bo = gem_to_vbox_bo(obj);
-
-	ttm_bo_put(&vbox_bo->bo);
-}
-
-static inline u64 vbox_bo_mmap_offset(struct vbox_bo *bo)
-{
-	return drm_vma_node_offset_addr(&bo->bo.vma_node);
-}
-
-int
-vbox_dumb_mmap_offset(struct drm_file *file,
-		      struct drm_device *dev,
-		      u32 handle, u64 *offset)
-{
-	struct drm_gem_object *obj;
-	int ret;
-	struct vbox_bo *bo;
-
-	mutex_lock(&dev->struct_mutex);
-	obj = drm_gem_object_lookup(file, handle);
-	if (!obj) {
-		ret = -ENOENT;
-		goto out_unlock;
-	}
-
-	bo = gem_to_vbox_bo(obj);
-	*offset = vbox_bo_mmap_offset(bo);
-
-	drm_gem_object_put(obj);
-	ret = 0;
-
-out_unlock:
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
-}
diff --git a/drivers/gpu/drm/vboxvideo/vbox_mode.c b/drivers/gpu/drm/vboxvideo/vbox_mode.c
index 58cea131470e..e1e48ba919eb 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_mode.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_mode.c
@@ -172,7 +172,8 @@ static void vbox_crtc_set_base_and_mode(struct drm_crtc *crtc,
 					struct drm_framebuffer *fb,
 					int x, int y)
 {
-	struct vbox_bo *bo = gem_to_vbox_bo(to_vbox_framebuffer(fb)->obj);
+	struct drm_gem_vram_object *gbo =
+		drm_gem_vram_of_gem(to_vbox_framebuffer(fb)->obj);
 	struct vbox_private *vbox = crtc->dev->dev_private;
 	struct vbox_crtc *vbox_crtc = to_vbox_crtc(crtc);
 	bool needs_modeset = drm_atomic_crtc_needs_modeset(crtc->state);
@@ -186,7 +187,7 @@ static void vbox_crtc_set_base_and_mode(struct drm_crtc *crtc,
 
 	vbox_crtc->x = x;
 	vbox_crtc->y = y;
-	vbox_crtc->fb_offset = vbox_bo_gpu_offset(bo);
+	vbox_crtc->fb_offset = drm_gem_vram_offset(gbo);
 
 	/* vbox_do_modeset() checks vbox->single_framebuffer so update it now */
 	if (needs_modeset && vbox_set_up_input_mapping(vbox)) {
@@ -302,14 +303,14 @@ static void vbox_primary_atomic_disable(struct drm_plane *plane,
 static int vbox_primary_prepare_fb(struct drm_plane *plane,
 				   struct drm_plane_state *new_state)
 {
-	struct vbox_bo *bo;
+	struct drm_gem_vram_object *gbo;
 	int ret;
 
 	if (!new_state->fb)
 		return 0;
 
-	bo = gem_to_vbox_bo(to_vbox_framebuffer(new_state->fb)->obj);
-	ret = vbox_bo_pin(bo, TTM_PL_FLAG_VRAM);
+	gbo = drm_gem_vram_of_gem(to_vbox_framebuffer(new_state->fb)->obj);
+	ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
 	if (ret)
 		DRM_WARN("Error %d pinning new fb, out of video mem?\n", ret);
 
@@ -319,13 +320,13 @@ static int vbox_primary_prepare_fb(struct drm_plane *plane,
 static void vbox_primary_cleanup_fb(struct drm_plane *plane,
 				    struct drm_plane_state *old_state)
 {
-	struct vbox_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!old_state->fb)
 		return;
 
-	bo = gem_to_vbox_bo(to_vbox_framebuffer(old_state->fb)->obj);
-	vbox_bo_unpin(bo);
+	gbo = drm_gem_vram_of_gem(to_vbox_framebuffer(old_state->fb)->obj);
+	drm_gem_vram_unpin(gbo);
 }
 
 static int vbox_cursor_atomic_check(struct drm_plane *plane,
@@ -385,7 +386,8 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane,
 		container_of(plane->dev, struct vbox_private, ddev);
 	struct vbox_crtc *vbox_crtc = to_vbox_crtc(plane->state->crtc);
 	struct drm_framebuffer *fb = plane->state->fb;
-	struct vbox_bo *bo = gem_to_vbox_bo(to_vbox_framebuffer(fb)->obj);
+	struct drm_gem_vram_object *gbo =
+		drm_gem_vram_of_gem(to_vbox_framebuffer(fb)->obj);
 	u32 width = plane->state->crtc_w;
 	u32 height = plane->state->crtc_h;
 	size_t data_size, mask_size;
@@ -404,7 +406,7 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane,
 	vbox_crtc->cursor_enabled = true;
 
 	/* pinning is done in prepare/cleanup framebuffer */
-	src = vbox_bo_kmap(bo);
+	src = drm_gem_vram_kmap(gbo, true, NULL);
 	if (IS_ERR(src)) {
 		mutex_unlock(&vbox->hw_mutex);
 		DRM_WARN("Could not kmap cursor bo, skipping update\n");
@@ -420,7 +422,7 @@ static void vbox_cursor_atomic_update(struct drm_plane *plane,
 	data_size = width * height * 4 + mask_size;
 
 	copy_cursor_image(src, vbox->cursor_data, width, height, mask_size);
-	vbox_bo_kunmap(bo);
+	drm_gem_vram_kunmap(gbo);
 
 	flags = VBOX_MOUSE_POINTER_VISIBLE | VBOX_MOUSE_POINTER_SHAPE |
 		VBOX_MOUSE_POINTER_ALPHA;
@@ -460,25 +462,25 @@ static void vbox_cursor_atomic_disable(struct drm_plane *plane,
 static int vbox_cursor_prepare_fb(struct drm_plane *plane,
 				  struct drm_plane_state *new_state)
 {
-	struct vbox_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!new_state->fb)
 		return 0;
 
-	bo = gem_to_vbox_bo(to_vbox_framebuffer(new_state->fb)->obj);
-	return vbox_bo_pin(bo, TTM_PL_FLAG_SYSTEM);
+	gbo = drm_gem_vram_of_gem(to_vbox_framebuffer(new_state->fb)->obj);
+	return drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_SYSTEM);
 }
 
 static void vbox_cursor_cleanup_fb(struct drm_plane *plane,
 				   struct drm_plane_state *old_state)
 {
-	struct vbox_bo *bo;
+	struct drm_gem_vram_object *gbo;
 
 	if (!plane->state->fb)
 		return;
 
-	bo = gem_to_vbox_bo(to_vbox_framebuffer(plane->state->fb)->obj);
-	vbox_bo_unpin(bo);
+	gbo = drm_gem_vram_of_gem(to_vbox_framebuffer(plane->state->fb)->obj);
+	drm_gem_vram_unpin(gbo);
 }
 
 static const u32 vbox_cursor_plane_formats[] = {
diff --git a/drivers/gpu/drm/vboxvideo/vbox_ttm.c b/drivers/gpu/drm/vboxvideo/vbox_ttm.c
index 9d78438c2877..b82595a9ed0f 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_ttm.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_ttm.c
@@ -8,167 +8,23 @@
  */
 #include <linux/pci.h>
 #include <drm/drm_file.h>
-#include <drm/ttm/ttm_page_alloc.h>
 #include "vbox_drv.h"
 
-static inline struct vbox_private *vbox_bdev(struct ttm_bo_device *bd)
-{
-	return container_of(bd, struct vbox_private, ttm.bdev);
-}
-
-static void vbox_bo_ttm_destroy(struct ttm_buffer_object *tbo)
-{
-	struct vbox_bo *bo;
-
-	bo = container_of(tbo, struct vbox_bo, bo);
-
-	drm_gem_object_release(&bo->gem);
-	kfree(bo);
-}
-
-static bool vbox_ttm_bo_is_vbox_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &vbox_bo_ttm_destroy)
-		return true;
-
-	return false;
-}
-
-static int
-vbox_bo_init_mem_type(struct ttm_bo_device *bdev, u32 type,
-		      struct ttm_mem_type_manager *man)
-{
-	switch (type) {
-	case TTM_PL_SYSTEM:
-		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_MASK_CACHING;
-		man->default_caching = TTM_PL_FLAG_CACHED;
-		break;
-	case TTM_PL_VRAM:
-		man->func = &ttm_bo_manager_func;
-		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
-		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
-		man->default_caching = TTM_PL_FLAG_WC;
-		break;
-	default:
-		DRM_ERROR("Unsupported memory type %u\n", (unsigned int)type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void
-vbox_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
-{
-	struct vbox_bo *vboxbo = vbox_bo(bo);
-
-	if (!vbox_ttm_bo_is_vbox_bo(bo))
-		return;
-
-	vbox_ttm_placement(vboxbo, TTM_PL_FLAG_SYSTEM);
-	*pl = vboxbo->placement;
-}
-
-static int vbox_bo_verify_access(struct ttm_buffer_object *bo,
-				 struct file *filp)
-{
-	return 0;
-}
-
-static int vbox_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
-				   struct ttm_mem_reg *mem)
-{
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
-	struct vbox_private *vbox = vbox_bdev(bdev);
-
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
-	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
-		return -EINVAL;
-	switch (mem->mem_type) {
-	case TTM_PL_SYSTEM:
-		/* system memory */
-		return 0;
-	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = pci_resource_start(vbox->ddev.pdev, 0);
-		mem->bus.is_iomem = true;
-		break;
-	default:
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void vbox_ttm_io_mem_free(struct ttm_bo_device *bdev,
-				 struct ttm_mem_reg *mem)
-{
-}
-
-static void vbox_ttm_backend_destroy(struct ttm_tt *tt)
-{
-	ttm_tt_fini(tt);
-	kfree(tt);
-}
-
-static struct ttm_backend_func vbox_tt_backend_func = {
-	.destroy = &vbox_ttm_backend_destroy,
-};
-
-static struct ttm_tt *vbox_ttm_tt_create(struct ttm_buffer_object *bo,
-					 u32 page_flags)
-{
-	struct ttm_tt *tt;
-
-	tt = kzalloc(sizeof(*tt), GFP_KERNEL);
-	if (!tt)
-		return NULL;
-
-	tt->func = &vbox_tt_backend_func;
-	if (ttm_tt_init(tt, bo, page_flags)) {
-		kfree(tt);
-		return NULL;
-	}
-
-	return tt;
-}
-
-static struct ttm_bo_driver vbox_bo_driver = {
-	.ttm_tt_create = vbox_ttm_tt_create,
-	.init_mem_type = vbox_bo_init_mem_type,
-	.eviction_valuable = ttm_bo_eviction_valuable,
-	.evict_flags = vbox_bo_evict_flags,
-	.verify_access = vbox_bo_verify_access,
-	.io_mem_reserve = &vbox_ttm_io_mem_reserve,
-	.io_mem_free = &vbox_ttm_io_mem_free,
-};
-
 int vbox_mm_init(struct vbox_private *vbox)
 {
+	struct drm_vram_mm *vmm;
 	int ret;
 	struct drm_device *dev = &vbox->ddev;
-	struct ttm_bo_device *bdev = &vbox->ttm.bdev;
 
-	ret = ttm_bo_device_init(&vbox->ttm.bdev,
-				 &vbox_bo_driver,
-				 dev->anon_inode->i_mapping,
-				 true);
-	if (ret) {
-		DRM_ERROR("Error initialising bo driver; %d\n", ret);
+	vmm = drm_vram_helper_alloc_mm(dev, pci_resource_start(dev->pdev, 0),
+				       vbox->available_vram_size,
+				       &drm_gem_vram_mm_funcs);
+	if (IS_ERR(vmm)) {
+		ret = PTR_ERR(vmm);
+		DRM_ERROR("Error initializing VRAM MM; %d\n", ret);
 		return ret;
 	}
 
-	ret = ttm_bo_init_mm(bdev, TTM_PL_VRAM,
-			     vbox->available_vram_size >> PAGE_SHIFT);
-	if (ret) {
-		DRM_ERROR("Failed ttm VRAM init: %d\n", ret);
-		goto err_device_release;
-	}
-
 #ifdef DRM_MTRR_WC
 	vbox->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 0),
 				     pci_resource_len(dev->pdev, 0),
@@ -178,10 +34,6 @@ int vbox_mm_init(struct vbox_private *vbox)
 					 pci_resource_len(dev->pdev, 0));
 #endif
 	return 0;
-
-err_device_release:
-	ttm_bo_device_release(&vbox->ttm.bdev);
-	return ret;
 }
 
 void vbox_mm_fini(struct vbox_private *vbox)
@@ -193,196 +45,5 @@ void vbox_mm_fini(struct vbox_private *vbox)
 #else
 	arch_phys_wc_del(vbox->fb_mtrr);
 #endif
-	ttm_bo_device_release(&vbox->ttm.bdev);
-}
-
-void vbox_ttm_placement(struct vbox_bo *bo, int domain)
-{
-	unsigned int i;
-	u32 c = 0;
-
-	bo->placement.placement = bo->placements;
-	bo->placement.busy_placement = bo->placements;
-
-	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++].flags =
-		    TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
-	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++].flags =
-		    TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-	if (!c)
-		bo->placements[c++].flags =
-		    TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
-
-	bo->placement.num_placement = c;
-	bo->placement.num_busy_placement = c;
-
-	for (i = 0; i < c; ++i) {
-		bo->placements[i].fpfn = 0;
-		bo->placements[i].lpfn = 0;
-	}
-}
-
-int vbox_bo_create(struct vbox_private *vbox, int size, int align,
-		   u32 flags, struct vbox_bo **pvboxbo)
-{
-	struct vbox_bo *vboxbo;
-	size_t acc_size;
-	int ret;
-
-	vboxbo = kzalloc(sizeof(*vboxbo), GFP_KERNEL);
-	if (!vboxbo)
-		return -ENOMEM;
-
-	ret = drm_gem_object_init(&vbox->ddev, &vboxbo->gem, size);
-	if (ret)
-		goto err_free_vboxbo;
-
-	vboxbo->bo.bdev = &vbox->ttm.bdev;
-
-	vbox_ttm_placement(vboxbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
-
-	acc_size = ttm_bo_dma_acc_size(&vbox->ttm.bdev, size,
-				       sizeof(struct vbox_bo));
-
-	ret = ttm_bo_init(&vbox->ttm.bdev, &vboxbo->bo, size,
-			  ttm_bo_type_device, &vboxbo->placement,
-			  align >> PAGE_SHIFT, false, acc_size,
-			  NULL, NULL, vbox_bo_ttm_destroy);
-	if (ret)
-		goto err_free_vboxbo;
-
-	*pvboxbo = vboxbo;
-
-	return 0;
-
-err_free_vboxbo:
-	kfree(vboxbo);
-	return ret;
-}
-
-int vbox_bo_pin(struct vbox_bo *bo, u32 pl_flag)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (bo->pin_count) {
-		bo->pin_count++;
-		return 0;
-	}
-
-	ret = vbox_bo_reserve(bo, false);
-	if (ret)
-		return ret;
-
-	vbox_ttm_placement(bo, pl_flag);
-
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret == 0)
-		bo->pin_count = 1;
-
-	vbox_bo_unreserve(bo);
-
-	return ret;
-}
-
-int vbox_bo_unpin(struct vbox_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	ret = vbox_bo_reserve(bo, false);
-	if (ret) {
-		DRM_ERROR("Error %d reserving bo, leaving it pinned\n", ret);
-		return ret;
-	}
-
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-
-	vbox_bo_unreserve(bo);
-
-	return ret;
-}
-
-/*
- * Move a vbox-owned buffer object to system memory if no one else has it
- * pinned.  The caller must have pinned it previously, and this call will
- * release the caller's pin.
- */
-int vbox_bo_push_sysram(struct vbox_bo *bo)
-{
-	struct ttm_operation_ctx ctx = { false, false };
-	int i, ret;
-
-	if (!bo->pin_count) {
-		DRM_ERROR("unpin bad %p\n", bo);
-		return 0;
-	}
-	bo->pin_count--;
-	if (bo->pin_count)
-		return 0;
-
-	if (bo->kmap.virtual) {
-		ttm_bo_kunmap(&bo->kmap);
-		bo->kmap.virtual = NULL;
-	}
-
-	vbox_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
-
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
-
-	ret = ttm_bo_validate(&bo->bo, &bo->placement, &ctx);
-	if (ret) {
-		DRM_ERROR("pushing to VRAM failed\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-int vbox_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv = filp->private_data;
-	struct vbox_private *vbox = file_priv->minor->dev->dev_private;
-
-	return ttm_bo_mmap(filp, vma, &vbox->ttm.bdev);
-}
-
-void *vbox_bo_kmap(struct vbox_bo *bo)
-{
-	int ret;
-
-	if (bo->kmap.virtual)
-		return bo->kmap.virtual;
-
-	ret = ttm_bo_kmap(&bo->bo, 0, bo->bo.num_pages, &bo->kmap);
-	if (ret) {
-		DRM_ERROR("Error kmapping bo: %d\n", ret);
-		return NULL;
-	}
-
-	return bo->kmap.virtual;
-}
-
-void vbox_bo_kunmap(struct vbox_bo *bo)
-{
-	if (bo->kmap.virtual) {
-		ttm_bo_kunmap(&bo->kmap);
-		bo->kmap.virtual = NULL;
-	}
+	drm_vram_helper_release_mm(&vbox->ddev);
 }
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 88ebd681d7eb..1434bb829267 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -799,13 +799,36 @@ vc4_prime_import_sg_table(struct drm_device *dev,
 	return obj;
 }
 
+static int vc4_grab_bin_bo(struct vc4_dev *vc4, struct vc4_file *vc4file)
+{
+	int ret;
+
+	if (!vc4->v3d)
+		return -ENODEV;
+
+	if (vc4file->bin_bo_used)
+		return 0;
+
+	ret = vc4_v3d_bin_bo_get(vc4, &vc4file->bin_bo_used);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
 	struct drm_vc4_create_bo *args = data;
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_bo *bo = NULL;
 	int ret;
 
+	ret = vc4_grab_bin_bo(vc4, vc4file);
+	if (ret)
+		return ret;
+
 	/*
 	 * We can't allocate from the BO cache, because the BOs don't
 	 * get zeroed, and that might leak data between users.
@@ -846,6 +869,8 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
 	struct drm_vc4_create_shader_bo *args = data;
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_bo *bo = NULL;
 	int ret;
 
@@ -865,6 +890,10 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	ret = vc4_grab_bin_bo(vc4, vc4file);
+	if (ret)
+		return ret;
+
 	bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
@@ -894,7 +923,7 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 
- fail:
+fail:
 	drm_gem_object_put_unlocked(&bo->base.base);
 
 	return ret;
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index f9dec08267dc..f9b46911fa50 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -29,13 +29,9 @@ vc4_debugfs_init(struct drm_minor *minor)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(minor->dev);
 	struct vc4_debugfs_info_entry *entry;
-	struct dentry *dentry;
 
-	dentry = debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
-				     minor->debugfs_root,
-				     &vc4->load_tracker_enabled);
-	if (!dentry)
-		return -ENOMEM;
+	debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
+			    minor->debugfs_root, &vc4->load_tracker_enabled);
 
 	list_for_each_entry(entry, &vc4->debugfs_list, link) {
 		int ret = drm_debugfs_create_files(&entry->info, 1,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 6d9be20a32be..0f99ad03614e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -128,8 +128,12 @@ static int vc4_open(struct drm_device *dev, struct drm_file *file)
 
 static void vc4_close(struct drm_device *dev, struct drm_file *file)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file->driver_priv;
 
+	if (vc4file->bin_bo_used)
+		vc4_v3d_bin_bo_put(vc4);
+
 	vc4_perfmon_close_file(vc4file);
 	kfree(vc4file);
 }
@@ -274,6 +278,8 @@ static int vc4_drm_bind(struct device *dev)
 	drm->dev_private = vc4;
 	INIT_LIST_HEAD(&vc4->debugfs_list);
 
+	mutex_init(&vc4->bin_bo_lock);
+
 	ret = vc4_bo_cache_init(drm);
 	if (ret)
 		goto dev_put;
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 4f13f6262491..9170a24ec5f5 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -216,6 +216,11 @@ struct vc4_dev {
 	 * the minor is available (after drm_dev_register()).
 	 */
 	struct list_head debugfs_list;
+
+	/* Mutex for binner bo allocation. */
+	struct mutex bin_bo_lock;
+	/* Reference count for our binner bo. */
+	struct kref bin_bo_kref;
 };
 
 static inline struct vc4_dev *
@@ -584,6 +589,11 @@ struct vc4_exec_info {
 	 * NULL otherwise.
 	 */
 	struct vc4_perfmon *perfmon;
+
+	/* Whether the exec has taken a reference to the binner BO, which should
+	 * happen with a VC4_PACKET_TILE_BINNING_MODE_CONFIG packet.
+	 */
+	bool bin_bo_used;
 };
 
 /* Per-open file private data. Any driver-specific resource that has to be
@@ -594,6 +604,8 @@ struct vc4_file {
 		struct idr idr;
 		struct mutex lock;
 	} perfmon;
+
+	bool bin_bo_used;
 };
 
 static inline struct vc4_exec_info *
@@ -833,6 +845,8 @@ void vc4_plane_async_set_fb(struct drm_plane *plane,
 extern struct platform_driver vc4_v3d_driver;
 extern const struct of_device_id vc4_v3d_dt_match[];
 int vc4_v3d_get_bin_slot(struct vc4_dev *vc4);
+int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used);
+void vc4_v3d_bin_bo_put(struct vc4_dev *vc4);
 int vc4_v3d_pm_get(struct vc4_dev *vc4);
 void vc4_v3d_pm_put(struct vc4_dev *vc4);
 
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index d9311be32a4f..84795d928f20 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -820,6 +820,7 @@ static int
 vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 {
 	struct drm_vc4_submit_cl *args = exec->args;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	void *temp = NULL;
 	void *bin;
 	int ret = 0;
@@ -918,6 +919,12 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 	if (ret)
 		goto fail;
 
+	if (exec->found_tile_binning_mode_config_packet) {
+		ret = vc4_v3d_bin_bo_get(vc4, &exec->bin_bo_used);
+		if (ret)
+			goto fail;
+	}
+
 	/* Block waiting on any previous rendering into the CS's VBO,
 	 * IB, or textures, so that pixels are actually written by the
 	 * time we try to read them.
@@ -966,6 +973,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	vc4->bin_alloc_used &= ~exec->bin_slots;
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
 
+	/* Release the reference on the binner BO if needed. */
+	if (exec->bin_bo_used)
+		vc4_v3d_bin_bo_put(vc4);
+
 	/* Release the reference we had on the perf monitor. */
 	vc4_perfmon_put(exec->perfmon);
 
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 99fc8569e0f5..43442c5619a3 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -255,11 +255,17 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
+static void vc4_hdmi_connector_reset(struct drm_connector *connector)
+{
+	drm_atomic_helper_connector_reset(connector);
+	drm_atomic_helper_connector_tv_reset(connector);
+}
+
 static const struct drm_connector_funcs vc4_hdmi_connector_funcs = {
 	.detect = vc4_hdmi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = vc4_hdmi_connector_destroy,
-	.reset = drm_atomic_helper_connector_reset,
+	.reset = vc4_hdmi_connector_reset,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index ffd0a4388752..e226c24e543f 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -59,15 +59,22 @@ vc4_overflow_mem_work(struct work_struct *work)
 {
 	struct vc4_dev *vc4 =
 		container_of(work, struct vc4_dev, overflow_mem_work);
-	struct vc4_bo *bo = vc4->bin_bo;
+	struct vc4_bo *bo;
 	int bin_bo_slot;
 	struct vc4_exec_info *exec;
 	unsigned long irqflags;
 
+	mutex_lock(&vc4->bin_bo_lock);
+
+	if (!vc4->bin_bo)
+		goto complete;
+
+	bo = vc4->bin_bo;
+
 	bin_bo_slot = vc4_v3d_get_bin_slot(vc4);
 	if (bin_bo_slot < 0) {
 		DRM_ERROR("Couldn't allocate binner overflow mem\n");
-		return;
+		goto complete;
 	}
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
@@ -98,6 +105,9 @@ vc4_overflow_mem_work(struct work_struct *work)
 	V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM);
 	V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+complete:
+	mutex_unlock(&vc4->bin_bo_lock);
 }
 
 static void
@@ -249,8 +259,10 @@ vc4_irq_postinstall(struct drm_device *dev)
 	if (!vc4->v3d)
 		return 0;
 
-	/* Enable both the render done and out of memory interrupts. */
-	V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
+	/* Enable the render done interrupts. The out-of-memory interrupt is
+	 * enabled as soon as we have a binner BO allocated.
+	 */
+	V3D_WRITE(V3D_INTENA, V3D_INT_FLDONE | V3D_INT_FRDONE);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index afc80b245ea3..441e06d45c89 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -310,10 +310,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	struct drm_framebuffer *fb = state->fb;
 	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
 	u32 subpixel_src_mask = (1 << 16) - 1;
-	u32 format = fb->format->format;
 	int num_planes = fb->format->num_planes;
 	struct drm_crtc_state *crtc_state;
-	u32 h_subsample, v_subsample;
+	u32 h_subsample = fb->format->hsub;
+	u32 v_subsample = fb->format->vsub;
 	int i, ret;
 
 	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
@@ -328,9 +328,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	if (ret)
 		return ret;
 
-	h_subsample = drm_format_horz_chroma_subsampling(format);
-	v_subsample = drm_format_vert_chroma_subsampling(format);
-
 	for (i = 0; i < num_planes; i++)
 		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
 
@@ -592,8 +589,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	u32 ctl0_offset = vc4_state->dlist_count;
 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
 	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
-	int num_planes = drm_format_num_planes(format->drm);
-	u32 h_subsample, v_subsample;
+	int num_planes = fb->format->num_planes;
+	u32 h_subsample = fb->format->hsub;
+	u32 v_subsample = fb->format->vsub;
 	bool mix_plane_alpha;
 	bool covers_screen;
 	u32 scl0, scl1, pitch0;
@@ -623,9 +621,6 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 		scl1 = vc4_get_scl_field(state, 0);
 	}
 
-	h_subsample = drm_format_horz_chroma_subsampling(format->drm);
-	v_subsample = drm_format_vert_chroma_subsampling(format->drm);
-
 	rotation = drm_rotation_simplify(state->rotation,
 					 DRM_MODE_ROTATE_0 |
 					 DRM_MODE_REFLECT_X |
diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
index c8b89a78f9f4..96f91c1b4b6e 100644
--- a/drivers/gpu/drm/vc4/vc4_txp.c
+++ b/drivers/gpu/drm/vc4/vc4_txp.c
@@ -221,17 +221,18 @@ static const u32 txp_fmts[] = {
 };
 
 static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
-					struct drm_connector_state *conn_state)
+					  struct drm_atomic_state *state)
 {
+	struct drm_connector_state *conn_state;
 	struct drm_crtc_state *crtc_state;
 	struct drm_framebuffer *fb;
 	int i;
 
+	conn_state = drm_atomic_get_new_connector_state(state, conn);
 	if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
 		return 0;
 
-	crtc_state = drm_atomic_get_new_crtc_state(conn_state->state,
-						   conn_state->crtc);
+	crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc);
 
 	fb = conn_state->writeback_job->fb;
 	if (fb->width != crtc_state->mode.hdisplay ||
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index a4b6859e3af6..0533646a4d13 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -213,7 +213,7 @@ try_again:
 }
 
 /**
- * vc4_allocate_bin_bo() - allocates the memory that will be used for
+ * bin_bo_alloc() - allocates the memory that will be used for
  * tile binning.
  *
  * The binner has a limitation that the addresses in the tile state
@@ -234,14 +234,16 @@ try_again:
  * overall CMA pool before they make scenes complicated enough to run
  * out of bin space.
  */
-static int vc4_allocate_bin_bo(struct drm_device *drm)
+static int bin_bo_alloc(struct vc4_dev *vc4)
 {
-	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_v3d *v3d = vc4->v3d;
 	uint32_t size = 16 * 1024 * 1024;
 	int ret = 0;
 	struct list_head list;
 
+	if (!v3d)
+		return -ENODEV;
+
 	/* We may need to try allocating more than once to get a BO
 	 * that doesn't cross 256MB.  Track the ones we've allocated
 	 * that failed so far, so that we can free them when we've got
@@ -251,7 +253,7 @@ static int vc4_allocate_bin_bo(struct drm_device *drm)
 	INIT_LIST_HEAD(&list);
 
 	while (true) {
-		struct vc4_bo *bo = vc4_bo_create(drm, size, true,
+		struct vc4_bo *bo = vc4_bo_create(vc4->dev, size, true,
 						  VC4_BO_TYPE_BIN);
 
 		if (IS_ERR(bo)) {
@@ -292,6 +294,14 @@ static int vc4_allocate_bin_bo(struct drm_device *drm)
 			WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
 				     bo->base.base.size / vc4->bin_alloc_size);
 
+			kref_init(&vc4->bin_bo_kref);
+
+			/* Enable the out-of-memory interrupt to set our
+			 * newly-allocated binner BO, potentially from an
+			 * already-pending-but-masked interrupt.
+			 */
+			V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM);
+
 			break;
 		}
 
@@ -311,6 +321,47 @@ static int vc4_allocate_bin_bo(struct drm_device *drm)
 	return ret;
 }
 
+int vc4_v3d_bin_bo_get(struct vc4_dev *vc4, bool *used)
+{
+	int ret = 0;
+
+	mutex_lock(&vc4->bin_bo_lock);
+
+	if (used && *used)
+		goto complete;
+
+	if (vc4->bin_bo)
+		kref_get(&vc4->bin_bo_kref);
+	else
+		ret = bin_bo_alloc(vc4);
+
+	if (ret == 0 && used)
+		*used = true;
+
+complete:
+	mutex_unlock(&vc4->bin_bo_lock);
+
+	return ret;
+}
+
+static void bin_bo_release(struct kref *ref)
+{
+	struct vc4_dev *vc4 = container_of(ref, struct vc4_dev, bin_bo_kref);
+
+	if (WARN_ON_ONCE(!vc4->bin_bo))
+		return;
+
+	drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+	vc4->bin_bo = NULL;
+}
+
+void vc4_v3d_bin_bo_put(struct vc4_dev *vc4)
+{
+	mutex_lock(&vc4->bin_bo_lock);
+	kref_put(&vc4->bin_bo_kref, bin_bo_release);
+	mutex_unlock(&vc4->bin_bo_lock);
+}
+
 #ifdef CONFIG_PM
 static int vc4_v3d_runtime_suspend(struct device *dev)
 {
@@ -319,9 +370,6 @@ static int vc4_v3d_runtime_suspend(struct device *dev)
 
 	vc4_irq_uninstall(vc4->dev);
 
-	drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
-	vc4->bin_bo = NULL;
-
 	clk_disable_unprepare(v3d->clk);
 
 	return 0;
@@ -333,10 +381,6 @@ static int vc4_v3d_runtime_resume(struct device *dev)
 	struct vc4_dev *vc4 = v3d->vc4;
 	int ret;
 
-	ret = vc4_allocate_bin_bo(vc4->dev);
-	if (ret)
-		return ret;
-
 	ret = clk_prepare_enable(v3d->clk);
 	if (ret != 0)
 		return ret;
@@ -403,12 +447,6 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	if (ret != 0)
 		return ret;
 
-	ret = vc4_allocate_bin_bo(drm);
-	if (ret) {
-		clk_disable_unprepare(v3d->clk);
-		return ret;
-	}
-
 	/* Reset the binner overflow address/size at setup, to be sure
 	 * we don't reuse an old one.
 	 */
diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile
index 4e90cc8fa651..458e606a936f 100644
--- a/drivers/gpu/drm/virtio/Makefile
+++ b/drivers/gpu/drm/virtio/Makefile
@@ -4,8 +4,8 @@
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 
 virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_gem.o \
-	virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \
+	virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \
 	virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \
-	virtgpu_ioctl.o virtgpu_prime.o
+	virtgpu_ioctl.o virtgpu_prime.o virtgpu_trace_points.o
 
 obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio-gpu.o
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index 86843a4d6102..ba16e8cb7124 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -29,6 +29,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_probe_helper.h>
+#include <drm/drm_damage_helper.h>
 
 #define XRES_MIN    32
 #define YRES_MIN    32
@@ -49,23 +50,10 @@ static const struct drm_crtc_funcs virtio_gpu_crtc_funcs = {
 	.atomic_destroy_state   = drm_atomic_helper_crtc_destroy_state,
 };
 
-static int
-virtio_gpu_framebuffer_surface_dirty(struct drm_framebuffer *fb,
-				     struct drm_file *file_priv,
-				     unsigned int flags, unsigned int color,
-				     struct drm_clip_rect *clips,
-				     unsigned int num_clips)
-{
-	struct virtio_gpu_framebuffer *virtio_gpu_fb
-		= to_virtio_gpu_framebuffer(fb);
-
-	return virtio_gpu_surface_dirty(virtio_gpu_fb, clips, num_clips);
-}
-
 static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = {
 	.create_handle = drm_gem_fb_create_handle,
 	.destroy = drm_gem_fb_destroy,
-	.dirty = virtio_gpu_framebuffer_surface_dirty,
+	.dirty = drm_atomic_helper_dirtyfb,
 };
 
 int
@@ -85,10 +73,6 @@ virtio_gpu_framebuffer_init(struct drm_device *dev,
 		vgfb->base.obj[0] = NULL;
 		return ret;
 	}
-
-	spin_lock_init(&vgfb->dirty_lock);
-	vgfb->x1 = vgfb->y1 = INT_MAX;
-	vgfb->x2 = vgfb->y2 = 0;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index b69ae10ca238..9e2d3062b01d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -102,7 +102,6 @@ struct virtio_gpu_fence {
 	struct dma_fence f;
 	struct virtio_gpu_fence_driver *drv;
 	struct list_head node;
-	uint64_t seq;
 };
 #define to_virtio_fence(x) \
 	container_of(x, struct virtio_gpu_fence, f)
@@ -143,9 +142,6 @@ struct virtio_gpu_output {
 
 struct virtio_gpu_framebuffer {
 	struct drm_framebuffer base;
-	int x1, y1, x2, y2; /* dirty rect */
-	spinlock_t dirty_lock;
-	uint32_t hw_res_handle;
 	struct virtio_gpu_fence *fence;
 };
 #define to_virtio_gpu_framebuffer(x) \
@@ -255,10 +251,6 @@ int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv,
 			      struct drm_device *dev,
 			      uint32_t handle, uint64_t *offset_p);
 
-/* virtio_fb */
-int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *qfb,
-			     struct drm_clip_rect *clips,
-			     unsigned int num_clips);
 /* virtio vg */
 int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev);
 void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev);
@@ -356,7 +348,7 @@ int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma);
 bool virtio_fence_signaled(struct dma_fence *f);
 struct virtio_gpu_fence *virtio_gpu_fence_alloc(
 	struct virtio_gpu_device *vgdev);
-int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
 			  struct virtio_gpu_fence *fence);
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c
deleted file mode 100644
index b07584b1c2bf..000000000000
--- a/drivers/gpu/drm/virtio/virtgpu_fb.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (C) 2015 Red Hat, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_fb_helper.h>
-#include "virtgpu_drv.h"
-
-static int virtio_gpu_dirty_update(struct virtio_gpu_framebuffer *fb,
-				   bool store, int x, int y,
-				   int width, int height)
-{
-	struct drm_device *dev = fb->base.dev;
-	struct virtio_gpu_device *vgdev = dev->dev_private;
-	bool store_for_later = false;
-	int bpp = fb->base.format->cpp[0];
-	int x2, y2;
-	unsigned long flags;
-	struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(fb->base.obj[0]);
-
-	if ((width <= 0) ||
-	    (x + width > fb->base.width) ||
-	    (y + height > fb->base.height)) {
-		DRM_DEBUG("values out of range %dx%d+%d+%d, fb %dx%d\n",
-			  width, height, x, y,
-			  fb->base.width, fb->base.height);
-		return -EINVAL;
-	}
-
-	/*
-	 * Can be called with pretty much any context (console output
-	 * path).  If we are in atomic just store the dirty rect info
-	 * to send out the update later.
-	 *
-	 * Can't test inside spin lock.
-	 */
-	if (in_atomic() || store)
-		store_for_later = true;
-
-	x2 = x + width - 1;
-	y2 = y + height - 1;
-
-	spin_lock_irqsave(&fb->dirty_lock, flags);
-
-	if (fb->y1 < y)
-		y = fb->y1;
-	if (fb->y2 > y2)
-		y2 = fb->y2;
-	if (fb->x1 < x)
-		x = fb->x1;
-	if (fb->x2 > x2)
-		x2 = fb->x2;
-
-	if (store_for_later) {
-		fb->x1 = x;
-		fb->x2 = x2;
-		fb->y1 = y;
-		fb->y2 = y2;
-		spin_unlock_irqrestore(&fb->dirty_lock, flags);
-		return 0;
-	}
-
-	fb->x1 = fb->y1 = INT_MAX;
-	fb->x2 = fb->y2 = 0;
-
-	spin_unlock_irqrestore(&fb->dirty_lock, flags);
-
-	{
-		uint32_t offset;
-		uint32_t w = x2 - x + 1;
-		uint32_t h = y2 - y + 1;
-
-		offset = (y * fb->base.pitches[0]) + x * bpp;
-
-		virtio_gpu_cmd_transfer_to_host_2d(vgdev, obj,
-						   offset,
-						   cpu_to_le32(w),
-						   cpu_to_le32(h),
-						   cpu_to_le32(x),
-						   cpu_to_le32(y),
-						   NULL);
-
-	}
-	virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle,
-				      x, y, x2 - x + 1, y2 - y + 1);
-	return 0;
-}
-
-int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *vgfb,
-			     struct drm_clip_rect *clips,
-			     unsigned int num_clips)
-{
-	struct virtio_gpu_device *vgdev = vgfb->base.dev->dev_private;
-	struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(vgfb->base.obj[0]);
-	struct drm_clip_rect norect;
-	struct drm_clip_rect *clips_ptr;
-	int left, right, top, bottom;
-	int i;
-	int inc = 1;
-
-	if (!num_clips) {
-		num_clips = 1;
-		clips = &norect;
-		norect.x1 = norect.y1 = 0;
-		norect.x2 = vgfb->base.width;
-		norect.y2 = vgfb->base.height;
-	}
-	left = clips->x1;
-	right = clips->x2;
-	top = clips->y1;
-	bottom = clips->y2;
-
-	/* skip the first clip rect */
-	for (i = 1, clips_ptr = clips + inc;
-	     i < num_clips; i++, clips_ptr += inc) {
-		left = min_t(int, left, (int)clips_ptr->x1);
-		right = max_t(int, right, (int)clips_ptr->x2);
-		top = min_t(int, top, (int)clips_ptr->y1);
-		bottom = max_t(int, bottom, (int)clips_ptr->y2);
-	}
-
-	if (obj->dumb)
-		return virtio_gpu_dirty_update(vgfb, false, left, top,
-					       right - left, bottom - top);
-
-	virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle,
-				      left, top, right - left, bottom - top);
-	return 0;
-}
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index 87d1966192f4..70d6c4329778 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -24,6 +24,7 @@
  */
 
 #include <drm/drmP.h>
+#include <trace/events/dma_fence.h>
 #include "virtgpu_drv.h"
 
 static const char *virtio_get_driver_name(struct dma_fence *f)
@@ -40,16 +41,14 @@ bool virtio_fence_signaled(struct dma_fence *f)
 {
 	struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
-	if (atomic64_read(&fence->drv->last_seq) >= fence->seq)
+	if (atomic64_read(&fence->drv->last_seq) >= fence->f.seqno)
 		return true;
 	return false;
 }
 
 static void virtio_fence_value_str(struct dma_fence *f, char *str, int size)
 {
-	struct virtio_gpu_fence *fence = to_virtio_fence(f);
-
-	snprintf(str, size, "%llu", fence->seq);
+	snprintf(str, size, "%llu", f->seqno);
 }
 
 static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size)
@@ -71,17 +70,22 @@ struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev)
 {
 	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
 	struct virtio_gpu_fence *fence = kzalloc(sizeof(struct virtio_gpu_fence),
-							GFP_ATOMIC);
+							GFP_KERNEL);
 	if (!fence)
 		return fence;
 
 	fence->drv = drv;
+
+	/* This only partially initializes the fence because the seqno is
+	 * unknown yet.  The fence must not be used outside of the driver
+	 * until virtio_gpu_fence_emit is called.
+	 */
 	dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock, drv->context, 0);
 
 	return fence;
 }
 
-int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
 			  struct virtio_gpu_fence *fence)
 {
@@ -89,14 +93,15 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 	unsigned long irq_flags;
 
 	spin_lock_irqsave(&drv->lock, irq_flags);
-	fence->seq = ++drv->sync_seq;
+	fence->f.seqno = ++drv->sync_seq;
 	dma_fence_get(&fence->f);
 	list_add_tail(&fence->node, &drv->fences);
 	spin_unlock_irqrestore(&drv->lock, irq_flags);
 
+	trace_dma_fence_emit(&fence->f);
+
 	cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE);
-	cmd_hdr->fence_id = cpu_to_le64(fence->seq);
-	return 0;
+	cmd_hdr->fence_id = cpu_to_le64(fence->f.seqno);
 }
 
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
@@ -109,7 +114,7 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
 	spin_lock_irqsave(&drv->lock, irq_flags);
 	atomic64_set(&vgdev->fence_drv.last_seq, last_seq);
 	list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
-		if (last_seq < fence->seq)
+		if (last_seq < fence->f.seqno)
 			continue;
 		dma_fence_signal_locked(&fence->f);
 		list_del(&fence->node);
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index 949a264985fc..ac60be9b5c19 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -63,7 +63,7 @@ int virtio_gpu_object_list_validate(struct ww_acquire_ctx *ticket,
 	struct virtio_gpu_object *qobj;
 	int ret;
 
-	ret = ttm_eu_reserve_buffers(ticket, head, true, NULL);
+	ret = ttm_eu_reserve_buffers(ticket, head, true, NULL, true);
 	if (ret != 0)
 		return ret;
 
@@ -168,7 +168,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 			goto out_unused_fd;
 		}
 
-		user_bo_handles = (void __user *)(uintptr_t)exbuf->bo_handles;
+		user_bo_handles = u64_to_user_ptr(exbuf->bo_handles);
 		if (copy_from_user(bo_handles, user_bo_handles,
 				   exbuf->num_bo_handles * sizeof(uint32_t))) {
 			ret = -EFAULT;
@@ -195,8 +195,7 @@ static int virtio_gpu_execbuffer_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_free;
 
-	buf = memdup_user((void __user *)(uintptr_t)exbuf->command,
-			  exbuf->size);
+	buf = memdup_user(u64_to_user_ptr(exbuf->command), exbuf->size);
 	if (IS_ERR(buf)) {
 		ret = PTR_ERR(buf);
 		goto out_unresv;
@@ -263,10 +262,9 @@ static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data,
 	default:
 		return -EINVAL;
 	}
-	if (copy_to_user((void __user *)(unsigned long)param->value,
-			 &value, sizeof(int))) {
+	if (copy_to_user(u64_to_user_ptr(param->value), &value, sizeof(int)))
 		return -EFAULT;
-	}
+
 	return 0;
 }
 
@@ -526,7 +524,6 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 	list_for_each_entry(cache_ent, &vgdev->cap_cache, head) {
 		if (cache_ent->id == args->cap_set_id &&
 		    cache_ent->version == args->cap_set_ver) {
-			ptr = cache_ent->caps_cache;
 			spin_unlock(&vgdev->display_info_lock);
 			goto copy_exit;
 		}
@@ -537,15 +534,18 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev,
 	virtio_gpu_cmd_get_capset(vgdev, found_valid, args->cap_set_ver,
 				  &cache_ent);
 
+copy_exit:
 	ret = wait_event_timeout(vgdev->resp_wq,
 				 atomic_read(&cache_ent->is_valid), 5 * HZ);
 	if (!ret)
 		return -EBUSY;
 
+	/* is_valid check must proceed before copy of the cache entry. */
+	smp_rmb();
+
 	ptr = cache_ent->caps_cache;
 
-copy_exit:
-	if (copy_to_user((void __user *)(unsigned long)args->addr, ptr, size))
+	if (copy_to_user(u64_to_user_ptr(args->addr), ptr, size))
 		return -EFAULT;
 
 	return 0;
@@ -553,34 +553,34 @@ copy_exit:
 
 struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS] = {
 	DRM_IOCTL_DEF_DRV(VIRTGPU_MAP, virtio_gpu_map_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_EXECBUFFER, virtio_gpu_execbuffer_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_GETPARAM, virtio_gpu_getparam_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_CREATE,
 			  virtio_gpu_resource_create_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_RESOURCE_INFO, virtio_gpu_resource_info_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	/* make transfer async to the main ring? - no sure, can we
 	 * thread these in the underlying GL
 	 */
 	DRM_IOCTL_DEF_DRV(VIRTGPU_TRANSFER_FROM_HOST,
 			  virtio_gpu_transfer_from_host_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VIRTGPU_TRANSFER_TO_HOST,
 			  virtio_gpu_transfer_to_host_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_WAIT, virtio_gpu_wait_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF_DRV(VIRTGPU_GET_CAPS, virtio_gpu_get_caps_ioctl,
-			  DRM_AUTH | DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_AUTH | DRM_RENDER_ALLOW),
 };
diff --git a/drivers/gpu/drm/virtio/virtgpu_trace.h b/drivers/gpu/drm/virtio/virtgpu_trace.h
new file mode 100644
index 000000000000..711ecc2bd241
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_trace.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_VIRTGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VIRTGPU_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM virtio_gpu
+#define TRACE_INCLUDE_FILE virtgpu_trace
+
+DECLARE_EVENT_CLASS(virtio_gpu_cmd,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr),
+	TP_STRUCT__entry(
+			 __field(int, dev)
+			 __field(unsigned int, vq)
+			 __field(const char *, name)
+			 __field(u32, type)
+			 __field(u32, flags)
+			 __field(u64, fence_id)
+			 __field(u32, ctx_id)
+			 ),
+	TP_fast_assign(
+		       __entry->dev = vq->vdev->index;
+		       __entry->vq = vq->index;
+		       __entry->name = vq->name;
+		       __entry->type = le32_to_cpu(hdr->type);
+		       __entry->flags = le32_to_cpu(hdr->flags);
+		       __entry->fence_id = le64_to_cpu(hdr->fence_id);
+		       __entry->ctx_id = le32_to_cpu(hdr->ctx_id);
+		       ),
+	TP_printk("vdev=%d vq=%u name=%s type=0x%x flags=0x%x fence_id=%llu ctx_id=%u",
+		  __entry->dev, __entry->vq, __entry->name,
+		  __entry->type, __entry->flags, __entry->fence_id,
+		  __entry->ctx_id)
+);
+
+DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_queue,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr)
+);
+
+DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_response,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/virtio
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/virtio/virtgpu_trace_points.c b/drivers/gpu/drm/virtio/virtgpu_trace_points.c
new file mode 100644
index 000000000000..1970cb6f24ef
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_trace_points.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "virtgpu_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "virtgpu_trace.h"
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index e62fe24b1a2e..6c1a90717535 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -28,6 +28,7 @@
 
 #include <drm/drmP.h>
 #include "virtgpu_drv.h"
+#include "virtgpu_trace.h"
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
@@ -192,6 +193,9 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
 
 	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
 		resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf;
+
+		trace_virtio_gpu_cmd_response(vgdev->ctrlq.vq, resp);
+
 		if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) {
 			if (resp->type >= cpu_to_le32(VIRTIO_GPU_RESP_ERR_UNSPEC)) {
 				struct virtio_gpu_ctrl_hdr *cmd;
@@ -284,6 +288,9 @@ retry:
 		spin_lock(&vgdev->ctrlq.qlock);
 		goto retry;
 	} else {
+		trace_virtio_gpu_cmd_queue(vq,
+			(struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+
 		virtqueue_kick(vq);
 	}
 
@@ -359,6 +366,9 @@ retry:
 		spin_lock(&vgdev->cursorq.qlock);
 		goto retry;
 	} else {
+		trace_virtio_gpu_cmd_queue(vq,
+			(struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+
 		virtqueue_kick(vq);
 	}
 
@@ -583,12 +593,14 @@ static void virtio_gpu_cmd_capset_cb(struct virtio_gpu_device *vgdev,
 		    cache_ent->id == le32_to_cpu(cmd->capset_id)) {
 			memcpy(cache_ent->caps_cache, resp->capset_data,
 			       cache_ent->size);
+			/* Copy must occur before is_valid is signalled. */
+			smp_wmb();
 			atomic_set(&cache_ent->is_valid, 1);
 			break;
 		}
 	}
 	spin_unlock(&vgdev->display_info_lock);
-	wake_up(&vgdev->resp_wq);
+	wake_up_all(&vgdev->resp_wq);
 }
 
 static int virtio_get_edid_block(void *data, u8 *buf,
@@ -684,8 +696,11 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev,
 	struct virtio_gpu_vbuffer *vbuf;
 	int max_size;
 	struct virtio_gpu_drv_cap_cache *cache_ent;
+	struct virtio_gpu_drv_cap_cache *search_ent;
 	void *resp_buf;
 
+	*cache_p = NULL;
+
 	if (idx >= vgdev->num_capsets)
 		return -EINVAL;
 
@@ -716,9 +731,26 @@ int virtio_gpu_cmd_get_capset(struct virtio_gpu_device *vgdev,
 	atomic_set(&cache_ent->is_valid, 0);
 	cache_ent->size = max_size;
 	spin_lock(&vgdev->display_info_lock);
-	list_add_tail(&cache_ent->head, &vgdev->cap_cache);
+	/* Search while under lock in case it was added by another task. */
+	list_for_each_entry(search_ent, &vgdev->cap_cache, head) {
+		if (search_ent->id == vgdev->capsets[idx].id &&
+		    search_ent->version == version) {
+			*cache_p = search_ent;
+			break;
+		}
+	}
+	if (!*cache_p)
+		list_add_tail(&cache_ent->head, &vgdev->cap_cache);
 	spin_unlock(&vgdev->display_info_lock);
 
+	if (*cache_p) {
+		/* Entry was found, so free everything that was just created. */
+		kfree(resp_buf);
+		kfree(cache_ent->caps_cache);
+		kfree(cache_ent);
+		return 0;
+	}
+
 	cmd_p = virtio_gpu_alloc_cmd_resp
 		(vgdev, &virtio_gpu_cmd_capset_cb, &vbuf, sizeof(*cmd_p),
 		 sizeof(struct virtio_gpu_resp_capset) + max_size,
diff --git a/drivers/gpu/drm/vkms/vkms_crc.c b/drivers/gpu/drm/vkms/vkms_crc.c
index d7b409a3c0f8..e66ff25c008e 100644
--- a/drivers/gpu/drm/vkms/vkms_crc.c
+++ b/drivers/gpu/drm/vkms/vkms_crc.c
@@ -212,6 +212,15 @@ out:
 	spin_unlock_irqrestore(&out->state_lock, flags);
 }
 
+static const char * const pipe_crc_sources[] = {"auto"};
+
+const char *const *vkms_get_crc_sources(struct drm_crtc *crtc,
+					size_t *count)
+{
+	*count = ARRAY_SIZE(pipe_crc_sources);
+	return pipe_crc_sources;
+}
+
 static int vkms_crc_parse_source(const char *src_name, bool *enabled)
 {
 	int ret = 0;
diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c
index bb66dbcd5e3f..4d11292bc6f3 100644
--- a/drivers/gpu/drm/vkms/vkms_crtc.c
+++ b/drivers/gpu/drm/vkms/vkms_crtc.c
@@ -15,6 +15,10 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
 
 	spin_lock(&output->lock);
 
+	ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer,
+					  output->period_ns);
+	WARN_ON(ret_overrun != 1);
+
 	ret = drm_crtc_handle_vblank(crtc);
 	if (!ret)
 		DRM_ERROR("vkms failure on handling vblank");
@@ -35,10 +39,6 @@ static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
 			DRM_WARN("failed to queue vkms_crc_work_handle");
 	}
 
-	ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer,
-					  output->period_ns);
-	WARN_ON(ret_overrun != 1);
-
 	spin_unlock(&output->lock);
 
 	return HRTIMER_RESTART;
@@ -74,33 +74,23 @@ bool vkms_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe,
 {
 	struct vkms_device *vkmsdev = drm_device_to_vkms_device(dev);
 	struct vkms_output *output = &vkmsdev->output;
+	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
 
 	*vblank_time = output->vblank_hrtimer.node.expires;
 
-	if (!in_vblank_irq)
-		*vblank_time -= output->period_ns;
-
-	return true;
-}
-
-static void vkms_atomic_crtc_reset(struct drm_crtc *crtc)
-{
-	struct vkms_crtc_state *vkms_state = NULL;
+	if (WARN_ON(*vblank_time == vblank->time))
+		return true;
 
-	if (crtc->state) {
-		vkms_state = to_vkms_crtc_state(crtc->state);
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-		kfree(vkms_state);
-		crtc->state = NULL;
-	}
-
-	vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL);
-	if (!vkms_state)
-		return;
-	INIT_WORK(&vkms_state->crc_work, vkms_crc_work_handle);
+	/*
+	 * To prevent races we roll the hrtimer forward before we do any
+	 * interrupt processing - this is how real hw works (the interrupt is
+	 * only generated after all the vblank registers are updated) and what
+	 * the vblank core expects. Therefore we need to always correct the
+	 * timestampe by one frame.
+	 */
+	*vblank_time -= output->period_ns;
 
-	crtc->state = &vkms_state->base;
-	crtc->state->crtc = crtc;
+	return true;
 }
 
 static struct drm_crtc_state *
@@ -135,6 +125,19 @@ static void vkms_atomic_crtc_destroy_state(struct drm_crtc *crtc,
 	}
 }
 
+static void vkms_atomic_crtc_reset(struct drm_crtc *crtc)
+{
+	struct vkms_crtc_state *vkms_state =
+		kzalloc(sizeof(*vkms_state), GFP_KERNEL);
+
+	if (crtc->state)
+		vkms_atomic_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &vkms_state->base);
+	if (vkms_state)
+		INIT_WORK(&vkms_state->crc_work, vkms_crc_work_handle);
+}
+
 static const struct drm_crtc_funcs vkms_crtc_funcs = {
 	.set_config             = drm_atomic_helper_set_config,
 	.destroy                = drm_crtc_cleanup,
@@ -144,6 +147,7 @@ static const struct drm_crtc_funcs vkms_crtc_funcs = {
 	.atomic_destroy_state   = vkms_atomic_crtc_destroy_state,
 	.enable_vblank		= vkms_enable_vblank,
 	.disable_vblank		= vkms_disable_vblank,
+	.get_crc_sources	= vkms_get_crc_sources,
 	.set_crc_source		= vkms_set_crc_source,
 	.verify_crc_source	= vkms_verify_crc_source,
 };
diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index 81f1cfbeb936..b92c30c66a6f 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -20,14 +20,6 @@
 
 extern bool enable_cursor;
 
-static const u32 vkms_formats[] = {
-	DRM_FORMAT_XRGB8888,
-};
-
-static const u32 vkms_cursor_formats[] = {
-	DRM_FORMAT_ARGB8888,
-};
-
 struct vkms_crc_data {
 	struct drm_framebuffer fb;
 	struct drm_rect src, dst;
@@ -136,6 +128,8 @@ int vkms_gem_vmap(struct drm_gem_object *obj);
 void vkms_gem_vunmap(struct drm_gem_object *obj);
 
 /* CRC Support */
+const char *const *vkms_get_crc_sources(struct drm_crtc *crtc,
+					size_t *count);
 int vkms_set_crc_source(struct drm_crtc *crtc, const char *src_name);
 int vkms_verify_crc_source(struct drm_crtc *crtc, const char *source_name,
 			   size_t *values_cnt);
diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c
index 3b162b25312e..56fb5c2a2315 100644
--- a/drivers/gpu/drm/vkms/vkms_output.c
+++ b/drivers/gpu/drm/vkms/vkms_output.c
@@ -6,7 +6,6 @@
 
 static void vkms_connector_destroy(struct drm_connector *connector)
 {
-	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 }
 
@@ -71,12 +70,6 @@ int vkms_output_init(struct vkms_device *vkmsdev)
 
 	drm_connector_helper_add(connector, &vkms_conn_helper_funcs);
 
-	ret = drm_connector_register(connector);
-	if (ret) {
-		DRM_ERROR("Failed to register connector\n");
-		goto err_connector_register;
-	}
-
 	ret = drm_encoder_init(dev, encoder, &vkms_encoder_funcs,
 			       DRM_MODE_ENCODER_VIRTUAL, NULL);
 	if (ret) {
@@ -99,9 +92,6 @@ err_attach:
 	drm_encoder_cleanup(encoder);
 
 err_encoder:
-	drm_connector_unregister(connector);
-
-err_connector_register:
 	drm_connector_cleanup(connector);
 
 err_connector:
diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c
index 0e67d2d42f0c..0fceb6258422 100644
--- a/drivers/gpu/drm/vkms/vkms_plane.c
+++ b/drivers/gpu/drm/vkms/vkms_plane.c
@@ -6,6 +6,14 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 
+static const u32 vkms_formats[] = {
+	DRM_FORMAT_XRGB8888,
+};
+
+static const u32 vkms_cursor_formats[] = {
+	DRM_FORMAT_ARGB8888,
+};
+
 static struct drm_plane_state *
 vkms_plane_duplicate_state(struct drm_plane *plane)
 {
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 6b28a326f8bb..d5fd81a521f6 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -8,6 +8,7 @@ config DRM_VMWGFX
 	select FB_CFB_IMAGEBLIT
 	select DRM_TTM
 	select FB
+	select AS_DIRTY_HELPERS
 	# Only needed for the transitional use of drm_crtc_init - can be removed
 	# again once vmwgfx sets up the primary plane itself.
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 8841bd30e1e5..c877a21a0739 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
 	    vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
 	    vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
-	    vmwgfx_validation.o \
+	    vmwgfx_validation.o vmwgfx_page_dirty.o \
 	    ttm_object.o ttm_lock.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
 	return offset;
 }
 
-
 static inline u32
 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
 			       surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
 	return svga3dsurface_is_dx_screen_target_format(format);
 }
 
+/**
+ * struct svga3dsurface_mip - Mimpmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+	size_t bytes;
+	size_t img_stride;
+	size_t row_stride;
+	struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+	const struct svga3d_surface_desc *desc;
+	struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+	size_t mip_chain_bytes;
+	size_t sheet_bytes;
+	u32 num_mip_levels;
+	u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+	u32 sub_resource;
+	u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+				       u32 mip_level, u32 layer)
+{
+	return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @cache: Pointer to a struct svga3dsurface_cach object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+					    SVGA3dSurfaceFormat format,
+					    u32 num_mip_levels,
+					    u32 num_layers,
+					    u32 num_samples,
+					    struct svga3dsurface_cache *cache)
+{
+	const struct svga3d_surface_desc *desc;
+	u32 i;
+
+	memset(cache, 0, sizeof(*cache));
+	cache->desc = desc = svga3dsurface_get_desc(format);
+	cache->num_mip_levels = num_mip_levels;
+	cache->num_layers = num_layers;
+	for (i = 0; i < cache->num_mip_levels; i++) {
+		struct svga3dsurface_mip *mip = &cache->mip[i];
+
+		mip->size = svga3dsurface_get_mip_size(*size, i);
+		mip->bytes = svga3dsurface_get_image_buffer_size
+			(desc, &mip->size, 0);
+		mip->row_stride =
+			__KERNEL_DIV_ROUND_UP(mip->size.width,
+					      desc->block_size.width) *
+			desc->bytes_per_block * num_samples;
+		if (!mip->row_stride)
+			goto invalid_dim;
+
+		mip->img_stride =
+			__KERNEL_DIV_ROUND_UP(mip->size.height,
+					      desc->block_size.height) *
+			mip->row_stride;
+		if (!mip->img_stride)
+			goto invalid_dim;
+
+		cache->mip_chain_bytes += mip->bytes;
+	}
+	cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
+	if (!cache->sheet_bytes)
+		goto invalid_dim;
+
+	return 0;
+
+invalid_dim:
+	VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
+	return -EINVAL;
+}
+
+/**
+ * svga3dsurface_get_loc - Get a surface location from an offset into the
+ * backing store
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ * @offset: Offset into the surface backing store.
+ */
+static inline void
+svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
+		      struct svga3dsurface_loc *loc,
+		      size_t offset)
+{
+	const struct svga3dsurface_mip *mip = &cache->mip[0];
+	const struct svga3d_surface_desc *desc = cache->desc;
+	u32 layer;
+	int i;
+
+	if (offset >= cache->sheet_bytes)
+		offset %= cache->sheet_bytes;
+
+	layer = offset / cache->mip_chain_bytes;
+	offset -= layer * cache->mip_chain_bytes;
+	for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
+		if (mip->bytes > offset)
+			break;
+		offset -= mip->bytes;
+	}
+
+	loc->sub_resource = svga3dsurface_subres(cache, i, layer);
+	loc->z = offset / mip->img_stride;
+	offset -= loc->z * mip->img_stride;
+	loc->z *= desc->block_size.depth;
+	loc->y = offset / mip->row_stride;
+	offset -= loc->y * mip->row_stride;
+	loc->y *= desc->block_size.height;
+	loc->x = offset / desc->bytes_per_block;
+	loc->x *= desc->block_size.width;
+}
+
+/**
+ * svga3dsurface_inc_loc - Clamp increment a surface location with one block
+ * size
+ * in each dimension.
+ * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
+ *
+ * When computing the size of a range as size = end - start, the range does not
+ * include the end element. However a location representing the last byte
+ * of a touched region in the backing store *is* included in the range.
+ * This function modifies such a location to match the end definition
+ * given as start + size which is the one used in a SVGA3dBox.
+ */
+static inline void
+svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
+		      struct svga3dsurface_loc *loc)
+{
+	const struct svga3d_surface_desc *desc = cache->desc;
+	u32 mip = loc->sub_resource % cache->num_mip_levels;
+	const struct drm_vmw_size *size = &cache->mip[mip].size;
+
+	loc->sub_resource++;
+	loc->x += desc->block_size.width;
+	if (loc->x > size->width)
+		loc->x = size->width;
+	loc->y += desc->block_size.height;
+	if (loc->y > size->height)
+		loc->y = size->height;
+	loc->z += desc->block_size.depth;
+	if (loc->z > size->depth)
+		loc->z = size->depth;
+}
+
+/**
+ * svga3dsurface_min_loc - The start location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ */
+static inline void
+svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
+		      u32 sub_resource,
+		      struct svga3dsurface_loc *loc)
+{
+	loc->sub_resource = sub_resource;
+	loc->x = loc->y = loc->z = 0;
+}
+
+/**
+ * svga3dsurface_min_loc - The end location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ *
+ * Following the end definition given in svga3dsurface_inc_loc(),
+ * Compute the end location of a surface subresource.
+ */
+static inline void
+svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
+		      u32 sub_resource,
+		      struct svga3dsurface_loc *loc)
+{
+	const struct drm_vmw_size *size;
+	u32 mip;
+
+	loc->sub_resource = sub_resource + 1;
+	mip = sub_resource % cache->num_mip_levels;
+	size = &cache->mip[mip].size;
+	loc->x = size->width;
+	loc->y = size->height;
+	loc->z = size->depth;
+}
+
 #endif /* _SVGA3D_SURFACEDEFS_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.c b/drivers/gpu/drm/vmwgfx/ttm_lock.c
index 16b2083cb9d4..5971c72e6d10 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_lock.c
+++ b/drivers/gpu/drm/vmwgfx/ttm_lock.c
@@ -29,7 +29,6 @@
  * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com>
  */
 
-#include <drm/ttm/ttm_module.h>
 #include <linux/atomic.h>
 #include <linux/errno.h>
 #include <linux/wait.h>
@@ -49,8 +48,6 @@ void ttm_lock_init(struct ttm_lock *lock)
 	init_waitqueue_head(&lock->queue);
 	lock->rw = 0;
 	lock->flags = 0;
-	lock->kill_takers = false;
-	lock->signal = SIGKILL;
 }
 
 void ttm_read_unlock(struct ttm_lock *lock)
@@ -66,11 +63,6 @@ static bool __ttm_read_lock(struct ttm_lock *lock)
 	bool locked = false;
 
 	spin_lock(&lock->lock);
-	if (unlikely(lock->kill_takers)) {
-		send_sig(lock->signal, current, 0);
-		spin_unlock(&lock->lock);
-		return false;
-	}
 	if (lock->rw >= 0 && lock->flags == 0) {
 		++lock->rw;
 		locked = true;
@@ -98,11 +90,6 @@ static bool __ttm_read_trylock(struct ttm_lock *lock, bool *locked)
 	*locked = false;
 
 	spin_lock(&lock->lock);
-	if (unlikely(lock->kill_takers)) {
-		send_sig(lock->signal, current, 0);
-		spin_unlock(&lock->lock);
-		return false;
-	}
 	if (lock->rw >= 0 && lock->flags == 0) {
 		++lock->rw;
 		block = false;
@@ -147,11 +134,6 @@ static bool __ttm_write_lock(struct ttm_lock *lock)
 	bool locked = false;
 
 	spin_lock(&lock->lock);
-	if (unlikely(lock->kill_takers)) {
-		send_sig(lock->signal, current, 0);
-		spin_unlock(&lock->lock);
-		return false;
-	}
 	if (lock->rw == 0 && ((lock->flags & ~TTM_WRITE_LOCK_PENDING) == 0)) {
 		lock->rw = -1;
 		lock->flags &= ~TTM_WRITE_LOCK_PENDING;
@@ -182,88 +164,6 @@ int ttm_write_lock(struct ttm_lock *lock, bool interruptible)
 	return ret;
 }
 
-static int __ttm_vt_unlock(struct ttm_lock *lock)
-{
-	int ret = 0;
-
-	spin_lock(&lock->lock);
-	if (unlikely(!(lock->flags & TTM_VT_LOCK)))
-		ret = -EINVAL;
-	lock->flags &= ~TTM_VT_LOCK;
-	wake_up_all(&lock->queue);
-	spin_unlock(&lock->lock);
-
-	return ret;
-}
-
-static void ttm_vt_lock_remove(struct ttm_base_object **p_base)
-{
-	struct ttm_base_object *base = *p_base;
-	struct ttm_lock *lock = container_of(base, struct ttm_lock, base);
-	int ret;
-
-	*p_base = NULL;
-	ret = __ttm_vt_unlock(lock);
-	BUG_ON(ret != 0);
-}
-
-static bool __ttm_vt_lock(struct ttm_lock *lock)
-{
-	bool locked = false;
-
-	spin_lock(&lock->lock);
-	if (lock->rw == 0) {
-		lock->flags &= ~TTM_VT_LOCK_PENDING;
-		lock->flags |= TTM_VT_LOCK;
-		locked = true;
-	} else {
-		lock->flags |= TTM_VT_LOCK_PENDING;
-	}
-	spin_unlock(&lock->lock);
-	return locked;
-}
-
-int ttm_vt_lock(struct ttm_lock *lock,
-		bool interruptible,
-		struct ttm_object_file *tfile)
-{
-	int ret = 0;
-
-	if (interruptible) {
-		ret = wait_event_interruptible(lock->queue,
-					       __ttm_vt_lock(lock));
-		if (unlikely(ret != 0)) {
-			spin_lock(&lock->lock);
-			lock->flags &= ~TTM_VT_LOCK_PENDING;
-			wake_up_all(&lock->queue);
-			spin_unlock(&lock->lock);
-			return ret;
-		}
-	} else
-		wait_event(lock->queue, __ttm_vt_lock(lock));
-
-	/*
-	 * Add a base-object, the destructor of which will
-	 * make sure the lock is released if the client dies
-	 * while holding it.
-	 */
-
-	ret = ttm_base_object_init(tfile, &lock->base, false,
-				   ttm_lock_type, &ttm_vt_lock_remove, NULL);
-	if (ret)
-		(void)__ttm_vt_unlock(lock);
-	else
-		lock->vt_holder = tfile;
-
-	return ret;
-}
-
-int ttm_vt_unlock(struct ttm_lock *lock)
-{
-	return ttm_ref_object_base_unref(lock->vt_holder,
-					 lock->base.handle, TTM_REF_USAGE);
-}
-
 void ttm_suspend_unlock(struct ttm_lock *lock)
 {
 	spin_lock(&lock->lock);
diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.h b/drivers/gpu/drm/vmwgfx/ttm_lock.h
index 0c3af9836863..3d454e8b491f 100644
--- a/drivers/gpu/drm/vmwgfx/ttm_lock.h
+++ b/drivers/gpu/drm/vmwgfx/ttm_lock.h
@@ -63,8 +63,6 @@
  * @lock: Spinlock protecting some lock members.
  * @rw: Read-write lock counter. Protected by @lock.
  * @flags: Lock state. Protected by @lock.
- * @kill_takers: Boolean whether to kill takers of the lock.
- * @signal: Signal to send when kill_takers is true.
  */
 
 struct ttm_lock {
@@ -73,9 +71,6 @@ struct ttm_lock {
 	spinlock_t lock;
 	int32_t rw;
 	uint32_t flags;
-	bool kill_takers;
-	int signal;
-	struct ttm_object_file *vt_holder;
 };
 
 
@@ -220,29 +215,4 @@ extern void ttm_write_unlock(struct ttm_lock *lock);
  */
 extern int ttm_write_lock(struct ttm_lock *lock, bool interruptible);
 
-/**
- * ttm_lock_set_kill
- *
- * @lock: Pointer to a struct ttm_lock
- * @val: Boolean whether to kill processes taking the lock.
- * @signal: Signal to send to the process taking the lock.
- *
- * The kill-when-taking-lock functionality is used to kill processes that keep
- * on using the TTM functionality when its resources has been taken down, for
- * example when the X server exits. A typical sequence would look like this:
- * - X server takes lock in write mode.
- * - ttm_lock_set_kill() is called with @val set to true.
- * - As part of X server exit, TTM resources are taken down.
- * - X server releases the lock on file release.
- * - Another dri client wants to render, takes the lock and is killed.
- *
- */
-static inline void ttm_lock_set_kill(struct ttm_lock *lock, bool val,
-				     int signal)
-{
-	lock->kill_takers = val;
-	if (val)
-		lock->signal = signal;
-}
-
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 5d5c2bce01f3..e8bc7a7ac031 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -463,6 +463,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
 {
 	struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
 
+	WARN_ON(vmw_bo->dirty);
+	WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
 	vmw_bo_unmap(vmw_bo);
 	kfree(vmw_bo);
 }
@@ -476,8 +478,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
 static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
 {
 	struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
+	struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
 
-	vmw_bo_unmap(&vmw_user_bo->vbo);
+	WARN_ON(vbo->dirty);
+	WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
+	vmw_bo_unmap(vbo);
 	ttm_prime_object_kfree(vmw_user_bo, prime);
 }
 
@@ -510,8 +515,9 @@ int vmw_bo_init(struct vmw_private *dev_priv,
 
 	acc_size = vmw_bo_acc_size(dev_priv, size, user);
 	memset(vmw_bo, 0, sizeof(*vmw_bo));
-
-	INIT_LIST_HEAD(&vmw_bo->res_list);
+	BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
+	vmw_bo->base.priority = 3;
+	vmw_bo->res_tree = RB_ROOT;
 
 	ret = ttm_bo_init(bdev, &vmw_bo->base, size,
 			  ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 63f111068a44..a56c9d802382 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -88,6 +88,8 @@ static const struct vmw_res_func vmw_gb_context_func = {
 	.res_type = vmw_res_context,
 	.needs_backup = true,
 	.may_evict = true,
+	.prio = 3,
+	.dirty_prio = 3,
 	.type_name = "guest backed contexts",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_gb_context_create,
@@ -100,6 +102,8 @@ static const struct vmw_res_func vmw_dx_context_func = {
 	.res_type = vmw_res_dx_context,
 	.needs_backup = true,
 	.may_evict = true,
+	.prio = 3,
+	.dirty_prio = 3,
 	.type_name = "dx contexts",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_dx_context_create,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
index b4f6e1217c9d..8c699cb2565b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c
@@ -116,6 +116,8 @@ static const struct vmw_res_func vmw_cotable_func = {
 	.res_type = vmw_res_cotable,
 	.needs_backup = true,
 	.may_evict = true,
+	.prio = 3,
+	.dirty_prio = 3,
 	.type_name = "context guest backed object tables",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_cotable_create,
@@ -307,7 +309,7 @@ static int vmw_cotable_unbind(struct vmw_resource *res,
 	struct ttm_buffer_object *bo = val_buf->bo;
 	struct vmw_fence_obj *fence;
 
-	if (list_empty(&res->mob_head))
+	if (!vmw_resource_mob_attached(res))
 		return 0;
 
 	WARN_ON_ONCE(bo->mem.mem_type != VMW_PL_MOB);
@@ -453,6 +455,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
 		goto out_wait;
 	}
 
+	vmw_resource_mob_detach(res);
 	res->backup = buf;
 	res->backup_size = new_size;
 	vcotbl->size_read_back = cur_size_read_back;
@@ -467,12 +470,12 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size)
 		res->backup = old_buf;
 		res->backup_size = old_size;
 		vcotbl->size_read_back = old_size_read_back;
+		vmw_resource_mob_attach(res);
 		goto out_wait;
 	}
 
+	vmw_resource_mob_attach(res);
 	/* Let go of the old mob. */
-	list_del(&res->mob_head);
-	list_add_tail(&res->mob_head, &buf->res_list);
 	vmw_bo_unreference(&old_buf);
 	res->id = vcotbl->type;
 
@@ -496,7 +499,7 @@ out_wait:
  * is called before bind() in the validation sequence is instead used for two
  * things.
  * 1) Unscrub the cotable if it is scrubbed and still attached to a backup
- *    buffer, that is, if @res->mob_head is non-empty.
+ *    buffer.
  * 2) Resize the cotable if needed.
  */
 static int vmw_cotable_create(struct vmw_resource *res)
@@ -512,7 +515,7 @@ static int vmw_cotable_create(struct vmw_resource *res)
 		new_size *= 2;
 
 	if (likely(new_size <= res->backup_size)) {
-		if (vcotbl->scrubbed && !list_empty(&res->mob_head)) {
+		if (vcotbl->scrubbed && vmw_resource_mob_attached(res)) {
 			ret = vmw_cotable_unscrub(res);
 			if (ret)
 				return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4ff11a0077e1..8349a6cc126f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -254,7 +254,6 @@ static int vmw_restrict_dma_mask;
 static int vmw_assume_16bpp;
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
-static void vmw_master_init(struct vmw_master *);
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
 			      void *ptr);
 
@@ -762,10 +761,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	DRM_INFO("MMIO at 0x%08x size is %u kiB\n",
 		 dev_priv->mmio_start, dev_priv->mmio_size / 1024);
 
-	vmw_master_init(&dev_priv->fbdev_master);
-	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
-	dev_priv->active_master = &dev_priv->fbdev_master;
-
 	dev_priv->mmio_virt = memremap(dev_priv->mmio_start,
 				       dev_priv->mmio_size, MEMREMAP_WB);
 
@@ -833,6 +828,11 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
 		goto out_no_bdev;
 	}
+	dev_priv->vm_ops = *dev_priv->bdev.vm_ops;
+	dev_priv->vm_ops.fault = vmw_bo_vm_fault;
+	dev_priv->vm_ops.pfn_mkwrite = vmw_bo_vm_mkwrite;
+	dev_priv->vm_ops.page_mkwrite = vmw_bo_vm_mkwrite;
+	dev_priv->bdev.vm_ops = &dev_priv->vm_ops;
 
 	/*
 	 * Enable VRAM, but initially don't use it until SVGA is enabled and
@@ -1007,18 +1007,7 @@ static void vmw_driver_unload(struct drm_device *dev)
 static void vmw_postclose(struct drm_device *dev,
 			 struct drm_file *file_priv)
 {
-	struct vmw_fpriv *vmw_fp;
-
-	vmw_fp = vmw_fpriv(file_priv);
-
-	if (vmw_fp->locked_master) {
-		struct vmw_master *vmaster =
-			vmw_master(vmw_fp->locked_master);
-
-		ttm_lock_set_kill(&vmaster->lock, true, SIGTERM);
-		ttm_vt_unlock(&vmaster->lock);
-		drm_master_put(&vmw_fp->locked_master);
-	}
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 
 	ttm_object_file_release(&vmw_fp->tfile);
 	kfree(vmw_fp);
@@ -1047,55 +1036,6 @@ out_no_tfile:
 	return ret;
 }
 
-static struct vmw_master *vmw_master_check(struct drm_device *dev,
-					   struct drm_file *file_priv,
-					   unsigned int flags)
-{
-	int ret;
-	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
-	struct vmw_master *vmaster;
-
-	if (!drm_is_primary_client(file_priv) || !(flags & DRM_AUTH))
-		return NULL;
-
-	ret = mutex_lock_interruptible(&dev->master_mutex);
-	if (unlikely(ret != 0))
-		return ERR_PTR(-ERESTARTSYS);
-
-	if (drm_is_current_master(file_priv)) {
-		mutex_unlock(&dev->master_mutex);
-		return NULL;
-	}
-
-	/*
-	 * Check if we were previously master, but now dropped. In that
-	 * case, allow at least render node functionality.
-	 */
-	if (vmw_fp->locked_master) {
-		mutex_unlock(&dev->master_mutex);
-
-		if (flags & DRM_RENDER_ALLOW)
-			return NULL;
-
-		DRM_ERROR("Dropped master trying to access ioctl that "
-			  "requires authentication.\n");
-		return ERR_PTR(-EACCES);
-	}
-	mutex_unlock(&dev->master_mutex);
-
-	/*
-	 * Take the TTM lock. Possibly sleep waiting for the authenticating
-	 * master to become master again, or for a SIGTERM if the
-	 * authenticating master exits.
-	 */
-	vmaster = vmw_master(file_priv->master);
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		vmaster = ERR_PTR(ret);
-
-	return vmaster;
-}
-
 static long vmw_generic_ioctl(struct file *filp, unsigned int cmd,
 			      unsigned long arg,
 			      long (*ioctl_func)(struct file *, unsigned int,
@@ -1104,7 +1044,6 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd,
 	struct drm_file *file_priv = filp->private_data;
 	struct drm_device *dev = file_priv->minor->dev;
 	unsigned int nr = DRM_IOCTL_NR(cmd);
-	struct vmw_master *vmaster;
 	unsigned int flags;
 	long ret;
 
@@ -1140,21 +1079,7 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd,
 	} else if (!drm_ioctl_flags(nr, &flags))
 		return -EINVAL;
 
-	vmaster = vmw_master_check(dev, file_priv, flags);
-	if (IS_ERR(vmaster)) {
-		ret = PTR_ERR(vmaster);
-
-		if (ret != -ERESTARTSYS)
-			DRM_INFO("IOCTL ERROR Command %d, Error %ld.\n",
-				 nr, ret);
-		return ret;
-	}
-
-	ret = ioctl_func(filp, cmd, arg);
-	if (vmaster)
-		ttm_read_unlock(&vmaster->lock);
-
-	return ret;
+	return ioctl_func(filp, cmd, arg);
 
 out_io_encoding:
 	DRM_ERROR("Invalid command format, ioctl %d\n",
@@ -1181,65 +1106,10 @@ static void vmw_lastclose(struct drm_device *dev)
 {
 }
 
-static void vmw_master_init(struct vmw_master *vmaster)
-{
-	ttm_lock_init(&vmaster->lock);
-}
-
-static int vmw_master_create(struct drm_device *dev,
-			     struct drm_master *master)
-{
-	struct vmw_master *vmaster;
-
-	vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
-	if (unlikely(!vmaster))
-		return -ENOMEM;
-
-	vmw_master_init(vmaster);
-	ttm_lock_set_kill(&vmaster->lock, true, SIGTERM);
-	master->driver_priv = vmaster;
-
-	return 0;
-}
-
-static void vmw_master_destroy(struct drm_device *dev,
-			       struct drm_master *master)
-{
-	struct vmw_master *vmaster = vmw_master(master);
-
-	master->driver_priv = NULL;
-	kfree(vmaster);
-}
-
 static int vmw_master_set(struct drm_device *dev,
 			  struct drm_file *file_priv,
 			  bool from_open)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
-	struct vmw_master *active = dev_priv->active_master;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	int ret = 0;
-
-	if (active) {
-		BUG_ON(active != &dev_priv->fbdev_master);
-		ret = ttm_vt_lock(&active->lock, false, vmw_fp->tfile);
-		if (unlikely(ret != 0))
-			return ret;
-
-		ttm_lock_set_kill(&active->lock, true, SIGTERM);
-		dev_priv->active_master = NULL;
-	}
-
-	ttm_lock_set_kill(&vmaster->lock, false, SIGTERM);
-	if (!from_open) {
-		ttm_vt_unlock(&vmaster->lock);
-		BUG_ON(vmw_fp->locked_master != file_priv->master);
-		drm_master_put(&vmw_fp->locked_master);
-	}
-
-	dev_priv->active_master = vmaster;
-
 	/*
 	 * Inform a new master that the layout may have changed while
 	 * it was gone.
@@ -1254,31 +1124,10 @@ static void vmw_master_drop(struct drm_device *dev,
 			    struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	int ret;
-
-	/**
-	 * Make sure the master doesn't disappear while we have
-	 * it locked.
-	 */
 
-	vmw_fp->locked_master = drm_master_get(file_priv->master);
-	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
 	vmw_kms_legacy_hotspot_clear(dev_priv);
-	if (unlikely((ret != 0))) {
-		DRM_ERROR("Unable to lock TTM at VT switch.\n");
-		drm_master_put(&vmw_fp->locked_master);
-	}
-
-	ttm_lock_set_kill(&vmaster->lock, false, SIGTERM);
-
 	if (!dev_priv->enable_fb)
 		vmw_svga_disable(dev_priv);
-
-	dev_priv->active_master = &dev_priv->fbdev_master;
-	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
-	ttm_vt_unlock(&dev_priv->fbdev_master.lock);
 }
 
 /**
@@ -1557,8 +1406,6 @@ static struct drm_driver driver = {
 	.disable_vblank = vmw_disable_vblank,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = ARRAY_SIZE(vmw_ioctls),
-	.master_create = vmw_master_create,
-	.master_destroy = vmw_master_destroy,
 	.master_set = vmw_master_set,
 	.master_drop = vmw_master_drop,
 	.open = vmw_driver_open,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 366dcfc1f9bb..3a358a5495e4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -44,9 +44,9 @@
 #include <linux/sync_file.h>
 
 #define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180704"
+#define VMWGFX_DRIVER_DATE "20190328"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_MINOR 16
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
@@ -81,19 +81,30 @@
 #define VMW_RES_SHADER ttm_driver_type4
 
 struct vmw_fpriv {
-	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
 	bool gb_aware; /* user-space is guest-backed aware */
 };
 
+/**
+ * struct vmw_buffer_object - TTM buffer object with vmwgfx additions
+ * @base: The TTM buffer object
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
+ * @pin_count: pin depth
+ * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
+ * @map: Kmap object for semi-persistent mappings
+ * @res_prios: Eviction priority counts for attached resources
+ * @dirty: structure for user-space dirty-tracking
+ */
 struct vmw_buffer_object {
 	struct ttm_buffer_object base;
-	struct list_head res_list;
+	struct rb_root res_tree;
 	s32 pin_count;
 	/* Not ref-counted.  Protected by binding_mutex */
 	struct vmw_resource *dx_query_ctx;
 	/* Protected by reservation */
 	struct ttm_bo_kmap_obj map;
+	u32 res_prios[TTM_MAX_BO_PRIORITY];
+	struct vmw_bo_dirty *dirty;
 };
 
 /**
@@ -124,7 +135,8 @@ struct vmw_res_func;
  * @res_dirty: Resource contains data not yet in the backup buffer. Protected
  * by resource reserved.
  * @backup_dirty: Backup buffer contains data not yet in the HW resource.
- * Protecte by resource reserved.
+ * Protected by resource reserved.
+ * @coherent: Emulate coherency by tracking vm accesses.
  * @backup: The backup buffer if any. Protected by resource reserved.
  * @backup_offset: Offset into the backup buffer if any. Protected by resource
  * reserved. Note that only a few resource types can have a @backup_offset
@@ -133,28 +145,32 @@ struct vmw_res_func;
  * pin-count greater than zero. It is not on the resource LRU lists and its
  * backup buffer is pinned. Hence it can't be evicted.
  * @func: Method vtable for this resource. Immutable.
+ * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved.
  * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
  * @binding_head: List head for the context binding list. Protected by
  * the @dev_priv::binding_mutex
  * @res_free: The resource destructor.
  * @hw_destroy: Callback to destroy the resource on the device, as part of
  * resource destruction.
  */
+struct vmw_resource_dirty;
 struct vmw_resource {
 	struct kref kref;
 	struct vmw_private *dev_priv;
 	int id;
+	u32 used_prio;
 	unsigned long backup_size;
-	bool res_dirty;
-	bool backup_dirty;
+	u32 res_dirty : 1;
+	u32 backup_dirty : 1;
+	u32 coherent : 1;
 	struct vmw_buffer_object *backup;
 	unsigned long backup_offset;
 	unsigned long pin_count;
 	const struct vmw_res_func *func;
+	struct rb_node mob_node;
 	struct list_head lru_head;
-	struct list_head mob_head;
 	struct list_head binding_head;
+	struct vmw_resource_dirty *dirty;
 	void (*res_free) (struct vmw_resource *res);
 	void (*hw_destroy) (struct vmw_resource *res);
 };
@@ -376,10 +392,6 @@ struct vmw_sw_context{
 struct vmw_legacy_display;
 struct vmw_overlay;
 
-struct vmw_master {
-	struct ttm_lock lock;
-};
-
 struct vmw_vga_topology_state {
 	uint32_t width;
 	uint32_t height;
@@ -542,11 +554,8 @@ struct vmw_private {
 	spinlock_t svga_lock;
 
 	/**
-	 * Master management.
+	 * PM management.
 	 */
-
-	struct vmw_master *active_master;
-	struct vmw_master fbdev_master;
 	struct notifier_block pm_nb;
 	bool refuse_hibernation;
 	bool suspend_locked;
@@ -595,6 +604,9 @@ struct vmw_private {
 
 	/* Validation memory reservation */
 	struct vmw_validation_mem vvm;
+
+	/* VM operations */
+	struct vm_operations_struct vm_ops;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -612,11 +624,6 @@ static inline struct vmw_fpriv *vmw_fpriv(struct drm_file *file_priv)
 	return (struct vmw_fpriv *)file_priv->driver_priv;
 }
 
-static inline struct vmw_master *vmw_master(struct drm_master *master)
-{
-	return (struct vmw_master *) master->driver_priv;
-}
-
 /*
  * The locking here is fine-grained, so that it is performed once
  * for every read- and write operation. This is of course costly, but we
@@ -669,7 +676,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
 extern struct vmw_resource *
 vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+				 bool dirtying);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
 				bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -709,6 +717,23 @@ extern void vmw_query_move_notify(struct ttm_buffer_object *bo,
 extern int vmw_query_readback_all(struct vmw_buffer_object *dx_query_mob);
 extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
 extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
+void vmw_resource_mob_attach(struct vmw_resource *res);
+void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+			       pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+			pgoff_t end, pgoff_t *num_prefault);
+
+/**
+ * vmw_resource_mob_attached - Whether a resource currently has a mob attached
+ * @res: The resource
+ *
+ * Return: true if the resource has a mob attached, false otherwise.
+ */
+static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
+{
+	return !RB_EMPTY_NODE(&res->mob_node);
+}
 
 /**
  * vmw_user_resource_noref_release - release a user resource pointer looked up
@@ -787,6 +812,54 @@ static inline void vmw_user_bo_noref_release(void)
 	ttm_base_object_noref_release();
 }
 
+/**
+ * vmw_bo_adjust_prio - Adjust the buffer object eviction priority
+ * according to attached resources
+ * @vbo: The struct vmw_buffer_object
+ */
+static inline void vmw_bo_prio_adjust(struct vmw_buffer_object *vbo)
+{
+	int i = ARRAY_SIZE(vbo->res_prios);
+
+	while (i--) {
+		if (vbo->res_prios[i]) {
+			vbo->base.priority = i;
+			return;
+		}
+	}
+
+	vbo->base.priority = 3;
+}
+
+/**
+ * vmw_bo_prio_add - Notify a buffer object of a newly attached resource
+ * eviction priority
+ * @vbo: The struct vmw_buffer_object
+ * @prio: The resource priority
+ *
+ * After being notified, the code assigns the highest resource eviction priority
+ * to the backing buffer object (mob).
+ */
+static inline void vmw_bo_prio_add(struct vmw_buffer_object *vbo, int prio)
+{
+	if (vbo->res_prios[prio]++ == 0)
+		vmw_bo_prio_adjust(vbo);
+}
+
+/**
+ * vmw_bo_prio_del - Notify a buffer object of a resource with a certain
+ * priority being removed
+ * @vbo: The struct vmw_buffer_object
+ * @prio: The resource priority
+ *
+ * After being notified, the code assigns the highest resource eviction priority
+ * to the backing buffer object (mob).
+ */
+static inline void vmw_bo_prio_del(struct vmw_buffer_object *vbo, int prio)
+{
+	if (--vbo->res_prios[prio] == 0)
+		vmw_bo_prio_adjust(vbo);
+}
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -1016,7 +1089,6 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf,
 int vmw_kms_write_svga(struct vmw_private *vmw_priv,
 		       unsigned width, unsigned height, unsigned pitch,
 		       unsigned bpp, unsigned depth);
-void vmw_kms_idle_workqueues(struct vmw_master *vmaster);
 bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
 				uint32_t height);
@@ -1339,6 +1411,25 @@ int vmw_host_log(const char *log);
 	DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
 
 /**
+ * VMW_DEBUG_KMS - Debug output for kernel mode-setting
+ *
+ * This macro is for debugging vmwgfx mode-setting code.
+ */
+#define VMW_DEBUG_KMS(fmt, ...)                                               \
+	DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+			pgoff_t start, pgoff_t end);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
+/**
  * Inline helper functions
  */
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 33533d126277..319c1ca35663 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
 		     offsetof(typeof(*cmd), sid));
 
 	cmd = container_of(header, typeof(*cmd), header);
-
 	return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
 				 VMW_RES_DIRTY_NONE, user_surface_converter,
 				 &cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b97bc8e5944b..e7222fa2cfdf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1462,7 +1462,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev,
 		if (dev_priv->active_display_unit == vmw_du_screen_target &&
 		    (drm_rect_width(&rects[i]) > dev_priv->stdu_max_width ||
 		     drm_rect_height(&rects[i]) > dev_priv->stdu_max_height)) {
-			DRM_ERROR("Screen size not supported.\n");
+			VMW_DEBUG_KMS("Screen size not supported.\n");
 			return -EINVAL;
 		}
 
@@ -1486,7 +1486,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev,
 	 * limit on primary bounding box
 	 */
 	if (pixel_mem > dev_priv->prim_bb_mem) {
-		DRM_ERROR("Combined output size too large.\n");
+		VMW_DEBUG_KMS("Combined output size too large.\n");
 		return -EINVAL;
 	}
 
@@ -1496,7 +1496,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev,
 		bb_mem = (u64) bounding_box.x2 * bounding_box.y2 * 4;
 
 		if (bb_mem > dev_priv->prim_bb_mem) {
-			DRM_ERROR("Topology is beyond supported limits.\n");
+			VMW_DEBUG_KMS("Topology is beyond supported limits.\n");
 			return -EINVAL;
 		}
 	}
@@ -1645,6 +1645,7 @@ static int vmw_kms_check_topology(struct drm_device *dev,
 		struct vmw_connector_state *vmw_conn_state;
 
 		if (!du->pref_active && new_crtc_state->enable) {
+			VMW_DEBUG_KMS("Enabling a disabled display unit\n");
 			ret = -EINVAL;
 			goto clean;
 		}
@@ -1701,8 +1702,10 @@ vmw_kms_atomic_check_modeset(struct drm_device *dev,
 		return ret;
 
 	ret = vmw_kms_check_implicit(dev, state);
-	if (ret)
+	if (ret) {
+		VMW_DEBUG_KMS("Invalid implicit state\n");
 		return ret;
+	}
 
 	if (!state->allow_modeset)
 		return ret;
@@ -2347,6 +2350,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 
 	if (!arg->num_outputs) {
 		struct drm_rect def_rect = {0, 0, 800, 600};
+		VMW_DEBUG_KMS("Default layout x1 = %d y1 = %d x2 = %d y2 = %d\n",
+			      def_rect.x1, def_rect.y1,
+			      def_rect.x2, def_rect.y2);
 		vmw_du_update_layout(dev_priv, 1, &def_rect);
 		return 0;
 	}
@@ -2367,6 +2373,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 
 	drm_rects = (struct drm_rect *)rects;
 
+	VMW_DEBUG_KMS("Layout count = %u\n", arg->num_outputs);
 	for (i = 0; i < arg->num_outputs; i++) {
 		struct drm_vmw_rect curr_rect;
 
@@ -2383,6 +2390,10 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 		drm_rects[i].x2 = curr_rect.x + curr_rect.w;
 		drm_rects[i].y2 = curr_rect.y + curr_rect.h;
 
+		VMW_DEBUG_KMS("  x1 = %d y1 = %d x2 = %d y2 = %d\n",
+			      drm_rects[i].x1, drm_rects[i].y1,
+			      drm_rects[i].x2, drm_rects[i].y2);
+
 		/*
 		 * Currently this check is limiting the topology within
 		 * mode_config->max (which actually is max texture size
@@ -2393,7 +2404,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 		if (drm_rects[i].x1 < 0 ||  drm_rects[i].y1 < 0 ||
 		    drm_rects[i].x2 > mode_config->max_width ||
 		    drm_rects[i].y2 > mode_config->max_height) {
-			DRM_ERROR("Invalid GUI layout.\n");
+			VMW_DEBUG_KMS("Invalid layout %d %d %d %d\n",
+				      drm_rects[i].x1, drm_rects[i].y1,
+				      drm_rects[i].x2, drm_rects[i].y2);
 			ret = -EINVAL;
 			goto out_free;
 		}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index 000000000000..730c51e397dd
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/**************************************************************************
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+	VMW_BO_DIRTY_PAGETABLE,
+	VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method,
+ * similarly a certain percentage of dirty pages trigger a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers
+ * @ref_count: Reference count for this structure
+ * @bitmap_size: The size of the bitmap in bits. Typically equal to the
+ * nuber of pages in the bo.
+ * @size: The accounting size for this struct.
+ * @bitmap: A bitmap where each bit represents a page. A set bit means a
+ * dirty page.
+ */
+struct vmw_bo_dirty {
+	unsigned long start;
+	unsigned long end;
+	enum vmw_bo_dirty_method method;
+	unsigned int change_count;
+	unsigned int ref_count;
+	unsigned long bitmap_size;
+	size_t size;
+	unsigned long bitmap[0];
+};
+
+/**
+ * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
+ * @vbo: The buffer object to scan
+ *
+ * Scans the pagetable for dirty bits. Clear those bits and modify the
+ * dirty structure with the results. This function may change the
+ * dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+	pgoff_t offset = drm_vma_node_start(&vbo->base.vma_node);
+	struct address_space *mapping = vbo->base.bdev->dev_mapping;
+	pgoff_t num_marked;
+
+	num_marked = apply_as_clean(mapping,
+				    offset, dirty->bitmap_size,
+				    offset, &dirty->bitmap[0],
+				    &dirty->start, &dirty->end);
+	if (num_marked == 0)
+		dirty->change_count++;
+	else
+		dirty->change_count = 0;
+
+	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+		dirty->change_count = 0;
+		dirty->method = VMW_BO_DIRTY_MKWRITE;
+		apply_as_wrprotect(mapping,
+				   offset, dirty->bitmap_size);
+		apply_as_clean(mapping,
+			       offset, dirty->bitmap_size,
+			       offset, &dirty->bitmap[0],
+			       &dirty->start, &dirty->end);
+	}
+}
+
+/**
+ * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
+ * @vbo: The buffer object to scan
+ *
+ * Write-protect pages written to so that consecutive write accesses will
+ * trigger a call to mkwrite.
+ *
+ * This function may change the dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+	unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+	struct address_space *mapping = vbo->base.bdev->dev_mapping;
+	pgoff_t num_marked;
+
+	if (dirty->end <= dirty->start)
+		return;
+
+	num_marked = apply_as_wrprotect(vbo->base.bdev->dev_mapping,
+					dirty->start + offset,
+					dirty->end - dirty->start);
+
+	if (100UL * num_marked / dirty->bitmap_size >
+	    VMW_DIRTY_PERCENTAGE) {
+		dirty->change_count++;
+	} else {
+		dirty->change_count = 0;
+	}
+
+	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+		pgoff_t start = 0;
+		pgoff_t end = dirty->bitmap_size;
+
+		dirty->method = VMW_BO_DIRTY_PAGETABLE;
+		apply_as_clean(mapping, offset, end, offset, &dirty->bitmap[0],
+			       &start, &end);
+		bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
+		if (dirty->start < dirty->end)
+			bitmap_set(&dirty->bitmap[0], dirty->start,
+				   dirty->end - dirty->start);
+		dirty->change_count = 0;
+	}
+}
+
+/**
+ * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
+ * tracking structure
+ * @vbo: The buffer object to scan
+ *
+ * This function may change the dirty tracking method.
+ */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+
+	if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
+		vmw_bo_dirty_scan_pagetable(vbo);
+	else
+		vmw_bo_dirty_scan_mkwrite(vbo);
+}
+
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+				   pgoff_t start, pgoff_t end)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+	unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+	struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+	if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+		return;
+
+	apply_as_wrprotect(mapping, start + offset, end - start);
+	apply_as_clean(mapping, start + offset, end - start, offset,
+		       &dirty->bitmap[0], &dirty->start, &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+			pgoff_t start, pgoff_t end)
+{
+	unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);
+	struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+	vmw_bo_dirty_pre_unmap(vbo, start, end);
+	unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+				   (loff_t) (end - start) << PAGE_SHIFT);
+}
+
+/**
+ * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
+ * @vbo: The buffer object
+ *
+ * This function registers a dirty-tracking user to a buffer object.
+ * A user can be for example a resource or a vma in a special user-space
+ * mapping.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+	pgoff_t num_pages = vbo->base.num_pages;
+	size_t size, acc_size;
+	int ret;
+	static struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
+
+	if (dirty) {
+		dirty->ref_count++;
+		return 0;
+	}
+
+	size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
+	acc_size = ttm_round_pot(size);
+	ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
+	if (ret) {
+		VMW_DEBUG_USER("Out of graphics memory for buffer object "
+			       "dirty tracker.\n");
+		return ret;
+	}
+	dirty = kvzalloc(size, GFP_KERNEL);
+	if (!dirty) {
+		ret = -ENOMEM;
+		goto out_no_dirty;
+	}
+
+	dirty->size = acc_size;
+	dirty->bitmap_size = num_pages;
+	dirty->start = dirty->bitmap_size;
+	dirty->end = 0;
+	dirty->ref_count = 1;
+	if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
+		dirty->method = VMW_BO_DIRTY_PAGETABLE;
+	} else {
+		struct address_space *mapping = vbo->base.bdev->dev_mapping;
+		pgoff_t offset = drm_vma_node_start(&vbo->base.vma_node);
+
+		dirty->method = VMW_BO_DIRTY_MKWRITE;
+
+		/* Write-protect and then pick up already dirty bits */
+		apply_as_wrprotect(mapping, offset, num_pages);
+		apply_as_clean(mapping, offset, num_pages, offset,
+			       &dirty->bitmap[0], &dirty->start, &dirty->end);
+	}
+
+	vbo->dirty = dirty;
+
+	return 0;
+
+out_no_dirty:
+	ttm_mem_global_free(&ttm_mem_glob, acc_size);
+	return ret;
+}
+
+/**
+ * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
+ * @vbo: The buffer object
+ *
+ * This function releases a dirty-tracking user from a buffer object.
+ * If the reference count reaches zero, then the dirty-tracking object is
+ * freed and the pointer to it cleared.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
+{
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+
+	if (dirty && --dirty->ref_count == 0) {
+		size_t acc_size = dirty->size;
+
+		kvfree(dirty);
+		ttm_mem_global_free(&ttm_mem_glob, acc_size);
+		vbo->dirty = NULL;
+	}
+}
+
+/**
+ * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will pick up all dirty ranges affecting the resource from
+ * it's backup mob, and call vmw_resource_dirty_update() once for each
+ * range. The transferred ranges will be cleared from the backing mob's
+ * dirty tracking.
+ */
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
+{
+	struct vmw_buffer_object *vbo = res->backup;
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+	pgoff_t start, cur, end;
+	unsigned long res_start = res->backup_offset;
+	unsigned long res_end = res->backup_offset + res->backup_size;
+
+	WARN_ON_ONCE(res_start & ~PAGE_MASK);
+	res_start >>= PAGE_SHIFT;
+	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+	if (res_start >= dirty->end || res_end <= dirty->start)
+		return;
+
+	cur = max(res_start, dirty->start);
+	res_end = max(res_end, dirty->end);
+	while (cur < res_end) {
+		unsigned long num;
+
+		start = find_next_bit(&dirty->bitmap[0], res_end, cur);
+		if (start >= res_end)
+			break;
+
+		end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
+		cur = end + 1;
+		num = end - start;
+		bitmap_clear(&dirty->bitmap[0], start, num);
+		vmw_resource_dirty_update(res, start, end);
+	}
+
+	if (res_start <= dirty->start && res_end > dirty->start)
+		dirty->start = res_end;
+	if (res_start < dirty->end && res_end >= dirty->end)
+		dirty->end = res_start;
+}
+
+/**
+ * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will clear all dirty ranges affecting the resource from
+ * it's backup mob's dirty tracking.
+ */
+void vmw_bo_dirty_clear_res(struct vmw_resource *res)
+{
+	unsigned long res_start = res->backup_offset;
+	unsigned long res_end = res->backup_offset + res->backup_size;
+	struct vmw_buffer_object *vbo = res->backup;
+	struct vmw_bo_dirty *dirty = vbo->dirty;
+
+	res_start >>= PAGE_SHIFT;
+	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+	if (res_start >= dirty->end || res_end <= dirty->start)
+		return;
+
+	res_start = max(res_start, dirty->start);
+	res_end = min(res_end, dirty->end);
+	bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
+
+	if (res_start <= dirty->start && res_end > dirty->start)
+		dirty->start = res_end;
+	if (res_start < dirty->end && res_end >= dirty->end)
+		dirty->end = res_start;
+}
+
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	vm_fault_t ret;
+	unsigned long page_offset;
+	struct vmw_buffer_object *vbo =
+		container_of(bo, typeof(*vbo), base);
+
+	ret = ttm_bo_vm_reserve(bo, vmf);
+	if (ret)
+		return ret;
+
+	page_offset = vmf->pgoff - drm_vma_node_start(&bo->vma_node);
+	if (unlikely(page_offset >= bo->num_pages)) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_unlock;
+	}
+
+	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
+	    !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
+		struct vmw_bo_dirty *dirty = vbo->dirty;
+
+		__set_bit(page_offset, &dirty->bitmap[0]);
+		dirty->start = min(dirty->start, page_offset);
+		dirty->end = max(dirty->end, page_offset + 1);
+	}
+
+out_unlock:
+	reservation_object_unlock(bo->resv);
+	return ret;
+}
+
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+	    vma->vm_private_data;
+	struct vmw_buffer_object *vbo =
+		container_of(bo, struct vmw_buffer_object, base);
+	pgoff_t num_prefault;
+	pgprot_t prot;
+	vm_fault_t ret;
+
+	ret = ttm_bo_vm_reserve(bo, vmf);
+	if (ret)
+		return ret;
+
+	num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
+		TTM_BO_VM_NUM_PREFAULT;
+
+	if (vbo->dirty) {
+		pgoff_t allowed_prefault;
+		unsigned long page_offset;
+
+		page_offset = vmf->pgoff - drm_vma_node_start(&bo->vma_node);
+		if (page_offset >= bo->num_pages ||
+		    vmw_resources_clean(vbo, page_offset,
+					page_offset + PAGE_SIZE,
+					&allowed_prefault)) {
+			ret = VM_FAULT_SIGBUS;
+			goto out_unlock;
+		}
+
+		num_prefault = min(num_prefault, allowed_prefault);
+	}
+
+	/*
+	 * If we don't track dirty using the MKWRITE method, make sure
+	 * sure the page protection is write-enabled so we don't get
+	 * a lot of unnecessary write faults.
+	 */
+	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
+		prot = vma->vm_page_prot;
+	else
+		prot = vm_get_page_prot(vma->vm_flags);
+
+	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+		return ret;
+
+out_unlock:
+	reservation_object_unlock(bo->resv);
+	return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 711f8fd0dd45..d70ee0df5c13 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -34,6 +34,51 @@
 
 #define VMW_RES_EVICT_ERR_COUNT 10
 
+/**
+ * vmw_resource_mob_attach - Mark a resource as attached to its backing mob
+ * @res: The resource
+ */
+void vmw_resource_mob_attach(struct vmw_resource *res)
+{
+	struct vmw_buffer_object *backup = res->backup;
+	struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
+
+	lockdep_assert_held(&backup->base.resv->lock.base);
+	res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
+		res->func->prio;
+
+	while (*new) {
+		struct vmw_resource *this =
+			container_of(*new, struct vmw_resource, mob_node);
+
+		parent = *new;
+		new = (res->backup_offset < this->backup_offset) ?
+			&((*new)->rb_left) : &((*new)->rb_right);
+	}
+
+	rb_link_node(&res->mob_node, parent, new);
+	rb_insert_color(&res->mob_node, &backup->res_tree);
+
+	vmw_bo_prio_add(backup, res->used_prio);
+}
+
+/**
+ * vmw_resource_mob_detach - Mark a resource as detached from its backing mob
+ * @res: The resource
+ */
+void vmw_resource_mob_detach(struct vmw_resource *res)
+{
+	struct vmw_buffer_object *backup = res->backup;
+
+	lockdep_assert_held(&backup->base.resv->lock.base);
+	if (vmw_resource_mob_attached(res)) {
+		rb_erase(&res->mob_node, &backup->res_tree);
+		RB_CLEAR_NODE(&res->mob_node);
+		vmw_bo_prio_del(backup, res->used_prio);
+	}
+}
+
+
 struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 {
 	kref_get(&res->kref);
@@ -80,7 +125,7 @@ static void vmw_resource_release(struct kref *kref)
 		struct ttm_buffer_object *bo = &res->backup->base;
 
 		ttm_bo_reserve(bo, false, false, NULL);
-		if (!list_empty(&res->mob_head) &&
+		if (vmw_resource_mob_attached(res) &&
 		    res->func->unbind != NULL) {
 			struct ttm_validate_buffer val_buf;
 
@@ -89,7 +134,11 @@ static void vmw_resource_release(struct kref *kref)
 			res->func->unbind(res, false, &val_buf);
 		}
 		res->backup_dirty = false;
-		list_del_init(&res->mob_head);
+		vmw_resource_mob_detach(res);
+		if (res->dirty)
+			res->func->dirty_free(res);
+		if (res->coherent)
+			vmw_bo_dirty_release(res->backup);
 		ttm_bo_unreserve(bo);
 		vmw_bo_unreference(&res->backup);
 	}
@@ -171,14 +220,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
 	res->res_free = res_free;
 	res->dev_priv = dev_priv;
 	res->func = func;
+	RB_CLEAR_NODE(&res->mob_node);
 	INIT_LIST_HEAD(&res->lru_head);
-	INIT_LIST_HEAD(&res->mob_head);
 	INIT_LIST_HEAD(&res->binding_head);
 	res->id = -1;
 	res->backup = NULL;
 	res->backup_offset = 0;
 	res->backup_dirty = false;
 	res->res_dirty = false;
+	res->coherent = false;
+	res->used_prio = 3;
+	res->dirty = NULL;
 	if (delay_id)
 		return 0;
 	else
@@ -343,7 +395,8 @@ out_no_bo:
  * should be retried once resources have been freed up.
  */
 static int vmw_resource_do_validate(struct vmw_resource *res,
-				    struct ttm_validate_buffer *val_buf)
+				    struct ttm_validate_buffer *val_buf,
+				    bool dirtying)
 {
 	int ret = 0;
 	const struct vmw_res_func *func = res->func;
@@ -355,14 +408,47 @@ static int vmw_resource_do_validate(struct vmw_resource *res,
 	}
 
 	if (func->bind &&
-	    ((func->needs_backup && list_empty(&res->mob_head) &&
+	    ((func->needs_backup && !vmw_resource_mob_attached(res) &&
 	      val_buf->bo != NULL) ||
 	     (!func->needs_backup && val_buf->bo != NULL))) {
 		ret = func->bind(res, val_buf);
 		if (unlikely(ret != 0))
 			goto out_bind_failed;
 		if (func->needs_backup)
-			list_add_tail(&res->mob_head, &res->backup->res_list);
+			vmw_resource_mob_attach(res);
+	}
+
+	/*
+	 * Handle the case where the backup mob is marked coherent but
+	 * the resource isn't.
+	 */
+	if (func->dirty_alloc && vmw_resource_mob_attached(res) &&
+	    !res->coherent) {
+		if (res->backup->dirty && !res->dirty) {
+			ret = func->dirty_alloc(res);
+			if (ret)
+				return ret;
+		} else if (!res->backup->dirty && res->dirty) {
+			func->dirty_free(res);
+		}
+	}
+
+	/*
+	 * Transfer the dirty regions to the resource and update
+	 * the resource.
+	 */
+	if (res->dirty) {
+		if (dirtying && !res->res_dirty) {
+			pgoff_t start = res->backup_offset >> PAGE_SHIFT;
+			pgoff_t end = __KERNEL_DIV_ROUND_UP
+				(res->backup_offset + res->backup_size,
+				 PAGE_SIZE);
+
+			vmw_bo_dirty_unmap(res->backup, start, end);
+		}
+
+		vmw_bo_dirty_transfer_to_res(res);
+		return func->dirty_sync(res);
 	}
 
 	return 0;
@@ -402,19 +488,29 @@ void vmw_resource_unreserve(struct vmw_resource *res,
 
 	if (switch_backup && new_backup != res->backup) {
 		if (res->backup) {
-			lockdep_assert_held(&res->backup->base.resv->lock.base);
-			list_del_init(&res->mob_head);
+			vmw_resource_mob_detach(res);
+			if (res->coherent)
+				vmw_bo_dirty_release(res->backup);
 			vmw_bo_unreference(&res->backup);
 		}
 
 		if (new_backup) {
 			res->backup = vmw_bo_reference(new_backup);
-			lockdep_assert_held(&new_backup->base.resv->lock.base);
-			list_add_tail(&res->mob_head, &new_backup->res_list);
+
+			/*
+			 * The validation code should already have added a
+			 * dirty tracker here.
+			 */
+			WARN_ON(res->coherent && !new_backup->dirty);
+
+			vmw_resource_mob_attach(res);
 		} else {
 			res->backup = NULL;
 		}
+	} else if (switch_backup && res->coherent) {
+		vmw_bo_dirty_release(res->backup);
 	}
+
 	if (switch_backup)
 		res->backup_offset = new_backup_offset;
 
@@ -464,11 +560,12 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket,
 	val_buf->bo = &res->backup->base;
 	val_buf->num_shared = 0;
 	list_add_tail(&val_buf->head, &val_list);
-	ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL);
+	ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL,
+				     true);
 	if (unlikely(ret != 0))
 		goto out_no_reserve;
 
-	if (res->func->needs_backup && list_empty(&res->mob_head))
+	if (res->func->needs_backup && !vmw_resource_mob_attached(res))
 		return 0;
 
 	backup_dirty = res->backup_dirty;
@@ -573,11 +670,11 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket,
 		return ret;
 
 	if (unlikely(func->unbind != NULL &&
-		     (!func->needs_backup || !list_empty(&res->mob_head)))) {
+		     (!func->needs_backup || vmw_resource_mob_attached(res)))) {
 		ret = func->unbind(res, res->res_dirty, &val_buf);
 		if (unlikely(ret != 0))
 			goto out_no_unbind;
-		list_del_init(&res->mob_head);
+		vmw_resource_mob_detach(res);
 	}
 	ret = func->destroy(res);
 	res->backup_dirty = true;
@@ -594,6 +691,7 @@ out_no_unbind:
  *                         to the device.
  * @res: The resource to make visible to the device.
  * @intr: Perform waits interruptible if possible.
+ * @dirtying: Pending GPU operation will dirty the resource
  *
  * On succesful return, any backup DMA buffer pointed to by @res->backup will
  * be reserved and validated.
@@ -603,7 +701,8 @@ out_no_unbind:
  * Return: Zero on success, -ERESTARTSYS if interrupted, negative error code
  * on failure.
  */
-int vmw_resource_validate(struct vmw_resource *res, bool intr)
+int vmw_resource_validate(struct vmw_resource *res, bool intr,
+			  bool dirtying)
 {
 	int ret;
 	struct vmw_resource *evict_res;
@@ -620,7 +719,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
 	if (res->backup)
 		val_buf.bo = &res->backup->base;
 	do {
-		ret = vmw_resource_do_validate(res, &val_buf);
+		ret = vmw_resource_do_validate(res, &val_buf, dirtying);
 		if (likely(ret != -EBUSY))
 			break;
 
@@ -659,7 +758,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
 	if (unlikely(ret != 0))
 		goto out_no_validate;
 	else if (!res->func->needs_backup && res->backup) {
-		list_del_init(&res->mob_head);
+		WARN_ON_ONCE(vmw_resource_mob_attached(res));
 		vmw_bo_unreference(&res->backup);
 	}
 
@@ -683,22 +782,23 @@ out_no_validate:
  */
 void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
 {
-
-	struct vmw_resource *res, *next;
 	struct ttm_validate_buffer val_buf = {
 		.bo = &vbo->base,
 		.num_shared = 0
 	};
 
 	lockdep_assert_held(&vbo->base.resv->lock.base);
-	list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) {
-		if (!res->func->unbind)
-			continue;
+	while (!RB_EMPTY_ROOT(&vbo->res_tree)) {
+		struct rb_node *node = vbo->res_tree.rb_node;
+		struct vmw_resource *res =
+			container_of(node, struct vmw_resource, mob_node);
+
+		if (!WARN_ON_ONCE(!res->func->unbind))
+			(void) res->func->unbind(res, res->res_dirty, &val_buf);
 
-		(void) res->func->unbind(res, res->res_dirty, &val_buf);
 		res->backup_dirty = true;
 		res->res_dirty = false;
-		list_del_init(&res->mob_head);
+		vmw_resource_mob_detach(res);
 	}
 
 	(void) ttm_bo_wait(&vbo->base, false, false);
@@ -919,7 +1019,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
 			/* Do we really need to pin the MOB as well? */
 			vmw_bo_pin_reserved(vbo, true);
 		}
-		ret = vmw_resource_validate(res, interruptible);
+		ret = vmw_resource_validate(res, interruptible, true);
 		if (vbo)
 			ttm_bo_unreserve(&vbo->base);
 		if (ret)
@@ -979,3 +1079,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res)
 {
 	return res->func->res_type;
 }
+
+/**
+ * vmw_resource_update_dirty - Update a resource's dirty tracker with a
+ * sequential range of touched backing store memory.
+ * @res: The resource.
+ * @start: The first page touched.
+ * @end: The last page touched + 1.
+ */
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+			       pgoff_t end)
+{
+	if (res->dirty)
+		res->func->dirty_range_add(res, start << PAGE_SHIFT,
+					   end << PAGE_SHIFT);
+}
+
+/**
+ * vmw_resources_clean - Clean resources intersecting a mob range
+ * @vbo: The mob buffer object
+ * @start: The mob page offset starting the range
+ * @end: The mob page offset ending the range
+ * @num_prefault: Returns how many pages including the first have been
+ * cleaned and are ok to prefault
+ */
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+			pgoff_t end, pgoff_t *num_prefault)
+{
+	struct rb_node *cur = vbo->res_tree.rb_node;
+	struct vmw_resource *found = NULL;
+	unsigned long res_start = start << PAGE_SHIFT;
+	unsigned long res_end = end << PAGE_SHIFT;
+	unsigned long last_cleaned = 0;
+
+	/*
+	 * Find the resource with lowest backup_offset that intersects the
+	 * range.
+	 */
+	while (cur) {
+		struct vmw_resource *cur_res =
+			container_of(cur, struct vmw_resource, mob_node);
+
+		if (cur_res->backup_offset >= res_end) {
+			cur = cur->rb_left;
+		} else if (cur_res->backup_offset + cur_res->backup_size <=
+			   res_start) {
+			cur = cur->rb_right;
+		} else {
+			found = cur_res;
+			cur = cur->rb_left;
+			/* Continue to look for resources with lower offsets */
+		}
+	}
+
+	/*
+	 * In order of increasing backup_offset, clean dirty resorces
+	 * intersecting the range.
+	 */
+	while (found) {
+		if (found->res_dirty) {
+			int ret;
+
+			if (!found->func->clean)
+				return -EINVAL;
+
+			ret = found->func->clean(found);
+			if (ret)
+				return ret;
+
+			found->res_dirty = false;
+		}
+		last_cleaned = found->backup_offset + found->backup_size;
+		cur = rb_next(&found->mob_node);
+		if (!cur)
+			break;
+
+		found = container_of(cur, struct vmw_resource, mob_node);
+		if (found->backup_offset >= res_end)
+			break;
+	}
+
+	/*
+	 * Set number of pages allowed prefaulting and fence the buffer object
+	 */
+	*num_prefault = 1;
+	if (last_cleaned > res_start) {
+		struct ttm_buffer_object *bo = &vbo->base;
+
+		*num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
+						      PAGE_SIZE);
+		vmw_bo_fence_single(bo, NULL);
+		if (bo->moving)
+			dma_fence_put(bo->moving);
+		bo->moving = dma_fence_get
+			(reservation_object_get_excl(bo->resv));
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
index 7e19eba0b0b8..3b7438b2d289 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
@@ -71,6 +71,13 @@ struct vmw_user_resource_conv {
  * @commit_notify:     If the resource is a command buffer managed resource,
  *                     callback to notify that a define or remove command
  *                     has been committed to the device.
+ * @dirty_alloc:       Allocate a dirty tracker. NULL if dirty-tracking is not
+ *                     supported.
+ * @dirty_free:        Free the dirty tracker.
+ * @dirty_sync:        Upload the dirty mob contents to the resource.
+ * @dirty_add_range:   Add a sequential dirty range to the resource
+ *                     dirty tracker.
+ * @clean:             Clean the resource.
  */
 struct vmw_res_func {
 	enum vmw_res_type res_type;
@@ -78,6 +85,8 @@ struct vmw_res_func {
 	const char *type_name;
 	struct ttm_placement *backup_placement;
 	bool may_evict;
+	u32 prio;
+	u32 dirty_prio;
 
 	int (*create) (struct vmw_resource *res);
 	int (*destroy) (struct vmw_resource *res);
@@ -88,6 +97,12 @@ struct vmw_res_func {
 		       struct ttm_validate_buffer *val_buf);
 	void (*commit_notify)(struct vmw_resource *res,
 			      enum vmw_cmdbuf_res_state state);
+	int (*dirty_alloc)(struct vmw_resource *res);
+	void (*dirty_free)(struct vmw_resource *res);
+	int (*dirty_sync)(struct vmw_resource *res);
+	void (*dirty_range_add)(struct vmw_resource *res, size_t start,
+				 size_t end);
+	int (*clean)(struct vmw_resource *res);
 };
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index d310d21f0d54..e139fdfd1635 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -95,6 +95,8 @@ static const struct vmw_res_func vmw_gb_shader_func = {
 	.res_type = vmw_res_shader,
 	.needs_backup = true,
 	.may_evict = true,
+	.prio = 3,
+	.dirty_prio = 3,
 	.type_name = "guest backed shaders",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_gb_shader_create,
@@ -106,7 +108,9 @@ static const struct vmw_res_func vmw_gb_shader_func = {
 static const struct vmw_res_func vmw_dx_shader_func = {
 	.res_type = vmw_res_shader,
 	.needs_backup = true,
-	.may_evict = false,
+	.may_evict = true,
+	.prio = 3,
+	.dirty_prio = 3,
 	.type_name = "dx shaders",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_dx_shader_create,
@@ -423,7 +427,7 @@ static int vmw_dx_shader_create(struct vmw_resource *res)
 
 	WARN_ON_ONCE(!shader->committed);
 
-	if (!list_empty(&res->mob_head)) {
+	if (vmw_resource_mob_attached(res)) {
 		mutex_lock(&dev_priv->binding_mutex);
 		ret = vmw_dx_shader_unscrub(res);
 		mutex_unlock(&dev_priv->binding_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 219471903bc1..862ca44680ca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -68,6 +68,20 @@ struct vmw_surface_offset {
 	uint32_t bo_offset;
 };
 
+/**
+ * vmw_surface_dirty - Surface dirty-tracker
+ * @cache: Cached layout information of the surface.
+ * @size: Accounting size for the struct vmw_surface_dirty.
+ * @num_subres: Number of subresources.
+ * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource.
+ */
+struct vmw_surface_dirty {
+	struct svga3dsurface_cache cache;
+	size_t size;
+	u32 num_subres;
+	SVGA3dBox boxes[0];
+};
+
 static void vmw_user_surface_free(struct vmw_resource *res);
 static struct vmw_resource *
 vmw_user_surface_base_to_res(struct ttm_base_object *base);
@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
 				  struct drm_vmw_gb_surface_ref_ext_rep *rep,
 				  struct drm_file *file_priv);
 
+static void vmw_surface_dirty_free(struct vmw_resource *res);
+static int vmw_surface_dirty_alloc(struct vmw_resource *res);
+static int vmw_surface_dirty_sync(struct vmw_resource *res);
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+					size_t end);
+static int vmw_surface_clean(struct vmw_resource *res);
+
 static const struct vmw_user_resource_conv user_surface_conv = {
 	.object_type = VMW_RES_SURFACE,
 	.base_obj_to_res = vmw_user_surface_base_to_res,
@@ -112,6 +133,8 @@ static const struct vmw_res_func vmw_legacy_surface_func = {
 	.res_type = vmw_res_surface,
 	.needs_backup = false,
 	.may_evict = true,
+	.prio = 1,
+	.dirty_prio = 1,
 	.type_name = "legacy surfaces",
 	.backup_placement = &vmw_srf_placement,
 	.create = &vmw_legacy_srf_create,
@@ -124,12 +147,19 @@ static const struct vmw_res_func vmw_gb_surface_func = {
 	.res_type = vmw_res_surface,
 	.needs_backup = true,
 	.may_evict = true,
+	.prio = 1,
+	.dirty_prio = 2,
 	.type_name = "guest backed surfaces",
 	.backup_placement = &vmw_mob_placement,
 	.create = vmw_gb_surface_create,
 	.destroy = vmw_gb_surface_destroy,
 	.bind = vmw_gb_surface_bind,
-	.unbind = vmw_gb_surface_unbind
+	.unbind = vmw_gb_surface_unbind,
+	.dirty_alloc = vmw_surface_dirty_alloc,
+	.dirty_free = vmw_surface_dirty_free,
+	.dirty_sync = vmw_surface_dirty_sync,
+	.dirty_range_add = vmw_surface_dirty_range_add,
+	.clean = vmw_surface_clean,
 };
 
 /**
@@ -637,6 +667,7 @@ static void vmw_user_surface_free(struct vmw_resource *res)
 	struct vmw_private *dev_priv = srf->res.dev_priv;
 	uint32_t size = user_srf->size;
 
+	WARN_ON_ONCE(res->dirty);
 	if (user_srf->master)
 		drm_master_put(&user_srf->master);
 	kfree(srf->offsets);
@@ -915,12 +946,6 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 		if (unlikely(drm_is_render_client(file_priv)))
 			require_exist = true;
 
-		if (READ_ONCE(vmw_fpriv(file_priv)->locked_master)) {
-			DRM_ERROR("Locked master refused legacy "
-				  "surface reference.\n");
-			return -EACCES;
-		}
-
 		handle = u_handle;
 	}
 
@@ -1170,10 +1195,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
 		cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE;
 		cmd2->header.size = sizeof(cmd2->body);
 		cmd2->body.sid = res->id;
-		res->backup_dirty = false;
 	}
 	vmw_fifo_commit(dev_priv, submit_size);
 
+	if (res->backup->dirty && res->backup_dirty) {
+		/* We've just made a full upload. Cear dirty regions. */
+		vmw_bo_dirty_clear_res(res);
+	}
+
+	res->backup_dirty = false;
+
 	return 0;
 }
 
@@ -1638,7 +1669,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
 			}
 		}
 	} else if (req->base.drm_surface_flags &
-		   drm_vmw_surface_flag_create_buffer)
+		   (drm_vmw_surface_flag_create_buffer |
+		    drm_vmw_surface_flag_coherent))
 		ret = vmw_user_bo_alloc(dev_priv, tfile,
 					res->backup_size,
 					req->base.drm_surface_flags &
@@ -1652,6 +1684,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
 		goto out_unlock;
 	}
 
+	if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) {
+		struct vmw_buffer_object *backup = res->backup;
+
+		ttm_bo_reserve(&backup->base, false, false, NULL);
+		if (!res->func->dirty_alloc)
+			ret = -EINVAL;
+		if (!ret)
+			ret = vmw_bo_dirty_add(backup);
+		if (!ret) {
+			res->coherent = true;
+			ret = res->func->dirty_alloc(res);
+		}
+		ttm_bo_unreserve(&backup->base);
+		if (ret) {
+			vmw_resource_unreference(&res);
+			goto out_unlock;
+		}
+
+	}
+
 	tmp = vmw_resource_reference(res);
 	ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
 				    req->base.drm_surface_flags &
@@ -1760,3 +1812,338 @@ out_bad_resource:
 
 	return ret;
 }
+
+/**
+ * vmw_subres_dirty_add - Add a dirty region to a subresource
+ * @dirty: The surfaces's dirty tracker.
+ * @loc_start: The location corresponding to the start of the region.
+ * @loc_end: The location corresponding to the end of the region.
+ *
+ * As we are assuming that @loc_start and @loc_end represent a sequential
+ * range of backing store memory, if the region spans multiple lines then
+ * regardless of the x coordinate, the full lines are dirtied.
+ * Correspondingly if the region spans multiple z slices, then full rather
+ * than partial z slices are dirtied.
+ */
+static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty,
+				 const struct svga3dsurface_loc *loc_start,
+				 const struct svga3dsurface_loc *loc_end)
+{
+	const struct svga3dsurface_cache *cache = &dirty->cache;
+	SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource];
+	u32 mip = loc_start->sub_resource % cache->num_mip_levels;
+	const struct drm_vmw_size *size = &cache->mip[mip].size;
+	u32 box_c2 = box->z + box->d;
+
+	if (WARN_ON(loc_start->sub_resource >= dirty->num_subres))
+		return;
+
+	if (box->d == 0 || box->z > loc_start->z)
+		box->z = loc_start->z;
+	if (box_c2 < loc_end->z)
+		box->d = loc_end->z - box->z;
+
+	if (loc_start->z + 1 == loc_end->z) {
+		box_c2 = box->y + box->h;
+		if (box->h == 0 || box->y > loc_start->y)
+			box->y = loc_start->y;
+		if (box_c2 < loc_end->y)
+			box->h = loc_end->y - box->y;
+
+		if (loc_start->y + 1 == loc_end->y) {
+			box_c2 = box->x + box->w;
+			if (box->w == 0 || box->x > loc_start->x)
+				box->x = loc_start->x;
+			if (box_c2 < loc_end->x)
+				box->w = loc_end->x - box->x;
+		} else {
+			box->x = 0;
+			box->w = size->width;
+		}
+	} else {
+		box->y = 0;
+		box->h = size->height;
+		box->x = 0;
+		box->w = size->width;
+	}
+}
+
+/**
+ * vmw_subres_dirty_full - Mark a full subresource as dirty
+ * @dirty: The surface's dirty tracker.
+ * @subres: The subresource
+ */
+static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres)
+{
+	const struct svga3dsurface_cache *cache = &dirty->cache;
+	u32 mip = subres % cache->num_mip_levels;
+	const struct drm_vmw_size *size = &cache->mip[mip].size;
+	SVGA3dBox *box = &dirty->boxes[subres];
+
+	box->x = 0;
+	box->y = 0;
+	box->z = 0;
+	box->w = size->width;
+	box->h = size->height;
+	box->d = size->depth;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for texture
+ * surfaces.
+ */
+static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res,
+					    size_t start, size_t end)
+{
+	struct vmw_surface_dirty *dirty =
+		(struct vmw_surface_dirty *) res->dirty;
+	size_t backup_end = res->backup_offset + res->backup_size;
+	struct svga3dsurface_loc loc1, loc2;
+	const struct svga3dsurface_cache *cache;
+
+	start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+	end = min(end, backup_end) - res->backup_offset;
+	cache = &dirty->cache;
+	svga3dsurface_get_loc(cache, &loc1, start);
+	svga3dsurface_get_loc(cache, &loc2, end - 1);
+	svga3dsurface_inc_loc(cache, &loc2);
+
+	if (loc1.sub_resource + 1 == loc2.sub_resource) {
+		/* Dirty range covers a single sub-resource */
+		vmw_subres_dirty_add(dirty, &loc1, &loc2);
+	} else {
+		/* Dirty range covers multiple sub-resources */
+		struct svga3dsurface_loc loc_min, loc_max;
+		u32 sub_res = loc1.sub_resource;
+
+		svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max);
+		vmw_subres_dirty_add(dirty, &loc1, &loc_max);
+		svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min);
+		vmw_subres_dirty_add(dirty, &loc_min, &loc2);
+		for (sub_res = loc1.sub_resource + 1;
+		     sub_res < loc2.sub_resource - 1; ++sub_res)
+			vmw_subres_dirty_full(dirty, sub_res);
+	}
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for buffer
+ * surfaces.
+ */
+static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res,
+					    size_t start, size_t end)
+{
+	struct vmw_surface_dirty *dirty =
+		(struct vmw_surface_dirty *) res->dirty;
+	const struct svga3dsurface_cache *cache = &dirty->cache;
+	size_t backup_end = res->backup_offset + cache->mip_chain_bytes;
+	SVGA3dBox *box = &dirty->boxes[0];
+	u32 box_c2;
+
+	box->h = box->d = 1;
+	start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+	end = min(end, backup_end) - res->backup_offset;
+	box_c2 = box->x + box->w;
+	if (box->w == 0 || box->x > start)
+		box->x = start;
+	if (box_c2 < end)
+		box->w = end - box->x;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for surfaces
+ */
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+					size_t end)
+{
+	struct vmw_surface *srf = vmw_res_to_srf(res);
+
+	if (WARN_ON(end <= res->backup_offset ||
+		    start >= res->backup_offset + res->backup_size))
+		return;
+
+	if (srf->format == SVGA3D_BUFFER)
+		vmw_surface_buf_dirty_range_add(res, start, end);
+	else
+		vmw_surface_tex_dirty_range_add(res, start, end);
+}
+
+/*
+ * vmw_surface_dirty_sync - The surface's dirty_sync callback.
+ */
+static int vmw_surface_dirty_sync(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+	bool has_dx = 0;
+	u32 i, num_dirty;
+	struct vmw_surface_dirty *dirty =
+		(struct vmw_surface_dirty *) res->dirty;
+	size_t alloc_size;
+	const struct svga3dsurface_cache *cache = &dirty->cache;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDXUpdateSubResource body;
+	} *cmd1;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdUpdateGBImage body;
+	} *cmd2;
+	void *cmd;
+
+	num_dirty = 0;
+	for (i = 0; i < dirty->num_subres; ++i) {
+		const SVGA3dBox *box = &dirty->boxes[i];
+
+		if (box->d)
+			num_dirty++;
+	}
+
+	if (!num_dirty)
+		goto out;
+
+	alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2));
+	cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd1 = cmd;
+	cmd2 = cmd;
+
+	for (i = 0; i < dirty->num_subres; ++i) {
+		const SVGA3dBox *box = &dirty->boxes[i];
+
+		if (!box->d)
+			continue;
+
+		/*
+		 * DX_UPDATE_SUBRESOURCE is aware of array surfaces.
+		 * UPDATE_GB_IMAGE is not.
+		 */
+		if (has_dx) {
+			cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE;
+			cmd1->header.size = sizeof(cmd1->body);
+			cmd1->body.sid = res->id;
+			cmd1->body.subResource = i;
+			cmd1->body.box = *box;
+			cmd1++;
+		} else {
+			cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+			cmd2->header.size = sizeof(cmd2->body);
+			cmd2->body.image.sid = res->id;
+			cmd2->body.image.face = i / cache->num_mip_levels;
+			cmd2->body.image.mipmap = i -
+				(cache->num_mip_levels * cmd2->body.image.face);
+			cmd2->body.box = *box;
+			cmd2++;
+		}
+
+	}
+	vmw_fifo_commit(dev_priv, alloc_size);
+ out:
+	memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) *
+	       dirty->num_subres);
+
+	return 0;
+}
+
+/*
+ * vmw_surface_dirty_alloc - The surface's dirty_alloc callback.
+ */
+static int vmw_surface_dirty_alloc(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = vmw_res_to_srf(res);
+	struct vmw_surface_dirty *dirty;
+	u32 num_layers = 1;
+	u32 num_mip;
+	u32 num_subres;
+	u32 num_samples;
+	size_t dirty_size, acc_size;
+	static struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false
+	};
+	int ret;
+
+	if (srf->array_size)
+		num_layers = srf->array_size;
+	else if (srf->flags & SVGA3D_SURFACE_CUBEMAP)
+		num_layers *= SVGA3D_MAX_SURFACE_FACES;
+
+	num_mip = srf->mip_levels[0];
+	if (!num_mip)
+		num_mip = 1;
+
+	num_subres = num_layers * num_mip;
+	dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]);
+	acc_size = ttm_round_pot(dirty_size);
+	ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv),
+				   acc_size, &ctx);
+	if (ret) {
+		VMW_DEBUG_USER("Out of graphics memory for surface "
+			       "dirty tracker.\n");
+		return ret;
+	}
+
+	dirty = kvzalloc(dirty_size, GFP_KERNEL);
+	if (!dirty) {
+		ret = -ENOMEM;
+		goto out_no_dirty;
+	}
+
+	num_samples = max_t(u32, 1, srf->multisample_count);
+	ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip,
+					num_layers, num_samples, &dirty->cache);
+	if (ret)
+		goto out_no_cache;
+
+	dirty->num_subres = num_subres;
+	dirty->size = acc_size;
+	res->dirty = (struct vmw_resource_dirty *) dirty;
+
+	return 0;
+
+out_no_cache:
+	kvfree(dirty);
+out_no_dirty:
+	ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+	return ret;
+}
+
+/*
+ * vmw_surface_dirty_free - The surface's dirty_free callback
+ */
+static void vmw_surface_dirty_free(struct vmw_resource *res)
+{
+	struct vmw_surface_dirty *dirty =
+		(struct vmw_surface_dirty *) res->dirty;
+	size_t acc_size = dirty->size;
+
+	kvfree(dirty);
+	ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+	res->dirty = NULL;
+}
+
+/*
+ * vmw_surface_clean - The surface's clean callback
+ */
+static int vmw_surface_clean(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+	size_t alloc_size;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdReadbackGBSurface body;
+	} *cmd;
+
+	alloc_size = sizeof(*cmd);
+	cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = res->id;
+	vmw_fifo_commit(dev_priv, alloc_size);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index f611b2290a1b..9aaf807ed73c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -33,6 +33,8 @@
  * struct vmw_validation_bo_node - Buffer object validation metadata.
  * @base: Metadata used for TTM reservation- and validation.
  * @hash: A hash entry used for the duplicate detection hash table.
+ * @coherent_count: If switching backup buffers, number of new coherent
+ * resources that will have this buffer as a backup buffer.
  * @as_mob: Validate as mob.
  * @cpu_blit: Validate for cpu blit access.
  *
@@ -42,6 +44,7 @@
 struct vmw_validation_bo_node {
 	struct ttm_validate_buffer base;
 	struct drm_hash_item hash;
+	unsigned int coherent_count;
 	u32 as_mob : 1;
 	u32 cpu_blit : 1;
 };
@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
 			if (ret)
 				goto out_unreserve;
 		}
+
+		if (val->switching_backup && val->new_backup &&
+		    res->coherent) {
+			struct vmw_validation_bo_node *bo_node =
+				vmw_validation_find_bo_dup(ctx,
+							   val->new_backup);
+
+			if (WARN_ON(!bo_node)) {
+				ret = -EINVAL;
+				goto out_unreserve;
+			}
+			bo_node->coherent_count++;
+		}
 	}
 
 	return 0;
@@ -562,6 +578,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
 	int ret;
 
 	list_for_each_entry(entry, &ctx->bo_list, base.head) {
+		struct vmw_buffer_object *vbo =
+			container_of(entry->base.bo, typeof(*vbo), base);
+
 		if (entry->cpu_blit) {
 			struct ttm_operation_ctx ctx = {
 				.interruptible = intr,
@@ -576,6 +595,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
 		}
 		if (ret)
 			return ret;
+
+		/*
+		 * Rather than having the resource code allocating the bo
+		 * dirty tracker in resource_unreserve() where we can't fail,
+		 * Do it here when validating the buffer object.
+		 */
+		if (entry->coherent_count) {
+			unsigned int coherent_count = entry->coherent_count;
+
+			while (coherent_count) {
+				ret = vmw_bo_dirty_add(vbo);
+				if (ret)
+					return ret;
+
+				coherent_count--;
+			}
+			entry->coherent_count -= coherent_count;
+		}
+
+		if (vbo->dirty)
+			vmw_bo_dirty_scan(vbo);
 	}
 	return 0;
 }
@@ -601,7 +641,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
 		struct vmw_resource *res = val->res;
 		struct vmw_buffer_object *backup = res->backup;
 
-		ret = vmw_resource_validate(res, intr);
+		ret = vmw_resource_validate(res, intr, val->dirty_set &&
+					    val->dirty);
 		if (ret) {
 			if (ret != -ERESTARTSYS)
 				DRM_ERROR("Failed to validate resource.\n");
@@ -828,3 +869,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
 	ctx->mem_size_left += size;
 	return 0;
 }
+
+/**
+ * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
+ * validation context
+ * @ctx: The validation context
+ *
+ * This function unreserves the buffer objects previously reserved using
+ * vmw_validation_bo_reserve. It's typically used as part of an error path
+ */
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
+{
+	struct vmw_validation_bo_node *entry;
+
+	/*
+	 * Switching coherent resource backup buffers failed.
+	 * Release corresponding buffer object dirty trackers.
+	 */
+	list_for_each_entry(entry, &ctx->bo_list, base.head) {
+		if (entry->coherent_count) {
+			unsigned int coherent_count = entry->coherent_count;
+			struct vmw_buffer_object *vbo =
+				container_of(entry->base.bo, typeof(*vbo),
+					     base);
+
+			while (coherent_count--)
+				vmw_bo_dirty_release(vbo);
+		}
+	}
+
+	ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 523f6ac5c335..fd83e017c2a5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -169,21 +169,7 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
 			  bool intr)
 {
 	return ttm_eu_reserve_buffers(&ctx->ticket, &ctx->bo_list, intr,
-				      NULL);
-}
-
-/**
- * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
- * validation context
- * @ctx: The validation context
- *
- * This function unreserves the buffer objects previously reserved using
- * vmw_validation_bo_reserve. It's typically used as part of an error path
- */
-static inline void
-vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
-{
-	ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
+				      NULL, true);
 }
 
 /**
@@ -268,4 +254,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
 			       unsigned int size);
 void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx,
 				  void *val_private, u32 dirty);
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx);
+
 #endif
diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c
index 83d236fd893c..706452f9b276 100644
--- a/drivers/gpu/drm/zte/zx_plane.c
+++ b/drivers/gpu/drm/zte/zx_plane.c
@@ -199,7 +199,6 @@ static void zx_vl_plane_atomic_update(struct drm_plane *plane,
 	u32 dst_x, dst_y, dst_w, dst_h;
 	uint32_t format;
 	int fmt;
-	int num_planes;
 	int i;
 
 	if (!fb)
@@ -218,13 +217,12 @@ static void zx_vl_plane_atomic_update(struct drm_plane *plane,
 	dst_h = drm_rect_height(dst);
 
 	/* Set up data address registers for Y, Cb and Cr planes */
-	num_planes = drm_format_num_planes(format);
 	paddr_reg = layer + VL_Y;
-	for (i = 0; i < num_planes; i++) {
+	for (i = 0; i < fb->format->num_planes; i++) {
 		cma_obj = drm_fb_cma_get_gem_obj(fb, i);
 		paddr = cma_obj->paddr + fb->offsets[i];
 		paddr += src_y * fb->pitches[i];
-		paddr += src_x * drm_format_plane_cpp(format, i);
+		paddr += src_x * fb->format->cpp[i];
 		zx_writel(paddr_reg, paddr);
 		paddr_reg += 4;
 	}
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 9797ccb0a073..742aa9ff21b8 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -305,6 +305,36 @@ static int host1x_device_match(struct device *dev, struct device_driver *drv)
 	return strcmp(dev_name(dev), drv->name) == 0;
 }
 
+static int host1x_device_uevent(struct device *dev,
+				struct kobj_uevent_env *env)
+{
+	struct device_node *np = dev->parent->of_node;
+	unsigned int count = 0;
+	struct property *p;
+	const char *compat;
+
+	/*
+	 * This duplicates most of of_device_uevent(), but the latter cannot
+	 * be called from modules and operates on dev->of_node, which is not
+	 * available in this case.
+	 *
+	 * Note that this is really only needed for backwards compatibility
+	 * with libdrm, which parses this information from sysfs and will
+	 * fail if it can't find the OF_FULLNAME, specifically.
+	 */
+	add_uevent_var(env, "OF_NAME=%pOFn", np);
+	add_uevent_var(env, "OF_FULLNAME=%pOF", np);
+
+	of_property_for_each_string(np, "compatible", p, compat) {
+		add_uevent_var(env, "OF_COMPATIBLE_%u=%s", count, compat);
+		count++;
+	}
+
+	add_uevent_var(env, "OF_COMPATIBLE_N=%u", count);
+
+	return 0;
+}
+
 static int host1x_dma_configure(struct device *dev)
 {
 	return of_dma_configure(dev, dev->of_node, true);
@@ -322,6 +352,7 @@ static const struct dev_pm_ops host1x_device_pm_ops = {
 struct bus_type host1x_bus_type = {
 	.name = "host1x",
 	.match = host1x_device_match,
+	.uevent = host1x_device_uevent,
 	.dma_configure = host1x_dma_configure,
 	.pm = &host1x_device_pm_ops,
 };
@@ -408,12 +439,14 @@ static int host1x_device_add(struct host1x *host1x,
 	device->dev.dma_mask = &device->dev.coherent_dma_mask;
 	dev_set_name(&device->dev, "%s", driver->driver.name);
 	device->dev.release = host1x_device_release;
-	device->dev.of_node = host1x->dev->of_node;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
 
 	of_dma_configure(&device->dev, host1x->dev->of_node, true);
 
+	device->dev.dma_parms = &device->dma_parms;
+	dma_set_max_seg_size(&device->dev, SZ_4M);
+
 	err = host1x_device_parse_dt(device, driver);
 	if (err < 0) {
 		kfree(device);
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 070b30f72947..c0392672a842 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -162,9 +162,6 @@ static void host1x_debugfs_init(struct host1x *host1x)
 {
 	struct dentry *de = debugfs_create_dir("tegra-host1x", NULL);
 
-	if (!de)
-		return;
-
 	/* Store the created entry */
 	host1x->debugfs = de;
 
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index c55e2d634887..5a3f797240d4 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -247,8 +247,11 @@ static int host1x_probe(struct platform_device *pdev)
 
 	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
 		err = PTR_ERR(host->clk);
+
+		if (err != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "failed to get clock: %d\n", err);
+
 		return err;
 	}
 
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 65d7541c413a..087304b1a5d7 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -134,7 +134,6 @@ static DEFINE_MUTEX(vgasr_mutex);
  * @delayed_switch_active: whether a delayed switch is pending
  * @delayed_client_id: client to which a delayed switch is pending
  * @debugfs_root: directory for vga_switcheroo debugfs interface
- * @switch_file: file for vga_switcheroo debugfs interface
  * @registered_clients: number of registered GPUs
  *	(counting only vga clients, not audio clients)
  * @clients: list of registered clients
@@ -153,7 +152,6 @@ struct vgasr_priv {
 	enum vga_switcheroo_client_id delayed_client_id;
 
 	struct dentry *debugfs_root;
-	struct dentry *switch_file;
 
 	int registered_clients;
 	struct list_head clients;
@@ -169,7 +167,7 @@ struct vgasr_priv {
 #define client_is_vga(c)		(!client_is_audio(c))
 #define client_id(c)		((c)->id & ~ID_BIT_AUDIO)
 
-static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
+static void vga_switcheroo_debugfs_init(struct vgasr_priv *priv);
 static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv);
 
 /* only one switcheroo per system */
@@ -909,38 +907,20 @@ static const struct file_operations vga_switcheroo_debugfs_fops = {
 
 static void vga_switcheroo_debugfs_fini(struct vgasr_priv *priv)
 {
-	debugfs_remove(priv->switch_file);
-	priv->switch_file = NULL;
-
-	debugfs_remove(priv->debugfs_root);
+	debugfs_remove_recursive(priv->debugfs_root);
 	priv->debugfs_root = NULL;
 }
 
-static int vga_switcheroo_debugfs_init(struct vgasr_priv *priv)
+static void vga_switcheroo_debugfs_init(struct vgasr_priv *priv)
 {
-	static const char mp[] = "/sys/kernel/debug";
-
 	/* already initialised */
 	if (priv->debugfs_root)
-		return 0;
-	priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL);
+		return;
 
-	if (!priv->debugfs_root) {
-		pr_err("Cannot create %s/vgaswitcheroo\n", mp);
-		goto fail;
-	}
+	priv->debugfs_root = debugfs_create_dir("vgaswitcheroo", NULL);
 
-	priv->switch_file = debugfs_create_file("switch", 0644,
-						priv->debugfs_root, NULL,
-						&vga_switcheroo_debugfs_fops);
-	if (!priv->switch_file) {
-		pr_err("cannot create %s/vgaswitcheroo/switch\n", mp);
-		goto fail;
-	}
-	return 0;
-fail:
-	vga_switcheroo_debugfs_fini(priv);
-	return -1;
+	debugfs_create_file("switch", 0644, priv->debugfs_root, NULL,
+			    &vga_switcheroo_debugfs_fops);
 }
 
 /**
diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c
index fd806f590bf8..98bf694626f7 100644
--- a/drivers/hid/hid-a4tech.c
+++ b/drivers/hid/hid-a4tech.c
@@ -35,8 +35,10 @@ static int a4_input_mapped(struct hid_device *hdev, struct hid_input *hi,
 {
 	struct a4tech_sc *a4 = hid_get_drvdata(hdev);
 
-	if (usage->type == EV_REL && usage->code == REL_WHEEL)
+	if (usage->type == EV_REL && usage->code == REL_WHEEL_HI_RES) {
 		set_bit(REL_HWHEEL, *bit);
+		set_bit(REL_HWHEEL_HI_RES, *bit);
+	}
 
 	if ((a4->quirks & A4_2WHEEL_MOUSE_HACK_7) && usage->hid == 0x00090007)
 		return -1;
@@ -57,7 +59,7 @@ static int a4_event(struct hid_device *hdev, struct hid_field *field,
 	input = field->hidinput->input;
 
 	if (a4->quirks & A4_2WHEEL_MOUSE_HACK_B8) {
-		if (usage->type == EV_REL && usage->code == REL_WHEEL) {
+		if (usage->type == EV_REL && usage->code == REL_WHEEL_HI_RES) {
 			a4->delayed_value = value;
 			return 1;
 		}
@@ -65,6 +67,8 @@ static int a4_event(struct hid_device *hdev, struct hid_field *field,
 		if (usage->hid == 0x000100b8) {
 			input_event(input, EV_REL, value ? REL_HWHEEL :
 					REL_WHEEL, a4->delayed_value);
+			input_event(input, EV_REL, value ? REL_HWHEEL_HI_RES :
+					REL_WHEEL_HI_RES, a4->delayed_value * 120);
 			return 1;
 		}
 	}
@@ -74,8 +78,9 @@ static int a4_event(struct hid_device *hdev, struct hid_field *field,
 		return 1;
 	}
 
-	if (usage->code == REL_WHEEL && a4->hw_wheel) {
+	if (usage->code == REL_WHEEL_HI_RES && a4->hw_wheel) {
 		input_event(input, usage->type, REL_HWHEEL, value);
+		input_event(input, usage->type, REL_HWHEEL_HI_RES, value * 120);
 		return 1;
 	}
 
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 08d310723e96..210b81a56e1a 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -27,7 +27,6 @@
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
 #include <linux/semaphore.h>
-#include <linux/async.h>
 
 #include <linux/hid.h>
 #include <linux/hiddev.h>
@@ -1311,10 +1310,10 @@ static u32 __extract(u8 *report, unsigned offset, int n)
 u32 hid_field_extract(const struct hid_device *hid, u8 *report,
 			unsigned offset, unsigned n)
 {
-	if (n > 256) {
-		hid_warn(hid, "hid_field_extract() called with n (%d) > 256! (%s)\n",
+	if (n > 32) {
+		hid_warn(hid, "hid_field_extract() called with n (%d) > 32! (%s)\n",
 			 n, current->comm);
-		n = 256;
+		n = 32;
 	}
 
 	return __extract(report, offset, n);
@@ -2362,15 +2361,6 @@ int hid_add_device(struct hid_device *hdev)
 	dev_set_name(&hdev->dev, "%04X:%04X:%04X.%04X", hdev->bus,
 		     hdev->vendor, hdev->product, atomic_inc_return(&id));
 
-	/*
-	 * Try loading the module for the device before the add, so that we do
-	 * not first have hid-generic binding only to have it replaced
-	 * immediately afterwards with a specialized driver.
-	 */
-	if (!current_is_async())
-		request_module("hid:b%04Xg%04Xv%08Xp%08X", hdev->bus,
-			       hdev->group, hdev->vendor, hdev->product);
-
 	hid_debug_register(hdev, dev_name(&hdev->dev));
 	ret = device_add(&hdev->dev);
 	if (!ret)
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 8f806f46c278..7795831d37c2 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -606,5 +606,7 @@ static void __exit mousevsc_exit(void)
 }
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microsoft Hyper-V Synthetic HID Driver");
+
 module_init(mousevsc_init);
 module_exit(mousevsc_exit);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 84e0c78d73cd..eac0c54c5970 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1086,6 +1086,7 @@
 #define USB_DEVICE_ID_SYNAPTICS_HD	0x0ac3
 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD	0x1ac3
 #define USB_DEVICE_ID_SYNAPTICS_TP_V103	0x5710
+#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5	0x81a7
 
 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS	0x2047
 #define USB_DEVICE_ID_TEXAS_INSTRUMENTS_LENOVO_YOGA	0x0855
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 904a4b0d90f5..e564bff86515 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -113,6 +113,7 @@ enum recvr_type {
 	recvr_type_dj,
 	recvr_type_hidpp,
 	recvr_type_gaming_hidpp,
+	recvr_type_mouse_only,
 	recvr_type_27mhz,
 	recvr_type_bluetooth,
 };
@@ -864,9 +865,12 @@ static void logi_dj_recv_queue_notification(struct dj_receiver_dev *djrcv_dev,
 	schedule_work(&djrcv_dev->work);
 }
 
-static void logi_hidpp_dev_conn_notif_equad(struct hidpp_event *hidpp_report,
+static void logi_hidpp_dev_conn_notif_equad(struct hid_device *hdev,
+					    struct hidpp_event *hidpp_report,
 					    struct dj_workitem *workitem)
 {
+	struct dj_receiver_dev *djrcv_dev = hid_get_drvdata(hdev);
+
 	workitem->type = WORKITEM_TYPE_PAIRED;
 	workitem->device_type = hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] &
 				HIDPP_DEVICE_TYPE_MASK;
@@ -880,6 +884,8 @@ static void logi_hidpp_dev_conn_notif_equad(struct hidpp_event *hidpp_report,
 		break;
 	case REPORT_TYPE_MOUSE:
 		workitem->reports_supported |= STD_MOUSE | HIDPP;
+		if (djrcv_dev->type == recvr_type_mouse_only)
+			workitem->reports_supported |= MULTIMEDIA;
 		break;
 	}
 }
@@ -923,7 +929,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
 	case 0x01:
 		device_type = "Bluetooth";
 		/* Bluetooth connect packet contents is the same as (e)QUAD */
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		if (!(hidpp_report->params[HIDPP_PARAM_DEVICE_INFO] &
 						HIDPP_MANUFACTURER_MASK)) {
 			hid_info(hdev, "Non Logitech device connected on slot %d\n",
@@ -937,18 +943,18 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
 		break;
 	case 0x03:
 		device_type = "QUAD or eQUAD";
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		break;
 	case 0x04:
 		device_type = "eQUAD step 4 DJ";
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		break;
 	case 0x05:
 		device_type = "DFU Lite";
 		break;
 	case 0x06:
 		device_type = "eQUAD step 4 Lite";
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		break;
 	case 0x07:
 		device_type = "eQUAD step 4 Gaming";
@@ -958,11 +964,11 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
 		break;
 	case 0x0a:
 		device_type = "eQUAD nano Lite";
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		break;
 	case 0x0c:
 		device_type = "eQUAD Lightspeed";
-		logi_hidpp_dev_conn_notif_equad(hidpp_report, &workitem);
+		logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
 		workitem.reports_supported |= STD_KEYBOARD;
 		break;
 	}
@@ -1313,7 +1319,8 @@ static int logi_dj_ll_parse(struct hid_device *hid)
 	if (djdev->reports_supported & STD_MOUSE) {
 		dbg_hid("%s: sending a mouse descriptor, reports_supported: %llx\n",
 			__func__, djdev->reports_supported);
-		if (djdev->dj_receiver_dev->type == recvr_type_gaming_hidpp)
+		if (djdev->dj_receiver_dev->type == recvr_type_gaming_hidpp ||
+		    djdev->dj_receiver_dev->type == recvr_type_mouse_only)
 			rdcat(rdesc, &rsize, mse_high_res_descriptor,
 			      sizeof(mse_high_res_descriptor));
 		else if (djdev->dj_receiver_dev->type == recvr_type_27mhz)
@@ -1556,15 +1563,19 @@ static int logi_dj_raw_event(struct hid_device *hdev,
 			data[0] = data[1];
 			data[1] = 0;
 		}
-		/* The 27 MHz mouse-only receiver sends unnumbered mouse data */
+		/*
+		 * Mouse-only receivers send unnumbered mouse data. The 27 MHz
+		 * receiver uses 6 byte packets, the nano receiver 8 bytes.
+		 */
 		if (djrcv_dev->unnumbered_application == HID_GD_MOUSE &&
-		    size == 6) {
-			u8 mouse_report[7];
+		    size <= 8) {
+			u8 mouse_report[9];
 
 			/* Prepend report id */
 			mouse_report[0] = REPORT_TYPE_MOUSE;
-			memcpy(mouse_report + 1, data, 6);
-			logi_dj_recv_forward_input_report(hdev, mouse_report, 7);
+			memcpy(mouse_report + 1, data, size);
+			logi_dj_recv_forward_input_report(hdev, mouse_report,
+							  size + 1);
 		}
 
 		return false;
@@ -1635,6 +1646,7 @@ static int logi_dj_probe(struct hid_device *hdev,
 	case recvr_type_dj:		no_dj_interfaces = 3; break;
 	case recvr_type_hidpp:		no_dj_interfaces = 2; break;
 	case recvr_type_gaming_hidpp:	no_dj_interfaces = 3; break;
+	case recvr_type_mouse_only:	no_dj_interfaces = 2; break;
 	case recvr_type_27mhz:		no_dj_interfaces = 2; break;
 	case recvr_type_bluetooth:	no_dj_interfaces = 2; break;
 	}
@@ -1808,10 +1820,10 @@ static const struct hid_device_id logi_dj_receivers[] = {
 	{HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
 		USB_DEVICE_ID_LOGITECH_UNIFYING_RECEIVER_2),
 	 .driver_data = recvr_type_dj},
-	{ /* Logitech Nano (non DJ) receiver */
+	{ /* Logitech Nano mouse only receiver */
 	  HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
 			 USB_DEVICE_ID_LOGITECH_NANO_RECEIVER),
-	 .driver_data = recvr_type_hidpp},
+	 .driver_data = recvr_type_mouse_only},
 	{ /* Logitech Nano (non DJ) receiver */
 	  HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
 			 USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_2),
@@ -1836,6 +1848,14 @@ static const struct hid_device_id logi_dj_receivers[] = {
 	  HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
 		0xc70a),
 	 .driver_data = recvr_type_bluetooth},
+	{ /* Logitech MX5500 HID++ / bluetooth receiver keyboard intf. */
+	  HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+		0xc71b),
+	 .driver_data = recvr_type_bluetooth},
+	{ /* Logitech MX5500 HID++ / bluetooth receiver mouse intf. */
+	  HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH,
+		0xc71c),
+	 .driver_data = recvr_type_bluetooth},
 	{}
 };
 
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 3d2e6d254e7d..cf05816a601f 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -3728,6 +3728,9 @@ static const struct hid_device_id hidpp_devices[] = {
 	{ /* Keyboard MX5000 (Bluetooth-receiver in HID proxy mode) */
 	  LDJ_DEVICE(0xb305),
 	  .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
+	{ /* Keyboard MX5500 (Bluetooth-receiver in HID proxy mode) */
+	  LDJ_DEVICE(0xb30b),
+	  .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
 
 	{ LDJ_DEVICE(HID_ANY_ID) },
 
@@ -3740,6 +3743,9 @@ static const struct hid_device_id hidpp_devices[] = {
 	{ /* Keyboard MX3200 (Y-RAV80) */
 	  L27MHZ_DEVICE(0x005c),
 	  .driver_data = HIDPP_QUIRK_KBD_ZOOM_WHEEL },
+	{ /* S510 Media Remote */
+	  L27MHZ_DEVICE(0x00fe),
+	  .driver_data = HIDPP_QUIRK_KBD_SCROLL_WHEEL },
 
 	{ L27MHZ_DEVICE(HID_ANY_ID) },
 
@@ -3756,6 +3762,9 @@ static const struct hid_device_id hidpp_devices[] = {
 	{ /* MX5000 keyboard over Bluetooth */
 	  HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb305),
 	  .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
+	{ /* MX5500 keyboard over Bluetooth */
+	  HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b),
+	  .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS },
 	{}
 };
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c34e972f6296..5df5dd56ecc8 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -637,6 +637,13 @@ static void mt_store_field(struct hid_device *hdev,
 	if (*target != DEFAULT_TRUE &&
 	    *target != DEFAULT_FALSE &&
 	    *target != DEFAULT_ZERO) {
+		if (usage->contactid == DEFAULT_ZERO ||
+		    usage->x == DEFAULT_ZERO ||
+		    usage->y == DEFAULT_ZERO) {
+			hid_dbg(hdev,
+				"ignoring duplicate usage on incomplete");
+			return;
+		}
 		usage = mt_allocate_usage(hdev, application);
 		if (!usage)
 			return;
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
index 853842448f70..7c6abd7e0979 100644
--- a/drivers/hid/hid-rmi.c
+++ b/drivers/hid/hid-rmi.c
@@ -35,6 +35,7 @@
 /* device flags */
 #define RMI_DEVICE			BIT(0)
 #define RMI_DEVICE_HAS_PHYS_BUTTONS	BIT(1)
+#define RMI_DEVICE_OUTPUT_SET_REPORT	BIT(2)
 
 /*
  * retrieve the ctrl registers
@@ -163,9 +164,19 @@ static int rmi_set_mode(struct hid_device *hdev, u8 mode)
 
 static int rmi_write_report(struct hid_device *hdev, u8 *report, int len)
 {
+	struct rmi_data *data = hid_get_drvdata(hdev);
 	int ret;
 
-	ret = hid_hw_output_report(hdev, (void *)report, len);
+	if (data->device_flags & RMI_DEVICE_OUTPUT_SET_REPORT) {
+		/*
+		 * Talk to device by using SET_REPORT requests instead.
+		 */
+		ret = hid_hw_raw_request(hdev, report[0], report,
+				len, HID_OUTPUT_REPORT, HID_REQ_SET_REPORT);
+	} else {
+		ret = hid_hw_output_report(hdev, (void *)report, len);
+	}
+
 	if (ret < 0) {
 		dev_err(&hdev->dev, "failed to write hid report (%d)\n", ret);
 		return ret;
@@ -747,6 +758,8 @@ static const struct hid_device_id rmi_id[] = {
 		.driver_data = RMI_DEVICE_HAS_PHYS_BUTTONS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_X1_COVER) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_REZEL) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5),
+		.driver_data = RMI_DEVICE_OUTPUT_SET_REPORT },
 	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_RMI, HID_ANY_ID, HID_ANY_ID) },
 	{ }
 };
diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
index fd1b6eea6d2f..75078c83be1a 100644
--- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
@@ -354,6 +354,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
 		},
 		.driver_data = (void *)&sipodev_desc
 	},
+	{
+		.ident = "iBall Aer3",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "iBall"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Aer3"),
+		},
+		.driver_data = (void *)&sipodev_desc
+	},
 	{ }	/* Terminate list */
 };
 
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 1f1ed276e388..43f6da357165 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -1232,13 +1232,13 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
 		/* Add back in missing bits of ID for non-USI pens */
 		wacom->id[0] |= (wacom->serial[0] >> 32) & 0xFFFFF;
 	}
-	wacom->tool[0]   = wacom_intuos_get_tool_type(wacom_intuos_id_mangle(wacom->id[0]));
 
 	for (i = 0; i < pen_frames; i++) {
 		unsigned char *frame = &data[i*pen_frame_len + 1];
 		bool valid = frame[0] & 0x80;
 		bool prox = frame[0] & 0x40;
 		bool range = frame[0] & 0x20;
+		bool invert = frame[0] & 0x10;
 
 		if (!valid)
 			continue;
@@ -1247,9 +1247,24 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
 			wacom->shared->stylus_in_proximity = false;
 			wacom_exit_report(wacom);
 			input_sync(pen_input);
+
+			wacom->tool[0] = 0;
+			wacom->id[0] = 0;
+			wacom->serial[0] = 0;
 			return;
 		}
+
 		if (range) {
+			if (!wacom->tool[0]) { /* first in range */
+				/* Going into range select tool */
+				if (invert)
+					wacom->tool[0] = BTN_TOOL_RUBBER;
+				else if (wacom->id[0])
+					wacom->tool[0] = wacom_intuos_get_tool_type(wacom->id[0]);
+				else
+					wacom->tool[0] = BTN_TOOL_PEN;
+			}
+
 			input_report_abs(pen_input, ABS_X, get_unaligned_le16(&frame[1]));
 			input_report_abs(pen_input, ABS_Y, get_unaligned_le16(&frame[3]));
 
@@ -1271,23 +1286,26 @@ static void wacom_intuos_pro2_bt_pen(struct wacom_wac *wacom)
 						 get_unaligned_le16(&frame[11]));
 			}
 		}
-		input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5]));
-		if (wacom->features.type == INTUOSP2_BT) {
-			input_report_abs(pen_input, ABS_DISTANCE,
-					 range ? frame[13] : wacom->features.distance_max);
-		} else {
-			input_report_abs(pen_input, ABS_DISTANCE,
-					 range ? frame[7] : wacom->features.distance_max);
-		}
 
-		input_report_key(pen_input, BTN_TOUCH, frame[0] & 0x01);
-		input_report_key(pen_input, BTN_STYLUS, frame[0] & 0x02);
-		input_report_key(pen_input, BTN_STYLUS2, frame[0] & 0x04);
+		if (wacom->tool[0]) {
+			input_report_abs(pen_input, ABS_PRESSURE, get_unaligned_le16(&frame[5]));
+			if (wacom->features.type == INTUOSP2_BT) {
+				input_report_abs(pen_input, ABS_DISTANCE,
+						 range ? frame[13] : wacom->features.distance_max);
+			} else {
+				input_report_abs(pen_input, ABS_DISTANCE,
+						 range ? frame[7] : wacom->features.distance_max);
+			}
 
-		input_report_key(pen_input, wacom->tool[0], prox);
-		input_event(pen_input, EV_MSC, MSC_SERIAL, wacom->serial[0]);
-		input_report_abs(pen_input, ABS_MISC,
-				 wacom_intuos_id_mangle(wacom->id[0])); /* report tool id */
+			input_report_key(pen_input, BTN_TOUCH, frame[0] & 0x09);
+			input_report_key(pen_input, BTN_STYLUS, frame[0] & 0x02);
+			input_report_key(pen_input, BTN_STYLUS2, frame[0] & 0x04);
+
+			input_report_key(pen_input, wacom->tool[0], prox);
+			input_event(pen_input, EV_MSC, MSC_SERIAL, wacom->serial[0]);
+			input_report_abs(pen_input, ABS_MISC,
+					 wacom_intuos_id_mangle(wacom->id[0])); /* report tool id */
+		}
 
 		wacom->shared->stylus_in_proximity = prox;
 
@@ -1349,11 +1367,17 @@ static void wacom_intuos_pro2_bt_touch(struct wacom_wac *wacom)
 		if (wacom->num_contacts_left <= 0) {
 			wacom->num_contacts_left = 0;
 			wacom->shared->touch_down = wacom_wac_finger_count_touches(wacom);
+			input_sync(touch_input);
 		}
 	}
 
-	input_report_switch(touch_input, SW_MUTE_DEVICE, !(data[281] >> 7));
-	input_sync(touch_input);
+	if (wacom->num_contacts_left == 0) {
+		// Be careful that we don't accidentally call input_sync with
+		// only a partial set of fingers of processed
+		input_report_switch(touch_input, SW_MUTE_DEVICE, !(data[281] >> 7));
+		input_sync(touch_input);
+	}
+
 }
 
 static void wacom_intuos_pro2_bt_pad(struct wacom_wac *wacom)
@@ -1361,7 +1385,7 @@ static void wacom_intuos_pro2_bt_pad(struct wacom_wac *wacom)
 	struct input_dev *pad_input = wacom->pad_input;
 	unsigned char *data = wacom->data;
 
-	int buttons = (data[282] << 1) | ((data[281] >> 6) & 0x01);
+	int buttons = data[282] | ((data[281] & 0x40) << 2);
 	int ring = data[285] & 0x7F;
 	bool ringstatus = data[285] & 0x80;
 	bool prox = buttons || ringstatus;
@@ -3810,7 +3834,7 @@ static void wacom_24hd_update_leds(struct wacom *wacom, int mask, int group)
 static bool wacom_is_led_toggled(struct wacom *wacom, int button_count,
 				 int mask, int group)
 {
-	int button_per_group;
+	int group_button;
 
 	/*
 	 * 21UX2 has LED group 1 to the left and LED group 0
@@ -3820,9 +3844,12 @@ static bool wacom_is_led_toggled(struct wacom *wacom, int button_count,
 	if (wacom->wacom_wac.features.type == WACOM_21UX2)
 		group = 1 - group;
 
-	button_per_group = button_count/wacom->led.count;
+	group_button = group * (button_count/wacom->led.count);
+
+	if (wacom->wacom_wac.features.type == INTUOSP2_BT)
+		group_button = 8;
 
-	return mask & (1 << (group * button_per_group));
+	return mask & (1 << group_button);
 }
 
 static void wacom_update_led(struct wacom *wacom, int button_count, int mask,
diff --git a/drivers/i2c/busses/i2c-acorn.c b/drivers/i2c/busses/i2c-acorn.c
index f4a5ae69bf6a..fa3763e4b3ee 100644
--- a/drivers/i2c/busses/i2c-acorn.c
+++ b/drivers/i2c/busses/i2c-acorn.c
@@ -81,6 +81,7 @@ static struct i2c_algo_bit_data ioc_data = {
 
 static struct i2c_adapter ioc_ops = {
 	.nr			= 0,
+	.name			= "ioc",
 	.algo_data		= &ioc_data,
 };
 
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index de3fe6e828cb..f50afa8e3cba 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -21,7 +21,6 @@
 #include <linux/platform_device.h>
 #include <linux/i2c-algo-pca.h>
 #include <linux/platform_data/i2c-pca-platform.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/io.h>
 #include <linux/of.h>
@@ -173,7 +172,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	i2c->adap.dev.parent = &pdev->dev;
 	i2c->adap.dev.of_node = np;
 
-	i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset-gpios", GPIOD_OUT_LOW);
+	i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
 	if (IS_ERR(i2c->gpio))
 		return PTR_ERR(i2c->gpio);
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5aeb1dbfaa08..586dd5a46d9f 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -47,6 +47,15 @@
 
 #include "arm-smmu-regs.h"
 
+/*
+ * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
+ * global register space are still, in fact, using a hypervisor to mediate it
+ * by trapping and emulating register accesses. Sadly, some deployed versions
+ * of said trapping code have bugs wherein they go horribly wrong for stores
+ * using r31 (i.e. XZR/WZR) as the source register.
+ */
+#define QCOM_DUMMY_VAL -1
+
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
 
 #define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
@@ -411,7 +420,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 {
 	unsigned int spin_cnt, delay;
 
-	writel_relaxed(0, sync);
+	writel_relaxed(QCOM_DUMMY_VAL, sync);
 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
 			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
@@ -1751,8 +1760,8 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 	}
 
 	/* Invalidate the TLB, just in case */
-	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
-	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
+	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
+	writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
 
 	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
 
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 162b3236e72c..56297298d6ee 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2504,6 +2504,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		}
 	}
 
+	spin_lock(&iommu->lock);
 	spin_lock_irqsave(&device_domain_lock, flags);
 	if (dev)
 		found = find_domain(dev);
@@ -2519,17 +2520,16 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 
 	if (found) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
+		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		/* Caller must free the original domain */
 		return found;
 	}
 
-	spin_lock(&iommu->lock);
 	ret = domain_attach_iommu(domain, iommu);
-	spin_unlock(&iommu->lock);
-
 	if (ret) {
 		spin_unlock_irqrestore(&device_domain_lock, flags);
+		spin_unlock(&iommu->lock);
 		free_devinfo_mem(info);
 		return NULL;
 	}
@@ -2539,6 +2539,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 	if (dev)
 		dev->archdata.iommu = info;
 	spin_unlock_irqrestore(&device_domain_lock, flags);
+	spin_unlock(&iommu->lock);
 
 	/* PASID table is mandatory for a PCI device in scalable mode. */
 	if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
index 2fefeafda437..fe51d8af457f 100644
--- a/drivers/iommu/intel-pasid.c
+++ b/drivers/iommu/intel-pasid.c
@@ -389,7 +389,7 @@ static inline void pasid_set_present(struct pasid_entry *pe)
  */
 static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
 {
-	pasid_set_bits(&pe->val[1], 1 << 23, value);
+	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
 }
 
 /*
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index f9cacce909d3..9f0a2844371c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -329,7 +329,7 @@ static ssize_t iommu_group_show_type(struct iommu_group *group,
 			type = "unmanaged\n";
 			break;
 		case IOMMU_DOMAIN_DMA:
-			type = "DMA";
+			type = "DMA\n";
 			break;
 		}
 	}
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8f07fa6e1739..268f1b685084 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -887,12 +887,22 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
 	struct bset *i = bset_tree_last(b)->data;
 	struct bkey *m, *prev = NULL;
 	struct btree_iter iter;
+	struct bkey preceding_key_on_stack = ZERO_KEY;
+	struct bkey *preceding_key_p = &preceding_key_on_stack;
 
 	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
 
-	m = bch_btree_iter_init(b, &iter, b->ops->is_extents
-				? PRECEDING_KEY(&START_KEY(k))
-				: PRECEDING_KEY(k));
+	/*
+	 * If k has preceding key, preceding_key_p will be set to address
+	 *  of k's preceding key; otherwise preceding_key_p will be set
+	 * to NULL inside preceding_key().
+	 */
+	if (b->ops->is_extents)
+		preceding_key(&START_KEY(k), &preceding_key_p);
+	else
+		preceding_key(k, &preceding_key_p);
+
+	m = bch_btree_iter_init(b, &iter, preceding_key_p);
 
 	if (b->ops->insert_fixup(b, k, &iter, replace_key))
 		return status;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index bac76aabca6d..c71365e7c1fa 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -434,20 +434,26 @@ static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
 	return __bch_cut_back(where, k);
 }
 
-#define PRECEDING_KEY(_k)					\
-({								\
-	struct bkey *_ret = NULL;				\
-								\
-	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
-		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
-								\
-		if (!_ret->low)					\
-			_ret->high--;				\
-		_ret->low--;					\
-	}							\
-								\
-	_ret;							\
-})
+/*
+ * Pointer '*preceding_key_p' points to a memory object to store preceding
+ * key of k. If the preceding key does not exist, set '*preceding_key_p' to
+ * NULL. So the caller of preceding_key() needs to take care of memory
+ * which '*preceding_key_p' pointed to before calling preceding_key().
+ * Currently the only caller of preceding_key() is bch_btree_insert_key(),
+ * and it points to an on-stack variable, so the memory release is handled
+ * by stackframe itself.
+ */
+static inline void preceding_key(struct bkey *k, struct bkey **preceding_key_p)
+{
+	if (KEY_INODE(k) || KEY_OFFSET(k)) {
+		(**preceding_key_p) = KEY(KEY_INODE(k), KEY_OFFSET(k), 0);
+		if (!(*preceding_key_p)->low)
+			(*preceding_key_p)->high--;
+		(*preceding_key_p)->low--;
+	} else {
+		(*preceding_key_p) = NULL;
+	}
+}
 
 static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
 {
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 6cd44d3cf906..bfb437ffb13c 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -431,8 +431,13 @@ STORE(bch_cached_dev)
 			bch_writeback_queue(dc);
 	}
 
+	/*
+	 * Only set BCACHE_DEV_WB_RUNNING when cached device attached to
+	 * a cache set, otherwise it doesn't make sense.
+	 */
 	if (attr == &sysfs_writeback_percent)
-		if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
+		if ((dc->disk.c != NULL) &&
+		    (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)))
 			schedule_delayed_work(&dc->writeback_rate_update,
 				      dc->writeback_rate_update_seconds * HZ);
 
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index ecd3277f2e89..6351a97f3d18 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -905,7 +905,7 @@ static void dvb_frontend_get_frequency_limits(struct dvb_frontend *fe,
 			 "DVB: adapter %i frontend %u frequency limits undefined - fix the driver\n",
 			 fe->dvb->num, fe->id);
 
-	dprintk("frequency interval: tuner: %u...%u, frontend: %u...%u",
+	dev_dbg(fe->dvb->device, "frequency interval: tuner: %u...%u, frontend: %u...%u",
 		tuner_min, tuner_max, frontend_min, frontend_max);
 
 	/* If the standard is for satellite, convert frequencies to kHz */
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h b/drivers/media/platform/qcom/venus/hfi_helper.h
index 04cc80b106d6..b70551e296b7 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -560,7 +560,7 @@ struct hfi_capability {
 
 struct hfi_capabilities {
 	u32 num_capabilities;
-	struct hfi_capability *data;
+	struct hfi_capability data[];
 };
 
 #define HFI_DEBUG_MSG_LOW	0x01
@@ -717,7 +717,7 @@ struct hfi_profile_level {
 
 struct hfi_profile_level_supported {
 	u32 profile_count;
-	struct hfi_profile_level *profile_level;
+	struct hfi_profile_level profile_level[];
 };
 
 struct hfi_quality_vs_speed {
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
index b07000202d4a..417865129407 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.c
+++ b/drivers/misc/mei/hdcp/mei_hdcp.c
@@ -576,7 +576,7 @@ static int mei_hdcp_verify_mprime(struct device *dev,
 
 	memcpy(verify_mprime_in.m_prime, stream_ready->m_prime,
 	       HDCP_2_2_MPRIME_LEN);
-	drm_hdcp2_u32_to_seq_num(verify_mprime_in.seq_num_m, data->seq_num_m);
+	drm_hdcp_cpu_to_be24(verify_mprime_in.seq_num_m, data->seq_num_m);
 	memcpy(verify_mprime_in.streams, data->streams,
 	       (data->k * sizeof(struct hdcp2_streamid_type)));
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 28cb44c61d4a..24d7fe7c74ed 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -303,11 +303,19 @@ static const struct attribute_group *pmem_attribute_groups[] = {
 	NULL,
 };
 
-static void pmem_release_queue(void *q)
+static void __pmem_release_queue(struct percpu_ref *ref)
 {
+	struct request_queue *q;
+
+	q = container_of(ref, typeof(*q), q_usage_counter);
 	blk_cleanup_queue(q);
 }
 
+static void pmem_release_queue(void *ref)
+{
+	__pmem_release_queue(ref);
+}
+
 static void pmem_freeze_queue(struct percpu_ref *ref)
 {
 	struct request_queue *q;
@@ -399,12 +407,10 @@ static int pmem_attach_disk(struct device *dev,
 	if (!q)
 		return -ENOMEM;
 
-	if (devm_add_action_or_reset(dev, pmem_release_queue, q))
-		return -ENOMEM;
-
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	pmem->pgmap.kill = pmem_freeze_queue;
+	pmem->pgmap.cleanup = __pmem_release_queue;
 	if (is_nd_pfn(dev)) {
 		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
 			return -ENOMEM;
@@ -425,6 +431,9 @@ static int pmem_attach_disk(struct device *dev,
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
 	} else {
+		if (devm_add_action_or_reset(dev, pmem_release_queue,
+					&q->q_usage_counter))
+			return -ENOMEM;
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
 		memcpy(&bb_res, &nsio->res, sizeof(bb_res));
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 742928d0053e..a98126ad9c3a 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -20,12 +20,16 @@
 #include <linux/seq_buf.h>
 
 struct pci_p2pdma {
-	struct percpu_ref devmap_ref;
-	struct completion devmap_ref_done;
 	struct gen_pool *pool;
 	bool p2pmem_published;
 };
 
+struct p2pdma_pagemap {
+	struct dev_pagemap pgmap;
+	struct percpu_ref ref;
+	struct completion ref_done;
+};
+
 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
@@ -74,41 +78,45 @@ static const struct attribute_group p2pmem_group = {
 	.name = "p2pmem",
 };
 
+static struct p2pdma_pagemap *to_p2p_pgmap(struct percpu_ref *ref)
+{
+	return container_of(ref, struct p2pdma_pagemap, ref);
+}
+
 static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
 {
-	struct pci_p2pdma *p2p =
-		container_of(ref, struct pci_p2pdma, devmap_ref);
+	struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
 
-	complete_all(&p2p->devmap_ref_done);
+	complete(&p2p_pgmap->ref_done);
 }
 
 static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
 {
-	/*
-	 * pci_p2pdma_add_resource() may be called multiple times
-	 * by a driver and may register the percpu_kill devm action multiple
-	 * times. We only want the first action to actually kill the
-	 * percpu_ref.
-	 */
-	if (percpu_ref_is_dying(ref))
-		return;
-
 	percpu_ref_kill(ref);
 }
 
+static void pci_p2pdma_percpu_cleanup(struct percpu_ref *ref)
+{
+	struct p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(ref);
+
+	wait_for_completion(&p2p_pgmap->ref_done);
+	percpu_ref_exit(&p2p_pgmap->ref);
+}
+
 static void pci_p2pdma_release(void *data)
 {
 	struct pci_dev *pdev = data;
+	struct pci_p2pdma *p2pdma = pdev->p2pdma;
 
-	if (!pdev->p2pdma)
+	if (!p2pdma)
 		return;
 
-	wait_for_completion(&pdev->p2pdma->devmap_ref_done);
-	percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+	/* Flush and disable pci_alloc_p2p_mem() */
+	pdev->p2pdma = NULL;
+	synchronize_rcu();
 
-	gen_pool_destroy(pdev->p2pdma->pool);
+	gen_pool_destroy(p2pdma->pool);
 	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
-	pdev->p2pdma = NULL;
 }
 
 static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -124,12 +132,6 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
 	if (!p2p->pool)
 		goto out;
 
-	init_completion(&p2p->devmap_ref_done);
-	error = percpu_ref_init(&p2p->devmap_ref,
-			pci_p2pdma_percpu_release, 0, GFP_KERNEL);
-	if (error)
-		goto out_pool_destroy;
-
 	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
 	if (error)
 		goto out_pool_destroy;
@@ -163,6 +165,7 @@ out:
 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 			    u64 offset)
 {
+	struct p2pdma_pagemap *p2p_pgmap;
 	struct dev_pagemap *pgmap;
 	void *addr;
 	int error;
@@ -185,18 +188,27 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 			return error;
 	}
 
-	pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
-	if (!pgmap)
+	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+	if (!p2p_pgmap)
 		return -ENOMEM;
 
+	init_completion(&p2p_pgmap->ref_done);
+	error = percpu_ref_init(&p2p_pgmap->ref,
+			pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+	if (error)
+		goto pgmap_free;
+
+	pgmap = &p2p_pgmap->pgmap;
+
 	pgmap->res.start = pci_resource_start(pdev, bar) + offset;
 	pgmap->res.end = pgmap->res.start + size - 1;
 	pgmap->res.flags = pci_resource_flags(pdev, bar);
-	pgmap->ref = &pdev->p2pdma->devmap_ref;
+	pgmap->ref = &p2p_pgmap->ref;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
 	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
 		pci_resource_start(pdev, bar);
 	pgmap->kill = pci_p2pdma_percpu_kill;
+	pgmap->cleanup = pci_p2pdma_percpu_cleanup;
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
@@ -204,19 +216,22 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		goto pgmap_free;
 	}
 
-	error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+	error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,
-			resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+			resource_size(&pgmap->res), dev_to_node(&pdev->dev),
+			&p2p_pgmap->ref);
 	if (error)
-		goto pgmap_free;
+		goto pages_free;
 
 	pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
 		 &pgmap->res);
 
 	return 0;
 
+pages_free:
+	devm_memunmap_pages(&pdev->dev, pgmap);
 pgmap_free:
-	devm_kfree(&pdev->dev, pgmap);
+	devm_kfree(&pdev->dev, p2p_pgmap);
 	return error;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
@@ -585,19 +600,30 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
  */
 void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
 {
-	void *ret;
+	void *ret = NULL;
+	struct percpu_ref *ref;
 
+	/*
+	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
+	 * ensure pdev->p2pdma is non-NULL for the duration of the
+	 * read-lock.
+	 */
+	rcu_read_lock();
 	if (unlikely(!pdev->p2pdma))
-		return NULL;
-
-	if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
-		return NULL;
-
-	ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+		goto out;
 
-	if (unlikely(!ret))
-		percpu_ref_put(&pdev->p2pdma->devmap_ref);
+	ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
+			(void **) &ref);
+	if (!ret)
+		goto out;
 
+	if (unlikely(!percpu_ref_tryget_live(ref))) {
+		gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
+		ret = NULL;
+		goto out;
+	}
+out:
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
@@ -610,8 +636,11 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
  */
 void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
 {
-	gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
-	percpu_ref_put(&pdev->p2pdma->devmap_ref);
+	struct percpu_ref *ref;
+
+	gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
+			(void **) &ref);
+	percpu_ref_put(ref);
 }
 EXPORT_SYMBOL_GPL(pci_free_p2pmem);
 
diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index 687ce6817d0d..f85a1b9d129b 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -694,6 +694,7 @@ static int mlxreg_hotplug_remove(struct platform_device *pdev)
 
 	/* Clean interrupts setup. */
 	mlxreg_hotplug_unset_irq(priv);
+	devm_free_irq(&pdev->dev, priv->irq, priv);
 
 	return 0;
 }
diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index 81642102bf65..8d9e30dbb5af 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -65,10 +65,12 @@ static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str,
 
 static struct quirk_entry quirk_asus_unknown = {
 	.wapf = 0,
+	.wmi_backlight_set_devstate = true,
 };
 
 static struct quirk_entry quirk_asus_q500a = {
 	.i8042_filter = asus_q500a_i8042_filter,
+	.wmi_backlight_set_devstate = true,
 };
 
 /*
@@ -79,26 +81,32 @@ static struct quirk_entry quirk_asus_q500a = {
 static struct quirk_entry quirk_asus_x55u = {
 	.wapf = 4,
 	.wmi_backlight_power = true,
+	.wmi_backlight_set_devstate = true,
 	.no_display_toggle = true,
 };
 
 static struct quirk_entry quirk_asus_wapf4 = {
 	.wapf = 4,
+	.wmi_backlight_set_devstate = true,
 };
 
 static struct quirk_entry quirk_asus_x200ca = {
 	.wapf = 2,
+	.wmi_backlight_set_devstate = true,
 };
 
 static struct quirk_entry quirk_asus_ux303ub = {
 	.wmi_backlight_native = true,
+	.wmi_backlight_set_devstate = true,
 };
 
 static struct quirk_entry quirk_asus_x550lb = {
+	.wmi_backlight_set_devstate = true,
 	.xusb2pr = 0x01D9,
 };
 
 static struct quirk_entry quirk_asus_forceals = {
+	.wmi_backlight_set_devstate = true,
 	.wmi_force_als_set = true,
 };
 
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 3e4336025e8f..9b18a184e0aa 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -2146,7 +2146,7 @@ static int asus_wmi_add(struct platform_device *pdev)
 		err = asus_wmi_backlight_init(asus);
 		if (err && err != -ENODEV)
 			goto fail_backlight;
-	} else
+	} else if (asus->driver->quirks->wmi_backlight_set_devstate)
 		err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL);
 
 	if (asus_wmi_has_fnlock_key(asus)) {
diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h
index 0930be770688..4f31b68642a0 100644
--- a/drivers/platform/x86/asus-wmi.h
+++ b/drivers/platform/x86/asus-wmi.h
@@ -31,6 +31,7 @@ struct quirk_entry {
 	bool store_backlight_power;
 	bool wmi_backlight_power;
 	bool wmi_backlight_native;
+	bool wmi_backlight_set_devstate;
 	bool wmi_force_als_set;
 	int wapf;
 	/*
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index 06cd7e818ed5..a0d0cecff55f 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -76,12 +76,24 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 	struct platform_device *device = context;
 	struct intel_vbtn_priv *priv = dev_get_drvdata(&device->dev);
 	unsigned int val = !(event & 1); /* Even=press, Odd=release */
-	const struct key_entry *ke_rel;
+	const struct key_entry *ke, *ke_rel;
 	bool autorelease;
 
 	if (priv->wakeup_mode) {
-		if (sparse_keymap_entry_from_scancode(priv->input_dev, event)) {
+		ke = sparse_keymap_entry_from_scancode(priv->input_dev, event);
+		if (ke) {
 			pm_wakeup_hard_event(&device->dev);
+
+			/*
+			 * Switch events like tablet mode will wake the device
+			 * and report the new switch position to the input
+			 * subsystem.
+			 */
+			if (ke->type == KE_SW)
+				sparse_keymap_report_event(priv->input_dev,
+							   event,
+							   val,
+							   0);
 			return;
 		}
 		goto out_unknown;
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index cee039f57499..983f02b5b106 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -2032,7 +2032,7 @@ static int __init mlxplat_init(void)
 
 	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
 		priv->pdev_mux[i] = platform_device_register_resndata(
-						&mlxplat_dev->dev,
+						&priv->pdev_i2c->dev,
 						"i2c-mux-reg", i, NULL,
 						0, &mlxplat_mux_data[i],
 						sizeof(mlxplat_mux_data[i]));
diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 88e4f3ff0cb8..673f8a128397 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -2,6 +2,7 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/kernel.h>
+#include <linux/workqueue.h>
 
 #include <asm/mce.h>
 
@@ -123,16 +124,12 @@ static u64 dfs_pfn;
 /* Amount of errors after which we offline */
 static unsigned int count_threshold = COUNT_MASK;
 
-/*
- * The timer "decays" element count each timer_interval which is 24hrs by
- * default.
- */
-
-#define CEC_TIMER_DEFAULT_INTERVAL	24 * 60 * 60	/* 24 hrs */
-#define CEC_TIMER_MIN_INTERVAL		 1 * 60 * 60	/* 1h */
-#define CEC_TIMER_MAX_INTERVAL	   30 *	24 * 60 * 60	/* one month */
-static struct timer_list cec_timer;
-static u64 timer_interval = CEC_TIMER_DEFAULT_INTERVAL;
+/* Each element "decays" each decay_interval which is 24hrs by default. */
+#define CEC_DECAY_DEFAULT_INTERVAL	24 * 60 * 60	/* 24 hrs */
+#define CEC_DECAY_MIN_INTERVAL		 1 * 60 * 60	/* 1h */
+#define CEC_DECAY_MAX_INTERVAL	   30 *	24 * 60 * 60	/* one month */
+static struct delayed_work cec_work;
+static u64 decay_interval = CEC_DECAY_DEFAULT_INTERVAL;
 
 /*
  * Decrement decay value. We're using DECAY_BITS bits to denote decay of an
@@ -160,20 +157,21 @@ static void do_spring_cleaning(struct ce_array *ca)
 /*
  * @interval in seconds
  */
-static void cec_mod_timer(struct timer_list *t, unsigned long interval)
+static void cec_mod_work(unsigned long interval)
 {
 	unsigned long iv;
 
-	iv = interval * HZ + jiffies;
-
-	mod_timer(t, round_jiffies(iv));
+	iv = interval * HZ;
+	mod_delayed_work(system_wq, &cec_work, round_jiffies(iv));
 }
 
-static void cec_timer_fn(struct timer_list *unused)
+static void cec_work_fn(struct work_struct *work)
 {
+	mutex_lock(&ce_mutex);
 	do_spring_cleaning(&ce_arr);
+	mutex_unlock(&ce_mutex);
 
-	cec_mod_timer(&cec_timer, timer_interval);
+	cec_mod_work(decay_interval);
 }
 
 /*
@@ -183,32 +181,38 @@ static void cec_timer_fn(struct timer_list *unused)
  */
 static int __find_elem(struct ce_array *ca, u64 pfn, unsigned int *to)
 {
+	int min = 0, max = ca->n - 1;
 	u64 this_pfn;
-	int min = 0, max = ca->n;
 
-	while (min < max) {
-		int tmp = (max + min) >> 1;
+	while (min <= max) {
+		int i = (min + max) >> 1;
 
-		this_pfn = PFN(ca->array[tmp]);
+		this_pfn = PFN(ca->array[i]);
 
 		if (this_pfn < pfn)
-			min = tmp + 1;
+			min = i + 1;
 		else if (this_pfn > pfn)
-			max = tmp;
-		else {
-			min = tmp;
-			break;
+			max = i - 1;
+		else if (this_pfn == pfn) {
+			if (to)
+				*to = i;
+
+			return i;
 		}
 	}
 
+	/*
+	 * When the loop terminates without finding @pfn, min has the index of
+	 * the element slot where the new @pfn should be inserted. The loop
+	 * terminates when min > max, which means the min index points to the
+	 * bigger element while the max index to the smaller element, in-between
+	 * which the new @pfn belongs to.
+	 *
+	 * For more details, see exercise 1, Section 6.2.1 in TAOCP, vol. 3.
+	 */
 	if (to)
 		*to = min;
 
-	this_pfn = PFN(ca->array[min]);
-
-	if (this_pfn == pfn)
-		return min;
-
 	return -ENOKEY;
 }
 
@@ -374,15 +378,15 @@ static int decay_interval_set(void *data, u64 val)
 {
 	*(u64 *)data = val;
 
-	if (val < CEC_TIMER_MIN_INTERVAL)
+	if (val < CEC_DECAY_MIN_INTERVAL)
 		return -EINVAL;
 
-	if (val > CEC_TIMER_MAX_INTERVAL)
+	if (val > CEC_DECAY_MAX_INTERVAL)
 		return -EINVAL;
 
-	timer_interval = val;
+	decay_interval = val;
 
-	cec_mod_timer(&cec_timer, timer_interval);
+	cec_mod_work(decay_interval);
 	return 0;
 }
 DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
@@ -426,7 +430,7 @@ static int array_dump(struct seq_file *m, void *v)
 
 	seq_printf(m, "Flags: 0x%x\n", ca->flags);
 
-	seq_printf(m, "Timer interval: %lld seconds\n", timer_interval);
+	seq_printf(m, "Decay interval: %lld seconds\n", decay_interval);
 	seq_printf(m, "Decays: %lld\n", ca->decays_done);
 
 	seq_printf(m, "Action threshold: %d\n", count_threshold);
@@ -472,7 +476,7 @@ static int __init create_debugfs_nodes(void)
 	}
 
 	decay = debugfs_create_file("decay_interval", S_IRUSR | S_IWUSR, d,
-				    &timer_interval, &decay_interval_ops);
+				    &decay_interval, &decay_interval_ops);
 	if (!decay) {
 		pr_warn("Error creating decay_interval debugfs node!\n");
 		goto err;
@@ -508,8 +512,8 @@ void __init cec_init(void)
 	if (create_debugfs_nodes())
 		return;
 
-	timer_setup(&cec_timer, cec_timer_fn, 0);
-	cec_mod_timer(&cec_timer, CEC_TIMER_DEFAULT_INTERVAL);
+	INIT_DELAYED_WORK(&cec_work, cec_work_fn);
+	schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL);
 
 	pr_info("Correctable Errors collector initialized.\n");
 }
diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c
index a1b7fab91dd4..d2a8f69b2665 100644
--- a/drivers/regulator/tps6507x-regulator.c
+++ b/drivers/regulator/tps6507x-regulator.c
@@ -403,12 +403,12 @@ static int tps6507x_pmic_probe(struct platform_device *pdev)
 	/* common for all regulators */
 	tps->mfd = tps6507x_dev;
 
-	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++, init_data++) {
+	for (i = 0; i < TPS6507X_NUM_REGULATOR; i++, info++) {
 		/* Register the regulators */
 		tps->info[i] = info;
-		if (init_data && init_data->driver_data) {
+		if (init_data && init_data[i].driver_data) {
 			struct tps6507x_reg_platform_data *data =
-					init_data->driver_data;
+					init_data[i].driver_data;
 			info->defdcdc_default = data->defdcdc_default;
 		}
 
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 1bef1da273c2..8068520cf89e 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4940,7 +4940,7 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 			curr_sg->reserved[0] = 0;
 			curr_sg->reserved[1] = 0;
 			curr_sg->reserved[2] = 0;
-			curr_sg->chain_indicator = 0x80;
+			curr_sg->chain_indicator = IOACCEL2_CHAIN;
 
 			curr_sg = h->ioaccel2_cmd_sg_list[c->cmdindex];
 		}
@@ -4957,6 +4957,11 @@ static int hpsa_scsi_ioaccel2_queue_command(struct ctlr_info *h,
 			curr_sg++;
 		}
 
+		/*
+		 * Set the last s/g element bit
+		 */
+		(curr_sg - 1)->chain_indicator = IOACCEL2_LAST_SG;
+
 		switch (cmd->sc_data_direction) {
 		case DMA_TO_DEVICE:
 			cp->direction &= ~IOACCEL2_DIRECTION_MASK;
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 21a726e2eec6..f6afca4b2319 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -517,6 +517,7 @@ struct ioaccel2_sg_element {
 	u8 reserved[3];
 	u8 chain_indicator;
 #define IOACCEL2_CHAIN 0x80
+#define IOACCEL2_LAST_SG 0x40
 };
 
 /*
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index dad566bfe372..d84e22dd6f9f 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -406,7 +406,7 @@ int spi_bitbang_start(struct spi_bitbang *bitbang)
 	if (ret)
 		spi_master_put(master);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_start);
 
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 4c71df93a6f6..1d9b33aa1a3b 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -428,7 +428,6 @@ static int fsl_spi_do_one_msg(struct spi_master *master,
 	}
 
 	m->status = status;
-	spi_finalize_current_message(master);
 
 	if (status || !cs_change) {
 		ndelay(nsecs);
@@ -436,6 +435,7 @@ static int fsl_spi_do_one_msg(struct spi_master *master,
 	}
 
 	fsl_spi_setup_transfer(spi, NULL);
+	spi_finalize_current_message(master);
 	return 0;
 }
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 5e75944ad5d1..5e4654032bfa 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1181,10 +1181,10 @@ out:
 	if (msg->status && ctlr->handle_err)
 		ctlr->handle_err(ctlr, msg);
 
-	spi_finalize_current_message(ctlr);
-
 	spi_res_release(ctlr, msg);
 
+	spi_finalize_current_message(ctlr);
+
 	return ret;
 }
 
@@ -1307,10 +1307,15 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 		ret = ctlr->prepare_transfer_hardware(ctlr);
 		if (ret) {
 			dev_err(&ctlr->dev,
-				"failed to prepare transfer hardware\n");
+				"failed to prepare transfer hardware: %d\n",
+				ret);
 
 			if (ctlr->auto_runtime_pm)
 				pm_runtime_put(ctlr->dev.parent);
+
+			ctlr->cur_msg->status = ret;
+			spi_finalize_current_message(ctlr);
+
 			mutex_unlock(&ctlr->io_mutex);
 			return;
 		}
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6082b008969b..6b6413073584 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -215,6 +215,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
 	{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* Logitech HD Webcam C270 */
+	{ USB_DEVICE(0x046d, 0x0825), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
 	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 16ffd9fd9361..bff48a8a1984 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -835,19 +835,22 @@ static void dwc2_gadget_fill_nonisoc_xfer_ddma_one(struct dwc2_hsotg_ep *hs_ep,
  * with corresponding information based on transfer data.
  */
 static void dwc2_gadget_config_nonisoc_xfer_ddma(struct dwc2_hsotg_ep *hs_ep,
-						 struct usb_request *ureq,
-						 unsigned int offset,
+						 dma_addr_t dma_buff,
 						 unsigned int len)
 {
+	struct usb_request *ureq = NULL;
 	struct dwc2_dma_desc *desc = hs_ep->desc_list;
 	struct scatterlist *sg;
 	int i;
 	u8 desc_count = 0;
 
+	if (hs_ep->req)
+		ureq = &hs_ep->req->req;
+
 	/* non-DMA sg buffer */
-	if (!ureq->num_sgs) {
+	if (!ureq || !ureq->num_sgs) {
 		dwc2_gadget_fill_nonisoc_xfer_ddma_one(hs_ep, &desc,
-			ureq->dma + offset, len, true);
+			dma_buff, len, true);
 		return;
 	}
 
@@ -1135,7 +1138,7 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg,
 			offset = ureq->actual;
 
 		/* Fill DDMA chain entries */
-		dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, ureq, offset,
+		dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, ureq->dma + offset,
 						     length);
 
 		/* write descriptor chain address to control register */
@@ -2037,12 +2040,13 @@ static void dwc2_hsotg_program_zlp(struct dwc2_hsotg *hsotg,
 		dev_dbg(hsotg->dev, "Receiving zero-length packet on ep%d\n",
 			index);
 	if (using_desc_dma(hsotg)) {
+		/* Not specific buffer needed for ep0 ZLP */
+		dma_addr_t dma = hs_ep->desc_list_dma;
+
 		if (!index)
 			dwc2_gadget_set_ep0_desc_chain(hsotg, hs_ep);
 
-		/* Not specific buffer needed for ep0 ZLP */
-		dwc2_gadget_fill_nonisoc_xfer_ddma_one(hs_ep, &hs_ep->desc_list,
-			hs_ep->desc_list_dma, 0, true);
+		dwc2_gadget_config_nonisoc_xfer_ddma(hs_ep, dma, 0);
 	} else {
 		dwc2_writel(hsotg, DXEPTSIZ_MC(1) | DXEPTSIZ_PKTCNT(1) |
 			    DXEPTSIZ_XFERSIZE(0),
@@ -2417,6 +2421,10 @@ static void dwc2_hsotg_handle_outdone(struct dwc2_hsotg *hsotg, int epnum)
 			dwc2_gadget_incr_frame_num(hs_ep);
 	}
 
+	/* Set actual frame number for completed transfers */
+	if (!using_desc_dma(hsotg) && hs_ep->isochronous)
+		req->frame_number = hsotg->frame_number;
+
 	dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, result);
 }
 
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index b50ec3714fd8..2192a2873c7c 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -2480,8 +2480,10 @@ static void dwc2_free_dma_aligned_buffer(struct urb *urb)
 		return;
 
 	/* Restore urb->transfer_buffer from the end of the allocated area */
-	memcpy(&stored_xfer_buffer, urb->transfer_buffer +
-	       urb->transfer_buffer_length, sizeof(urb->transfer_buffer));
+	memcpy(&stored_xfer_buffer,
+	       PTR_ALIGN(urb->transfer_buffer + urb->transfer_buffer_length,
+			 dma_get_cache_alignment()),
+	       sizeof(urb->transfer_buffer));
 
 	if (usb_urb_dir_in(urb)) {
 		if (usb_pipeisoc(urb->pipe))
@@ -2513,6 +2515,7 @@ static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 	 * DMA
 	 */
 	kmalloc_size = urb->transfer_buffer_length +
+		(dma_get_cache_alignment() - 1) +
 		sizeof(urb->transfer_buffer);
 
 	kmalloc_ptr = kmalloc(kmalloc_size, mem_flags);
@@ -2523,7 +2526,8 @@ static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
 	 * Position value of original urb->transfer_buffer pointer to the end
 	 * of allocation for later referencing
 	 */
-	memcpy(kmalloc_ptr + urb->transfer_buffer_length,
+	memcpy(PTR_ALIGN(kmalloc_ptr + urb->transfer_buffer_length,
+			 dma_get_cache_alignment()),
 	       &urb->transfer_buffer, sizeof(urb->transfer_buffer));
 
 	if (usb_urb_dir_out(urb))
@@ -2608,7 +2612,7 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 	chan->dev_addr = dwc2_hcd_get_dev_addr(&urb->pipe_info);
 	chan->ep_num = dwc2_hcd_get_ep_num(&urb->pipe_info);
 	chan->speed = qh->dev_speed;
-	chan->max_packet = dwc2_max_packet(qh->maxp);
+	chan->max_packet = qh->maxp;
 
 	chan->xfer_started = 0;
 	chan->halt_status = DWC2_HC_XFER_NO_HALT_STATUS;
@@ -2686,7 +2690,7 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 		 * This value may be modified when the transfer is started
 		 * to reflect the actual transfer length
 		 */
-		chan->multi_count = dwc2_hb_mult(qh->maxp);
+		chan->multi_count = qh->maxp_mult;
 
 	if (hsotg->params.dma_desc_enable) {
 		chan->desc_list_addr = qh->desc_list_dma;
@@ -3806,19 +3810,21 @@ static struct dwc2_hcd_urb *dwc2_hcd_urb_alloc(struct dwc2_hsotg *hsotg,
 
 static void dwc2_hcd_urb_set_pipeinfo(struct dwc2_hsotg *hsotg,
 				      struct dwc2_hcd_urb *urb, u8 dev_addr,
-				      u8 ep_num, u8 ep_type, u8 ep_dir, u16 mps)
+				      u8 ep_num, u8 ep_type, u8 ep_dir,
+				      u16 maxp, u16 maxp_mult)
 {
 	if (dbg_perio() ||
 	    ep_type == USB_ENDPOINT_XFER_BULK ||
 	    ep_type == USB_ENDPOINT_XFER_CONTROL)
 		dev_vdbg(hsotg->dev,
-			 "addr=%d, ep_num=%d, ep_dir=%1x, ep_type=%1x, mps=%d\n",
-			 dev_addr, ep_num, ep_dir, ep_type, mps);
+			 "addr=%d, ep_num=%d, ep_dir=%1x, ep_type=%1x, maxp=%d (%d mult)\n",
+			 dev_addr, ep_num, ep_dir, ep_type, maxp, maxp_mult);
 	urb->pipe_info.dev_addr = dev_addr;
 	urb->pipe_info.ep_num = ep_num;
 	urb->pipe_info.pipe_type = ep_type;
 	urb->pipe_info.pipe_dir = ep_dir;
-	urb->pipe_info.mps = mps;
+	urb->pipe_info.maxp = maxp;
+	urb->pipe_info.maxp_mult = maxp_mult;
 }
 
 /*
@@ -3909,8 +3915,9 @@ void dwc2_hcd_dump_state(struct dwc2_hsotg *hsotg)
 					dwc2_hcd_is_pipe_in(&urb->pipe_info) ?
 					"IN" : "OUT");
 				dev_dbg(hsotg->dev,
-					"      Max packet size: %d\n",
-					dwc2_hcd_get_mps(&urb->pipe_info));
+					"      Max packet size: %d (%d mult)\n",
+					dwc2_hcd_get_maxp(&urb->pipe_info),
+					dwc2_hcd_get_maxp_mult(&urb->pipe_info));
 				dev_dbg(hsotg->dev,
 					"      transfer_buffer: %p\n",
 					urb->buf);
@@ -4510,8 +4517,10 @@ static void dwc2_dump_urb_info(struct usb_hcd *hcd, struct urb *urb,
 	}
 
 	dev_vdbg(hsotg->dev, "  Speed: %s\n", speed);
-	dev_vdbg(hsotg->dev, "  Max packet size: %d\n",
-		 usb_maxpacket(urb->dev, urb->pipe, usb_pipeout(urb->pipe)));
+	dev_vdbg(hsotg->dev, "  Max packet size: %d (%d mult)\n",
+		 usb_endpoint_maxp(&urb->ep->desc),
+		 usb_endpoint_maxp_mult(&urb->ep->desc));
+
 	dev_vdbg(hsotg->dev, "  Data buffer length: %d\n",
 		 urb->transfer_buffer_length);
 	dev_vdbg(hsotg->dev, "  Transfer buffer: %p, Transfer DMA: %08lx\n",
@@ -4594,8 +4603,8 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 	dwc2_hcd_urb_set_pipeinfo(hsotg, dwc2_urb, usb_pipedevice(urb->pipe),
 				  usb_pipeendpoint(urb->pipe), ep_type,
 				  usb_pipein(urb->pipe),
-				  usb_maxpacket(urb->dev, urb->pipe,
-						!(usb_pipein(urb->pipe))));
+				  usb_endpoint_maxp(&ep->desc),
+				  usb_endpoint_maxp_mult(&ep->desc));
 
 	buf = urb->transfer_buffer;
 
diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h
index c089ffa1f0a8..ce6445a06588 100644
--- a/drivers/usb/dwc2/hcd.h
+++ b/drivers/usb/dwc2/hcd.h
@@ -171,7 +171,8 @@ struct dwc2_hcd_pipe_info {
 	u8 ep_num;
 	u8 pipe_type;
 	u8 pipe_dir;
-	u16 mps;
+	u16 maxp;
+	u16 maxp_mult;
 };
 
 struct dwc2_hcd_iso_packet_desc {
@@ -264,6 +265,7 @@ struct dwc2_hs_transfer_time {
  *                       - USB_ENDPOINT_XFER_ISOC
  * @ep_is_in:           Endpoint direction
  * @maxp:               Value from wMaxPacketSize field of Endpoint Descriptor
+ * @maxp_mult:          Multiplier for maxp
  * @dev_speed:          Device speed. One of the following values:
  *                       - USB_SPEED_LOW
  *                       - USB_SPEED_FULL
@@ -340,6 +342,7 @@ struct dwc2_qh {
 	u8 ep_type;
 	u8 ep_is_in;
 	u16 maxp;
+	u16 maxp_mult;
 	u8 dev_speed;
 	u8 data_toggle;
 	u8 ping_state;
@@ -503,9 +506,14 @@ static inline u8 dwc2_hcd_get_pipe_type(struct dwc2_hcd_pipe_info *pipe)
 	return pipe->pipe_type;
 }
 
-static inline u16 dwc2_hcd_get_mps(struct dwc2_hcd_pipe_info *pipe)
+static inline u16 dwc2_hcd_get_maxp(struct dwc2_hcd_pipe_info *pipe)
+{
+	return pipe->maxp;
+}
+
+static inline u16 dwc2_hcd_get_maxp_mult(struct dwc2_hcd_pipe_info *pipe)
 {
-	return pipe->mps;
+	return pipe->maxp_mult;
 }
 
 static inline u8 dwc2_hcd_get_dev_addr(struct dwc2_hcd_pipe_info *pipe)
@@ -620,12 +628,6 @@ static inline bool dbg_urb(struct urb *urb)
 static inline bool dbg_perio(void) { return false; }
 #endif
 
-/* High bandwidth multiplier as encoded in highspeed endpoint descriptors */
-#define dwc2_hb_mult(wmaxpacketsize) (1 + (((wmaxpacketsize) >> 11) & 0x03))
-
-/* Packet size for any kind of endpoint descriptor */
-#define dwc2_max_packet(wmaxpacketsize) ((wmaxpacketsize) & 0x07ff)
-
 /*
  * Returns true if frame1 index is greater than frame2 index. The comparison
  * is done modulo FRLISTEN_64_SIZE. This accounts for the rollover of the
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index 88b5dcf3aefc..a052d39b4375 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -1617,8 +1617,9 @@ static void dwc2_hc_ahberr_intr(struct dwc2_hsotg *hsotg,
 
 	dev_err(hsotg->dev, "  Speed: %s\n", speed);
 
-	dev_err(hsotg->dev, "  Max packet size: %d\n",
-		dwc2_hcd_get_mps(&urb->pipe_info));
+	dev_err(hsotg->dev, "  Max packet size: %d (mult %d)\n",
+		dwc2_hcd_get_maxp(&urb->pipe_info),
+		dwc2_hcd_get_maxp_mult(&urb->pipe_info));
 	dev_err(hsotg->dev, "  Data buffer length: %d\n", urb->length);
 	dev_err(hsotg->dev, "  Transfer buffer: %p, Transfer DMA: %08lx\n",
 		urb->buf, (unsigned long)urb->dma);
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index ea3aa640c15c..68bbac64b753 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -708,7 +708,7 @@ static void dwc2_hs_pmap_unschedule(struct dwc2_hsotg *hsotg,
 static int dwc2_uframe_schedule_split(struct dwc2_hsotg *hsotg,
 				      struct dwc2_qh *qh)
 {
-	int bytecount = dwc2_hb_mult(qh->maxp) * dwc2_max_packet(qh->maxp);
+	int bytecount = qh->maxp_mult * qh->maxp;
 	int ls_search_slice;
 	int err = 0;
 	int host_interval_in_sched;
@@ -1332,7 +1332,7 @@ static int dwc2_check_max_xfer_size(struct dwc2_hsotg *hsotg,
 	u32 max_channel_xfer_size;
 	int status = 0;
 
-	max_xfer_size = dwc2_max_packet(qh->maxp) * dwc2_hb_mult(qh->maxp);
+	max_xfer_size = qh->maxp * qh->maxp_mult;
 	max_channel_xfer_size = hsotg->params.max_transfer_size;
 
 	if (max_xfer_size > max_channel_xfer_size) {
@@ -1517,8 +1517,9 @@ static void dwc2_qh_init(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
 	u32 prtspd = (hprt & HPRT0_SPD_MASK) >> HPRT0_SPD_SHIFT;
 	bool do_split = (prtspd == HPRT0_SPD_HIGH_SPEED &&
 			 dev_speed != USB_SPEED_HIGH);
-	int maxp = dwc2_hcd_get_mps(&urb->pipe_info);
-	int bytecount = dwc2_hb_mult(maxp) * dwc2_max_packet(maxp);
+	int maxp = dwc2_hcd_get_maxp(&urb->pipe_info);
+	int maxp_mult = dwc2_hcd_get_maxp_mult(&urb->pipe_info);
+	int bytecount = maxp_mult * maxp;
 	char *speed, *type;
 
 	/* Initialize QH */
@@ -1531,6 +1532,7 @@ static void dwc2_qh_init(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
 
 	qh->data_toggle = DWC2_HC_PID_DATA0;
 	qh->maxp = maxp;
+	qh->maxp_mult = maxp_mult;
 	INIT_LIST_HEAD(&qh->qtd_list);
 	INIT_LIST_HEAD(&qh->qh_list_entry);
 
diff --git a/drivers/usb/gadget/udc/fusb300_udc.c b/drivers/usb/gadget/udc/fusb300_udc.c
index 263804d154a7..00e3f66836a9 100644
--- a/drivers/usb/gadget/udc/fusb300_udc.c
+++ b/drivers/usb/gadget/udc/fusb300_udc.c
@@ -1342,12 +1342,15 @@ static const struct usb_gadget_ops fusb300_gadget_ops = {
 static int fusb300_remove(struct platform_device *pdev)
 {
 	struct fusb300 *fusb300 = platform_get_drvdata(pdev);
+	int i;
 
 	usb_del_gadget_udc(&fusb300->gadget);
 	iounmap(fusb300->reg);
 	free_irq(platform_get_irq(pdev, 0), fusb300);
 
 	fusb300_free_request(&fusb300->ep[0]->ep, fusb300->ep0_req);
+	for (i = 0; i < FUSB300_MAX_NUM_EP; i++)
+		kfree(fusb300->ep[i]);
 	kfree(fusb300);
 
 	return 0;
@@ -1491,6 +1494,8 @@ clean_up:
 		if (fusb300->ep0_req)
 			fusb300_free_request(&fusb300->ep[0]->ep,
 				fusb300->ep0_req);
+		for (i = 0; i < FUSB300_MAX_NUM_EP; i++)
+			kfree(fusb300->ep[i]);
 		kfree(fusb300);
 	}
 	if (reg)
diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c
index d8f1c60793ed..5f1b14f3e5a0 100644
--- a/drivers/usb/gadget/udc/lpc32xx_udc.c
+++ b/drivers/usb/gadget/udc/lpc32xx_udc.c
@@ -937,8 +937,7 @@ static struct lpc32xx_usbd_dd_gad *udc_dd_alloc(struct lpc32xx_udc *udc)
 	dma_addr_t			dma;
 	struct lpc32xx_usbd_dd_gad	*dd;
 
-	dd = (struct lpc32xx_usbd_dd_gad *) dma_pool_alloc(
-			udc->dd_cache, (GFP_KERNEL | GFP_DMA), &dma);
+	dd = dma_pool_alloc(udc->dd_cache, GFP_ATOMIC | GFP_DMA, &dma);
 	if (dd)
 		dd->this_dma = dma;
 
@@ -3070,9 +3069,9 @@ static int lpc32xx_udc_probe(struct platform_device *pdev)
 	}
 
 	udc->udp_baseaddr = devm_ioremap_resource(dev, res);
-	if (!udc->udp_baseaddr) {
+	if (IS_ERR(udc->udp_baseaddr)) {
 		dev_err(udc->dev, "IO map failure\n");
-		return -ENOMEM;
+		return PTR_ERR(udc->udp_baseaddr);
 	}
 
 	/* Get USB device clock */
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index 1b1bb0ad40c3..6fa16ab31e2e 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -63,6 +63,7 @@
 
 #define ANADIG_USB1_CHRG_DETECT_SET		0x1b4
 #define ANADIG_USB1_CHRG_DETECT_CLR		0x1b8
+#define ANADIG_USB2_CHRG_DETECT_SET		0x214
 #define ANADIG_USB1_CHRG_DETECT_EN_B		BIT(20)
 #define ANADIG_USB1_CHRG_DETECT_CHK_CHRG_B	BIT(19)
 #define ANADIG_USB1_CHRG_DETECT_CHK_CONTACT	BIT(18)
@@ -250,6 +251,19 @@ static int mxs_phy_hw_init(struct mxs_phy *mxs_phy)
 	if (mxs_phy->data->flags & MXS_PHY_NEED_IP_FIX)
 		writel(BM_USBPHY_IP_FIX, base + HW_USBPHY_IP_SET);
 
+	if (mxs_phy->regmap_anatop) {
+		unsigned int reg = mxs_phy->port_id ?
+			ANADIG_USB1_CHRG_DETECT_SET :
+			ANADIG_USB2_CHRG_DETECT_SET;
+		/*
+		 * The external charger detector needs to be disabled,
+		 * or the signal at DP will be poor
+		 */
+		regmap_write(mxs_phy->regmap_anatop, reg,
+			     ANADIG_USB1_CHRG_DETECT_EN_B |
+			     ANADIG_USB1_CHRG_DETECT_CHK_CHRG_B);
+	}
+
 	mxs_phy_tx_init(mxs_phy);
 
 	return 0;
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 83869065b802..a0aaf0635359 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1171,6 +1171,10 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
 	  .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, 0x1260),
+	  .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, 0x1261),
+	  .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, 0x1900),				/* Telit LN940 (QMI) */
 	  .driver_info = NCTRL(0) | RSVD(1) },
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff),	/* Telit LN940 (MBIM) */
@@ -1772,6 +1776,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
 	  .driver_info = RSVD(5) | RSVD(6) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9003, 0xff) },	/* Simcom SIM7500/SIM7600 MBIM mode */
+	{ USB_DEVICE_INTERFACE_CLASS(0x1e0e, 0x9011, 0xff),	/* Simcom SIM7500/SIM7600 RNDIS mode */
+	  .driver_info = RSVD(7) },
 	{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
 	  .driver_info = NCTRL(0) | NCTRL(1) | RSVD(4) },
 	{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D),
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 55122ac84518..d7abde14b3cf 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -106,6 +106,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) },
 	{ USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530_PRODUCT_ID) },
 	{ USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) },
+	{ USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 559941ca884d..b0175f17d1a2 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -155,3 +155,6 @@
 #define SMART_VENDOR_ID	0x0b8c
 #define SMART_PRODUCT_ID	0x2303
 
+/* Allied Telesis VT-Kit3 */
+#define AT_VENDOR_ID		0x0caa
+#define AT_VTKIT3_PRODUCT_ID	0x3001
diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h
index 6b2140f966ef..7e14c2d7cf73 100644
--- a/drivers/usb/storage/unusual_realtek.h
+++ b/drivers/usb/storage/unusual_realtek.h
@@ -17,6 +17,11 @@ UNUSUAL_DEV(0x0bda, 0x0138, 0x0000, 0x9999,
 		"USB Card Reader",
 		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
 
+UNUSUAL_DEV(0x0bda, 0x0153, 0x0000, 0x9999,
+		"Realtek",
+		"USB Card Reader",
+		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
 UNUSUAL_DEV(0x0bda, 0x0158, 0x0000, 0x9999,
 		"Realtek",
 		"USB Card Reader",
diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
index 76299b6ff06d..74cb3c2ecb34 100644
--- a/drivers/usb/typec/bus.c
+++ b/drivers/usb/typec/bus.c
@@ -192,7 +192,7 @@ EXPORT_SYMBOL_GPL(typec_altmode_vdm);
 const struct typec_altmode *
 typec_altmode_get_partner(struct typec_altmode *adev)
 {
-	return &to_altmode(adev)->partner->adev;
+	return adev ? &to_altmode(adev)->partner->adev : NULL;
 }
 EXPORT_SYMBOL_GPL(typec_altmode_get_partner);
 
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index 9d46aa9e4e35..bf63074675fc 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -862,8 +862,10 @@ static int do_flash(struct ucsi_ccg *uc, enum enum_flash_mode mode)
 
 not_signed_fw:
 	wr_buf = kzalloc(CCG4_ROW_SIZE + 4, GFP_KERNEL);
-	if (!wr_buf)
-		return -ENOMEM;
+	if (!wr_buf) {
+		err = -ENOMEM;
+		goto release_fw;
+	}
 
 	err = ccg_cmd_enter_flashing(uc);
 	if (err)
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 3cc1a05fde1c..ae23151442cb 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -102,56 +102,35 @@ static void mdev_put_parent(struct mdev_parent *parent)
 		kref_put(&parent->ref, mdev_release_parent);
 }
 
-static int mdev_device_create_ops(struct kobject *kobj,
-				  struct mdev_device *mdev)
+/* Caller must hold parent unreg_sem read or write lock */
+static void mdev_device_remove_common(struct mdev_device *mdev)
 {
-	struct mdev_parent *parent = mdev->parent;
-	int ret;
-
-	ret = parent->ops->create(kobj, mdev);
-	if (ret)
-		return ret;
-
-	ret = sysfs_create_groups(&mdev->dev.kobj,
-				  parent->ops->mdev_attr_groups);
-	if (ret)
-		parent->ops->remove(mdev);
-
-	return ret;
-}
-
-/*
- * mdev_device_remove_ops gets called from sysfs's 'remove' and when parent
- * device is being unregistered from mdev device framework.
- * - 'force_remove' is set to 'false' when called from sysfs's 'remove' which
- *   indicates that if the mdev device is active, used by VMM or userspace
- *   application, vendor driver could return error then don't remove the device.
- * - 'force_remove' is set to 'true' when called from mdev_unregister_device()
- *   which indicate that parent device is being removed from mdev device
- *   framework so remove mdev device forcefully.
- */
-static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove)
-{
-	struct mdev_parent *parent = mdev->parent;
+	struct mdev_parent *parent;
+	struct mdev_type *type;
 	int ret;
 
-	/*
-	 * Vendor driver can return error if VMM or userspace application is
-	 * using this mdev device.
-	 */
+	type = to_mdev_type(mdev->type_kobj);
+	mdev_remove_sysfs_files(&mdev->dev, type);
+	device_del(&mdev->dev);
+	parent = mdev->parent;
+	lockdep_assert_held(&parent->unreg_sem);
 	ret = parent->ops->remove(mdev);
-	if (ret && !force_remove)
-		return ret;
+	if (ret)
+		dev_err(&mdev->dev, "Remove failed: err=%d\n", ret);
 
-	sysfs_remove_groups(&mdev->dev.kobj, parent->ops->mdev_attr_groups);
-	return 0;
+	/* Balances with device_initialize() */
+	put_device(&mdev->dev);
+	mdev_put_parent(parent);
 }
 
 static int mdev_device_remove_cb(struct device *dev, void *data)
 {
-	if (dev_is_mdev(dev))
-		mdev_device_remove(dev, true);
+	if (dev_is_mdev(dev)) {
+		struct mdev_device *mdev;
 
+		mdev = to_mdev_device(dev);
+		mdev_device_remove_common(mdev);
+	}
 	return 0;
 }
 
@@ -193,6 +172,7 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops)
 	}
 
 	kref_init(&parent->ref);
+	init_rwsem(&parent->unreg_sem);
 
 	parent->dev = dev;
 	parent->ops = ops;
@@ -251,21 +231,23 @@ void mdev_unregister_device(struct device *dev)
 	dev_info(dev, "MDEV: Unregistering\n");
 
 	list_del(&parent->next);
+	mutex_unlock(&parent_list_lock);
+
+	down_write(&parent->unreg_sem);
+
 	class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
 
 	device_for_each_child(dev, NULL, mdev_device_remove_cb);
 
 	parent_remove_sysfs_files(parent);
+	up_write(&parent->unreg_sem);
 
-	mutex_unlock(&parent_list_lock);
 	mdev_put_parent(parent);
 }
 EXPORT_SYMBOL(mdev_unregister_device);
 
-static void mdev_device_release(struct device *dev)
+static void mdev_device_free(struct mdev_device *mdev)
 {
-	struct mdev_device *mdev = to_mdev_device(dev);
-
 	mutex_lock(&mdev_list_lock);
 	list_del(&mdev->next);
 	mutex_unlock(&mdev_list_lock);
@@ -274,6 +256,13 @@ static void mdev_device_release(struct device *dev)
 	kfree(mdev);
 }
 
+static void mdev_device_release(struct device *dev)
+{
+	struct mdev_device *mdev = to_mdev_device(dev);
+
+	mdev_device_free(mdev);
+}
+
 int mdev_device_create(struct kobject *kobj,
 		       struct device *dev, const guid_t *uuid)
 {
@@ -310,46 +299,55 @@ int mdev_device_create(struct kobject *kobj,
 
 	mdev->parent = parent;
 
+	/* Check if parent unregistration has started */
+	if (!down_read_trylock(&parent->unreg_sem)) {
+		mdev_device_free(mdev);
+		ret = -ENODEV;
+		goto mdev_fail;
+	}
+
+	device_initialize(&mdev->dev);
 	mdev->dev.parent  = dev;
 	mdev->dev.bus     = &mdev_bus_type;
 	mdev->dev.release = mdev_device_release;
 	dev_set_name(&mdev->dev, "%pUl", uuid);
+	mdev->dev.groups = parent->ops->mdev_attr_groups;
+	mdev->type_kobj = kobj;
 
-	ret = device_register(&mdev->dev);
-	if (ret) {
-		put_device(&mdev->dev);
-		goto mdev_fail;
-	}
+	ret = parent->ops->create(kobj, mdev);
+	if (ret)
+		goto ops_create_fail;
 
-	ret = mdev_device_create_ops(kobj, mdev);
+	ret = device_add(&mdev->dev);
 	if (ret)
-		goto create_fail;
+		goto add_fail;
 
 	ret = mdev_create_sysfs_files(&mdev->dev, type);
-	if (ret) {
-		mdev_device_remove_ops(mdev, true);
-		goto create_fail;
-	}
+	if (ret)
+		goto sysfs_fail;
 
-	mdev->type_kobj = kobj;
 	mdev->active = true;
 	dev_dbg(&mdev->dev, "MDEV: created\n");
+	up_read(&parent->unreg_sem);
 
 	return 0;
 
-create_fail:
-	device_unregister(&mdev->dev);
+sysfs_fail:
+	device_del(&mdev->dev);
+add_fail:
+	parent->ops->remove(mdev);
+ops_create_fail:
+	up_read(&parent->unreg_sem);
+	put_device(&mdev->dev);
 mdev_fail:
 	mdev_put_parent(parent);
 	return ret;
 }
 
-int mdev_device_remove(struct device *dev, bool force_remove)
+int mdev_device_remove(struct device *dev)
 {
 	struct mdev_device *mdev, *tmp;
 	struct mdev_parent *parent;
-	struct mdev_type *type;
-	int ret;
 
 	mdev = to_mdev_device(dev);
 
@@ -372,19 +370,13 @@ int mdev_device_remove(struct device *dev, bool force_remove)
 	mdev->active = false;
 	mutex_unlock(&mdev_list_lock);
 
-	type = to_mdev_type(mdev->type_kobj);
 	parent = mdev->parent;
+	/* Check if parent unregistration has started */
+	if (!down_read_trylock(&parent->unreg_sem))
+		return -ENODEV;
 
-	ret = mdev_device_remove_ops(mdev, force_remove);
-	if (ret) {
-		mdev->active = true;
-		return ret;
-	}
-
-	mdev_remove_sysfs_files(dev, type);
-	device_unregister(dev);
-	mdev_put_parent(parent);
-
+	mdev_device_remove_common(mdev);
+	up_read(&parent->unreg_sem);
 	return 0;
 }
 
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 36cbbdb754de..398767526276 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -23,6 +23,8 @@ struct mdev_parent {
 	struct list_head next;
 	struct kset *mdev_types_kset;
 	struct list_head type_list;
+	/* Synchronize device creation/removal with parent unregistration */
+	struct rw_semaphore unreg_sem;
 };
 
 struct mdev_device {
@@ -60,6 +62,6 @@ void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
 
 int  mdev_device_create(struct kobject *kobj,
 			struct device *dev, const guid_t *uuid);
-int  mdev_device_remove(struct device *dev, bool force_remove);
+int  mdev_device_remove(struct device *dev);
 
 #endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index cbf94b8165ea..ffa3dcebf201 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -236,11 +236,9 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
 	if (val && device_remove_file_self(dev, attr)) {
 		int ret;
 
-		ret = mdev_device_remove(dev, false);
-		if (ret) {
-			device_create_file(dev, attr);
+		ret = mdev_device_remove(dev);
+		if (ret)
 			return ret;
-		}
 	}
 
 	return count;
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 799ae49774f5..b939bc28d886 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -650,6 +650,150 @@ hdmi_vendor_any_infoframe_check_only(const union hdmi_vendor_any_infoframe *fram
 	return 0;
 }
 
+/**
+ * hdmi_drm_infoframe_init() - initialize an HDMI Dynaminc Range and
+ * mastering infoframe
+ * @frame: HDMI DRM infoframe
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_drm_infoframe_init(struct hdmi_drm_infoframe *frame)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->type = HDMI_INFOFRAME_TYPE_DRM;
+	frame->version = 1;
+	frame->length = HDMI_DRM_INFOFRAME_SIZE;
+
+	return 0;
+}
+EXPORT_SYMBOL(hdmi_drm_infoframe_init);
+
+static int hdmi_drm_infoframe_check_only(const struct hdmi_drm_infoframe *frame)
+{
+	if (frame->type != HDMI_INFOFRAME_TYPE_DRM ||
+	    frame->version != 1)
+		return -EINVAL;
+
+	if (frame->length != HDMI_DRM_INFOFRAME_SIZE)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * hdmi_drm_infoframe_check() - check a HDMI DRM infoframe
+ * @frame: HDMI DRM infoframe
+ *
+ * Validates that the infoframe is consistent.
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame)
+{
+	return hdmi_drm_infoframe_check_only(frame);
+}
+EXPORT_SYMBOL(hdmi_drm_infoframe_check);
+
+/**
+ * hdmi_drm_infoframe_pack_only() - write HDMI DRM infoframe to binary buffer
+ * @frame: HDMI DRM infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame,
+				     void *buffer, size_t size)
+{
+	u8 *ptr = buffer;
+	size_t length;
+	int i;
+
+	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
+
+	if (size < length)
+		return -ENOSPC;
+
+	memset(buffer, 0, size);
+
+	ptr[0] = frame->type;
+	ptr[1] = frame->version;
+	ptr[2] = frame->length;
+	ptr[3] = 0; /* checksum */
+
+	/* start infoframe payload */
+	ptr += HDMI_INFOFRAME_HEADER_SIZE;
+
+	*ptr++ = frame->eotf;
+	*ptr++ = frame->metadata_type;
+
+	for (i = 0; i < 3; i++) {
+		*ptr++ = frame->display_primaries[i].x;
+		*ptr++ = frame->display_primaries[i].x >> 8;
+		*ptr++ = frame->display_primaries[i].y;
+		*ptr++ = frame->display_primaries[i].y >> 8;
+	}
+
+	*ptr++ = frame->white_point.x;
+	*ptr++ = frame->white_point.x >> 8;
+
+	*ptr++ = frame->white_point.y;
+	*ptr++ = frame->white_point.y >> 8;
+
+	*ptr++ = frame->max_display_mastering_luminance;
+	*ptr++ = frame->max_display_mastering_luminance >> 8;
+
+	*ptr++ = frame->min_display_mastering_luminance;
+	*ptr++ = frame->min_display_mastering_luminance >> 8;
+
+	*ptr++ = frame->max_cll;
+	*ptr++ = frame->max_cll >> 8;
+
+	*ptr++ = frame->max_fall;
+	*ptr++ = frame->max_fall >> 8;
+
+	hdmi_infoframe_set_checksum(buffer, length);
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_drm_infoframe_pack_only);
+
+/**
+ * hdmi_drm_infoframe_pack() - check a HDMI DRM infoframe,
+ *                             and write it to binary buffer
+ * @frame: HDMI DRM infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Validates that the infoframe is consistent and updates derived fields
+ * (eg. length) based on other fields, after which it packs the information
+ * contained in the @frame structure into a binary representation that
+ * can be written into the corresponding controller registers. This function
+ * also computes the checksum as required by section 5.3.5 of the HDMI 1.4
+ * specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame,
+				void *buffer, size_t size)
+{
+	int ret;
+
+	ret = hdmi_drm_infoframe_check(frame);
+	if (ret)
+		return ret;
+
+	return hdmi_drm_infoframe_pack_only(frame, buffer, size);
+}
+EXPORT_SYMBOL(hdmi_drm_infoframe_pack);
+
 /*
  * hdmi_vendor_any_infoframe_check() - check a vendor infoframe
  */
@@ -758,6 +902,10 @@ hdmi_infoframe_pack_only(const union hdmi_infoframe *frame, void *buffer, size_t
 		length = hdmi_avi_infoframe_pack_only(&frame->avi,
 						      buffer, size);
 		break;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		length = hdmi_drm_infoframe_pack_only(&frame->drm,
+						      buffer, size);
+		break;
 	case HDMI_INFOFRAME_TYPE_SPD:
 		length = hdmi_spd_infoframe_pack_only(&frame->spd,
 						      buffer, size);
@@ -806,6 +954,9 @@ hdmi_infoframe_pack(union hdmi_infoframe *frame,
 	case HDMI_INFOFRAME_TYPE_AVI:
 		length = hdmi_avi_infoframe_pack(&frame->avi, buffer, size);
 		break;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		length = hdmi_drm_infoframe_pack(&frame->drm, buffer, size);
+		break;
 	case HDMI_INFOFRAME_TYPE_SPD:
 		length = hdmi_spd_infoframe_pack(&frame->spd, buffer, size);
 		break;
@@ -838,6 +989,8 @@ static const char *hdmi_infoframe_type_get_name(enum hdmi_infoframe_type type)
 		return "Source Product Description (SPD)";
 	case HDMI_INFOFRAME_TYPE_AUDIO:
 		return "Audio";
+	case HDMI_INFOFRAME_TYPE_DRM:
+		return "Dynamic Range and Mastering";
 	}
 	return "Reserved";
 }
@@ -1038,12 +1191,6 @@ hdmi_content_type_get_name(enum hdmi_content_type content_type)
 	return "Invalid";
 }
 
-/**
- * hdmi_avi_infoframe_log() - log info of HDMI AVI infoframe
- * @level: logging level
- * @dev: device
- * @frame: HDMI AVI infoframe
- */
 static void hdmi_avi_infoframe_log(const char *level,
 				   struct device *dev,
 				   const struct hdmi_avi_infoframe *frame)
@@ -1115,12 +1262,6 @@ static const char *hdmi_spd_sdi_get_name(enum hdmi_spd_sdi sdi)
 	return "Reserved";
 }
 
-/**
- * hdmi_spd_infoframe_log() - log info of HDMI SPD infoframe
- * @level: logging level
- * @dev: device
- * @frame: HDMI SPD infoframe
- */
 static void hdmi_spd_infoframe_log(const char *level,
 				   struct device *dev,
 				   const struct hdmi_spd_infoframe *frame)
@@ -1251,12 +1392,6 @@ hdmi_audio_coding_type_ext_get_name(enum hdmi_audio_coding_type_ext ctx)
 	return "Reserved";
 }
 
-/**
- * hdmi_audio_infoframe_log() - log info of HDMI AUDIO infoframe
- * @level: logging level
- * @dev: device
- * @frame: HDMI AUDIO infoframe
- */
 static void hdmi_audio_infoframe_log(const char *level,
 				     struct device *dev,
 				     const struct hdmi_audio_infoframe *frame)
@@ -1284,6 +1419,34 @@ static void hdmi_audio_infoframe_log(const char *level,
 			frame->downmix_inhibit ? "Yes" : "No");
 }
 
+static void hdmi_drm_infoframe_log(const char *level,
+				   struct device *dev,
+				   const struct hdmi_drm_infoframe *frame)
+{
+	int i;
+
+	hdmi_infoframe_log_header(level, dev,
+				  (struct hdmi_any_infoframe *)frame);
+	hdmi_log("length: %d\n", frame->length);
+	hdmi_log("metadata type: %d\n", frame->metadata_type);
+	hdmi_log("eotf: %d\n", frame->eotf);
+	for (i = 0; i < 3; i++) {
+		hdmi_log("x[%d]: %d\n", i, frame->display_primaries[i].x);
+		hdmi_log("y[%d]: %d\n", i, frame->display_primaries[i].y);
+	}
+
+	hdmi_log("white point x: %d\n", frame->white_point.x);
+	hdmi_log("white point y: %d\n", frame->white_point.y);
+
+	hdmi_log("max_display_mastering_luminance: %d\n",
+		 frame->max_display_mastering_luminance);
+	hdmi_log("min_display_mastering_luminance: %d\n",
+		 frame->min_display_mastering_luminance);
+
+	hdmi_log("max_cll: %d\n", frame->max_cll);
+	hdmi_log("max_fall: %d\n", frame->max_fall);
+}
+
 static const char *
 hdmi_3d_structure_get_name(enum hdmi_3d_structure s3d_struct)
 {
@@ -1313,12 +1476,6 @@ hdmi_3d_structure_get_name(enum hdmi_3d_structure s3d_struct)
 	return "Reserved";
 }
 
-/**
- * hdmi_vendor_infoframe_log() - log info of HDMI VENDOR infoframe
- * @level: logging level
- * @dev: device
- * @frame: HDMI VENDOR infoframe
- */
 static void
 hdmi_vendor_any_infoframe_log(const char *level,
 			      struct device *dev,
@@ -1372,6 +1529,9 @@ void hdmi_infoframe_log(const char *level,
 	case HDMI_INFOFRAME_TYPE_VENDOR:
 		hdmi_vendor_any_infoframe_log(level, dev, &frame->vendor);
 		break;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		hdmi_drm_infoframe_log(level, dev, &frame->drm);
+		break;
 	}
 }
 EXPORT_SYMBOL(hdmi_infoframe_log);
@@ -1615,6 +1775,70 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame,
 }
 
 /**
+ * hdmi_drm_infoframe_unpack() - unpack binary buffer to a HDMI DRM infoframe
+ * @frame: HDMI DRM infoframe
+ * @buffer: source buffer
+ * @size: size of buffer
+ *
+ * Unpacks the information contained in binary @buffer into a structured
+ * @frame of the HDMI Dynamic Range and Mastering (DRM) information frame.
+ * Also verifies the checksum as required by section 5.3.5 of the HDMI 1.4
+ * specification.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame,
+				     const void *buffer, size_t size)
+{
+	const u8 *ptr = buffer;
+	const u8 *temp;
+	u8 x_lsb, x_msb;
+	u8 y_lsb, y_msb;
+	int ret;
+	int i;
+
+	if (size < HDMI_INFOFRAME_SIZE(DRM))
+		return -EINVAL;
+
+	if (ptr[0] != HDMI_INFOFRAME_TYPE_DRM ||
+	    ptr[1] != 1 ||
+	    ptr[2] != HDMI_DRM_INFOFRAME_SIZE)
+		return -EINVAL;
+
+	if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_SIZE(DRM)) != 0)
+		return -EINVAL;
+
+	ret = hdmi_drm_infoframe_init(frame);
+	if (ret)
+		return ret;
+
+	ptr += HDMI_INFOFRAME_HEADER_SIZE;
+
+	frame->eotf = ptr[0] & 0x7;
+	frame->metadata_type = ptr[1] & 0x7;
+
+	temp = ptr + 2;
+	for (i = 0; i < 3; i++) {
+		x_lsb = *temp++;
+		x_msb = *temp++;
+		frame->display_primaries[i].x =  (x_msb << 8) | x_lsb;
+		y_lsb = *temp++;
+		y_msb = *temp++;
+		frame->display_primaries[i].y = (y_msb << 8) | y_lsb;
+	}
+
+	frame->white_point.x = (ptr[15] << 8) | ptr[14];
+	frame->white_point.y = (ptr[17] << 8) | ptr[16];
+
+	frame->max_display_mastering_luminance = (ptr[19] << 8) | ptr[18];
+	frame->min_display_mastering_luminance = (ptr[21] << 8) | ptr[20];
+	frame->max_cll = (ptr[23] << 8) | ptr[22];
+	frame->max_fall = (ptr[25] << 8) | ptr[24];
+
+	return 0;
+}
+
+/**
  * hdmi_infoframe_unpack() - unpack binary buffer to a HDMI infoframe
  * @frame: HDMI infoframe
  * @buffer: source buffer
@@ -1640,6 +1864,9 @@ int hdmi_infoframe_unpack(union hdmi_infoframe *frame,
 	case HDMI_INFOFRAME_TYPE_AVI:
 		ret = hdmi_avi_infoframe_unpack(&frame->avi, buffer, size);
 		break;
+	case HDMI_INFOFRAME_TYPE_DRM:
+		ret = hdmi_drm_infoframe_unpack(&frame->drm, buffer, size);
+		break;
 	case HDMI_INFOFRAME_TYPE_SPD:
 		ret = hdmi_spd_infoframe_unpack(&frame->spd, buffer, size);
 		break;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 0657b0b57dae..d53f3493a6b9 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -202,6 +202,15 @@ int __ref xen_swiotlb_init(int verbose, bool early)
 retry:
 	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
 	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
+
+	/*
+	 * IO TLB memory already allocated. Just use it.
+	 */
+	if (io_tlb_start != 0) {
+		xen_io_tlb_start = phys_to_virt(io_tlb_start);
+		goto end;
+	}
+
 	/*
 	 * Get IO TLB memory from any location.
 	 */
@@ -231,7 +240,6 @@ retry:
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
 	}
-	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
 	 * And replace that memory with pages under 4GB.
 	 */
@@ -258,6 +266,8 @@ retry:
 	} else
 		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
 
+end:
+	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	if (!rc)
 		swiotlb_set_max_segment(PAGE_SIZE);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1aee51a9f3bf..c7adff343ba9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -11137,13 +11137,11 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
  * it while performing the free space search since we have already
  * held back allocations.
  */
-static int btrfs_trim_free_extents(struct btrfs_device *device,
-				   struct fstrim_range *range, u64 *trimmed)
+static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
 {
-	u64 start, len = 0, end = 0;
+	u64 start = SZ_1M, len = 0, end = 0;
 	int ret;
 
-	start = max_t(u64, range->start, SZ_1M);
 	*trimmed = 0;
 
 	/* Discard not supported = nothing to do. */
@@ -11186,22 +11184,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 			break;
 		}
 
-		/* Keep going until we satisfy minlen or reach end of space */
-		if (len < range->minlen) {
-			mutex_unlock(&fs_info->chunk_mutex);
-			start += len;
-			continue;
-		}
-
-		/* If we are out of the passed range break */
-		if (start > range->start + range->len - 1) {
-			mutex_unlock(&fs_info->chunk_mutex);
-			break;
-		}
-
-		start = max(range->start, start);
-		len = min(range->len, len);
-
 		ret = btrfs_issue_discard(device->bdev, start, len,
 					  &bytes);
 		if (!ret)
@@ -11216,10 +11198,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 		start += len;
 		*trimmed += bytes;
 
-		/* We've trimmed enough */
-		if (*trimmed >= range->len)
-			break;
-
 		if (fatal_signal_pending(current)) {
 			ret = -ERESTARTSYS;
 			break;
@@ -11303,7 +11281,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	devices = &fs_info->fs_devices->devices;
 	list_for_each_entry(device, devices, dev_list) {
-		ret = btrfs_trim_free_extents(device, range, &group_trimmed);
+		ret = btrfs_trim_free_extents(device, &group_trimmed);
 		if (ret) {
 			dev_failed++;
 			dev_ret = ret;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c78ccaf83ef8..93ea1d529aa3 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -991,9 +991,12 @@ static void gfs2_write_unlock(struct inode *inode)
 static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos,
 				   unsigned len, struct iomap *iomap)
 {
+	unsigned int blockmask = i_blocksize(inode) - 1;
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	unsigned int blocks;
 
-	return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0);
+	blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits;
+	return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0);
 }
 
 static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0fbb486a320e..86a2bd721900 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2777,8 +2777,10 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 
 #if defined(CONFIG_UNIX)
-	if (ctx->ring_sock)
+	if (ctx->ring_sock) {
+		ctx->ring_sock->file = NULL; /* so that iput() is called */
 		sock_release(ctx->ring_sock);
+	}
 #endif
 
 	io_mem_free(ctx->sq_ring);
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 2d016937fdda..42a61eecdacd 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -296,6 +296,18 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 
 out_attach:
 	spin_lock(&dentry_attach_lock);
+	if (unlikely(dentry->d_fsdata && !alias)) {
+		/* d_fsdata is set by a racing thread which is doing
+		 * the same thing as this thread is doing. Leave the racing
+		 * thread going ahead and we return here.
+		 */
+		spin_unlock(&dentry_attach_lock);
+		iput(dl->dl_inode);
+		ocfs2_lock_res_free(&dl->dl_lockres);
+		kfree(dl);
+		return 0;
+	}
+
 	dentry->d_fsdata = dl;
 	dl->dl_count++;
 	spin_unlock(&dentry_attach_lock);
diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h
index b4ca970a5b75..c402364aec0d 100644
--- a/include/drm/bridge/dw_hdmi.h
+++ b/include/drm/bridge/dw_hdmi.h
@@ -150,6 +150,8 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev,
 			     struct drm_encoder *encoder,
 			     const struct dw_hdmi_plat_data *plat_data);
 
+void dw_hdmi_resume(struct dw_hdmi *hdmi);
+
 void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense);
 
 void dw_hdmi_set_sample_rate(struct dw_hdmi *hdmi, unsigned int rate);
diff --git a/include/drm/bridge/dw_mipi_dsi.h b/include/drm/bridge/dw_mipi_dsi.h
index 0c33b9e9e0f0..94cc64a342e1 100644
--- a/include/drm/bridge/dw_mipi_dsi.h
+++ b/include/drm/bridge/dw_mipi_dsi.h
@@ -9,10 +9,20 @@
 #ifndef __DW_MIPI_DSI__
 #define __DW_MIPI_DSI__
 
+#include <linux/types.h>
+
+#include <drm/drm_modes.h>
+
+struct drm_display_mode;
+struct drm_encoder;
 struct dw_mipi_dsi;
+struct mipi_dsi_device;
+struct platform_device;
 
 struct dw_mipi_dsi_phy_ops {
 	int (*init)(void *priv_data);
+	void (*power_on)(void *priv_data);
+	void (*power_off)(void *priv_data);
 	int (*get_lane_mbps)(void *priv_data,
 			     const struct drm_display_mode *mode,
 			     unsigned long mode_flags, u32 lanes, u32 format,
diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h
index e937ff2beb04..927e1205d7aa 100644
--- a/include/drm/drm_atomic.h
+++ b/include/drm/drm_atomic.h
@@ -459,6 +459,13 @@ struct drm_private_state *
 drm_atomic_get_new_private_obj_state(struct drm_atomic_state *state,
 				     struct drm_private_obj *obj);
 
+struct drm_connector *
+drm_atomic_get_old_connector_for_encoder(struct drm_atomic_state *state,
+					 struct drm_encoder *encoder);
+struct drm_connector *
+drm_atomic_get_new_connector_for_encoder(struct drm_atomic_state *state,
+					 struct drm_encoder *encoder);
+
 /**
  * drm_atomic_get_existing_crtc_state - get crtc state, if it exists
  * @state: global atomic state object
@@ -950,4 +957,19 @@ drm_atomic_crtc_needs_modeset(const struct drm_crtc_state *state)
 	       state->connectors_changed;
 }
 
+/**
+ * drm_atomic_crtc_effectively_active - compute whether crtc is actually active
+ * @state: &drm_crtc_state for the CRTC
+ *
+ * When in self refresh mode, the crtc_state->active value will be false, since
+ * the crtc is off. However in some cases we're interested in whether the crtc
+ * is active, or effectively active (ie: it's connected to an active display).
+ * In these cases, use this function instead of just checking active.
+ */
+static inline bool
+drm_atomic_crtc_effectively_active(const struct drm_crtc_state *state)
+{
+	return state->active || state->self_refresh_active;
+}
+
 #endif /* DRM_ATOMIC_H_ */
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 58214be3bf3d..bf4e07141d81 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -117,12 +117,8 @@ int drm_atomic_helper_update_plane(struct drm_plane *plane,
 				   struct drm_modeset_acquire_ctx *ctx);
 int drm_atomic_helper_disable_plane(struct drm_plane *plane,
 				    struct drm_modeset_acquire_ctx *ctx);
-int __drm_atomic_helper_disable_plane(struct drm_plane *plane,
-		struct drm_plane_state *plane_state);
 int drm_atomic_helper_set_config(struct drm_mode_set *set,
 				 struct drm_modeset_acquire_ctx *ctx);
-int __drm_atomic_helper_set_config(struct drm_mode_set *set,
-		struct drm_atomic_state *state);
 
 int drm_atomic_helper_disable_all(struct drm_device *dev,
 				  struct drm_modeset_acquire_ctx *ctx);
diff --git a/include/drm/drm_atomic_state_helper.h b/include/drm/drm_atomic_state_helper.h
index 66c92cbd8e16..e4577cc11689 100644
--- a/include/drm/drm_atomic_state_helper.h
+++ b/include/drm/drm_atomic_state_helper.h
@@ -37,6 +37,8 @@ struct drm_private_state;
 struct drm_modeset_acquire_ctx;
 struct drm_device;
 
+void __drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
+				    struct drm_crtc_state *state);
 void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc);
 void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
 					      struct drm_crtc_state *state);
@@ -60,6 +62,7 @@ void drm_atomic_helper_plane_destroy_state(struct drm_plane *plane,
 void __drm_atomic_helper_connector_reset(struct drm_connector *connector,
 					 struct drm_connector_state *conn_state);
 void drm_atomic_helper_connector_reset(struct drm_connector *connector);
+void drm_atomic_helper_connector_tv_reset(struct drm_connector *connector);
 void
 __drm_atomic_helper_connector_duplicate_state(struct drm_connector *connector,
 					   struct drm_connector_state *state);
diff --git a/include/drm/drm_auth.h b/include/drm/drm_auth.h
index 871008118bab..6bf8b2b78991 100644
--- a/include/drm/drm_auth.h
+++ b/include/drm/drm_auth.h
@@ -1,3 +1,6 @@
+#ifndef _DRM_AUTH_H_
+#define _DRM_AUTH_H_
+
 /*
  * Internal Header for the Direct Rendering Manager
  *
@@ -25,8 +28,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#ifndef _DRM_AUTH_H_
-#define _DRM_AUTH_H_
+#include <linux/idr.h>
+#include <linux/kref.h>
+#include <linux/wait.h>
+
+struct drm_file;
+struct drm_hw_lock;
 
 /*
  * Legacy DRI1 locking data structure. Only here instead of in drm_legacy.h for
diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index d4428913a4e1..7616f6562fe4 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -237,6 +237,103 @@ struct drm_bridge_funcs {
 	 * The enable callback is optional.
 	 */
 	void (*enable)(struct drm_bridge *bridge);
+
+	/**
+	 * @atomic_pre_enable:
+	 *
+	 * This callback should enable the bridge. It is called right before
+	 * the preceding element in the display pipe is enabled. If the
+	 * preceding element is a bridge this means it's called before that
+	 * bridge's @atomic_pre_enable or @pre_enable function. If the preceding
+	 * element is a &drm_encoder it's called right before the encoder's
+	 * &drm_encoder_helper_funcs.atomic_enable hook.
+	 *
+	 * The display pipe (i.e. clocks and timing signals) feeding this bridge
+	 * will not yet be running when this callback is called. The bridge must
+	 * not enable the display link feeding the next bridge in the chain (if
+	 * there is one) when this callback is called.
+	 *
+	 * Note that this function will only be invoked in the context of an
+	 * atomic commit. It will not be invoked from &drm_bridge_pre_enable. It
+	 * would be prudent to also provide an implementation of @pre_enable if
+	 * you are expecting driver calls into &drm_bridge_pre_enable.
+	 *
+	 * The @atomic_pre_enable callback is optional.
+	 */
+	void (*atomic_pre_enable)(struct drm_bridge *bridge,
+				  struct drm_atomic_state *state);
+
+	/**
+	 * @atomic_enable:
+	 *
+	 * This callback should enable the bridge. It is called right after
+	 * the preceding element in the display pipe is enabled. If the
+	 * preceding element is a bridge this means it's called after that
+	 * bridge's @atomic_enable or @enable function. If the preceding element
+	 * is a &drm_encoder it's called right after the encoder's
+	 * &drm_encoder_helper_funcs.atomic_enable hook.
+	 *
+	 * The bridge can assume that the display pipe (i.e. clocks and timing
+	 * signals) feeding it is running when this callback is called. This
+	 * callback must enable the display link feeding the next bridge in the
+	 * chain if there is one.
+	 *
+	 * Note that this function will only be invoked in the context of an
+	 * atomic commit. It will not be invoked from &drm_bridge_enable. It
+	 * would be prudent to also provide an implementation of @enable if
+	 * you are expecting driver calls into &drm_bridge_enable.
+	 *
+	 * The enable callback is optional.
+	 */
+	void (*atomic_enable)(struct drm_bridge *bridge,
+			      struct drm_atomic_state *state);
+	/**
+	 * @atomic_disable:
+	 *
+	 * This callback should disable the bridge. It is called right before
+	 * the preceding element in the display pipe is disabled. If the
+	 * preceding element is a bridge this means it's called before that
+	 * bridge's @atomic_disable or @disable vfunc. If the preceding element
+	 * is a &drm_encoder it's called right before the
+	 * &drm_encoder_helper_funcs.atomic_disable hook.
+	 *
+	 * The bridge can assume that the display pipe (i.e. clocks and timing
+	 * signals) feeding it is still running when this callback is called.
+	 *
+	 * Note that this function will only be invoked in the context of an
+	 * atomic commit. It will not be invoked from &drm_bridge_disable. It
+	 * would be prudent to also provide an implementation of @disable if
+	 * you are expecting driver calls into &drm_bridge_disable.
+	 *
+	 * The disable callback is optional.
+	 */
+	void (*atomic_disable)(struct drm_bridge *bridge,
+			       struct drm_atomic_state *state);
+
+	/**
+	 * @atomic_post_disable:
+	 *
+	 * This callback should disable the bridge. It is called right after the
+	 * preceding element in the display pipe is disabled. If the preceding
+	 * element is a bridge this means it's called after that bridge's
+	 * @atomic_post_disable or @post_disable function. If the preceding
+	 * element is a &drm_encoder it's called right after the encoder's
+	 * &drm_encoder_helper_funcs.atomic_disable hook.
+	 *
+	 * The bridge must assume that the display pipe (i.e. clocks and timing
+	 * signals) feeding it is no longer running when this callback is
+	 * called.
+	 *
+	 * Note that this function will only be invoked in the context of an
+	 * atomic commit. It will not be invoked from &drm_bridge_post_disable.
+	 * It would be prudent to also provide an implementation of
+	 * @post_disable if you are expecting driver calls into
+	 * &drm_bridge_post_disable.
+	 *
+	 * The post_disable callback is optional.
+	 */
+	void (*atomic_post_disable)(struct drm_bridge *bridge,
+				    struct drm_atomic_state *state);
 };
 
 /**
@@ -265,6 +362,14 @@ struct drm_bridge_timings {
 	 * input signal after the clock edge.
 	 */
 	u32 hold_time_ps;
+	/**
+	 * @dual_link:
+	 *
+	 * True if the bus operates in dual-link mode. The exact meaning is
+	 * dependent on the bus type. For LVDS buses, this indicates that even-
+	 * and odd-numbered pixels are received on separate links.
+	 */
+	bool dual_link;
 };
 
 /**
@@ -314,6 +419,15 @@ void drm_bridge_mode_set(struct drm_bridge *bridge,
 void drm_bridge_pre_enable(struct drm_bridge *bridge);
 void drm_bridge_enable(struct drm_bridge *bridge);
 
+void drm_atomic_bridge_disable(struct drm_bridge *bridge,
+			       struct drm_atomic_state *state);
+void drm_atomic_bridge_post_disable(struct drm_bridge *bridge,
+				    struct drm_atomic_state *state);
+void drm_atomic_bridge_pre_enable(struct drm_bridge *bridge,
+				  struct drm_atomic_state *state);
+void drm_atomic_bridge_enable(struct drm_bridge *bridge,
+			      struct drm_atomic_state *state);
+
 #ifdef CONFIG_DRM_PANEL_BRIDGE
 struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel,
 					u32 connector_type);
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 268b2cf0052a..72d51d1e9dd9 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -3,8 +3,13 @@
 #ifndef _DRM_CLIENT_H_
 #define _DRM_CLIENT_H_
 
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
 #include <linux/types.h>
 
+#include <drm/drm_connector.h>
+#include <drm/drm_crtc.h>
+
 struct drm_client_dev;
 struct drm_device;
 struct drm_file;
@@ -85,6 +90,16 @@ struct drm_client_dev {
 	 * @file: DRM file
 	 */
 	struct drm_file *file;
+
+	/**
+	 * @modeset_mutex: Protects @modesets.
+	 */
+	struct mutex modeset_mutex;
+
+	/**
+	 * @modesets: CRTC configurations
+	 */
+	struct drm_mode_set *modesets;
 };
 
 int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,
@@ -135,6 +150,37 @@ struct drm_client_buffer *
 drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format);
 void drm_client_framebuffer_delete(struct drm_client_buffer *buffer);
 
+int drm_client_modeset_create(struct drm_client_dev *client);
+void drm_client_modeset_free(struct drm_client_dev *client);
+int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, unsigned int height);
+bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation);
+int drm_client_modeset_commit_force(struct drm_client_dev *client);
+int drm_client_modeset_commit(struct drm_client_dev *client);
+int drm_client_modeset_dpms(struct drm_client_dev *client, int mode);
+
+/**
+ * drm_client_for_each_modeset() - Iterate over client modesets
+ * @modeset: &drm_mode_set loop cursor
+ * @client: DRM client
+ */
+#define drm_client_for_each_modeset(modeset, client) \
+	for (({ lockdep_assert_held(&(client)->modeset_mutex); }), \
+	     modeset = (client)->modesets; modeset->crtc; modeset++)
+
+/**
+ * drm_client_for_each_connector_iter - connector_list iterator macro
+ * @connector: &struct drm_connector pointer used as cursor
+ * @iter: &struct drm_connector_list_iter
+ *
+ * This iterates the connectors that are useable for internal clients (excludes
+ * writeback connectors).
+ *
+ * For more info see drm_for_each_connector_iter().
+ */
+#define drm_client_for_each_connector_iter(connector, iter) \
+	drm_for_each_connector_iter(connector, iter) \
+		if (connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK)
+
 int drm_client_debugfs_init(struct drm_minor *minor);
 
 #endif
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 02a131202add..ca745d9feaf5 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -464,13 +464,37 @@ int drm_display_info_set_bus_formats(struct drm_display_info *info,
 				     unsigned int num_formats);
 
 /**
+ * struct drm_connector_tv_margins - TV connector related margins
+ *
+ * Describes the margins in pixels to put around the image on TV
+ * connectors to deal with overscan.
+ */
+struct drm_connector_tv_margins {
+	/**
+	 * @bottom: Bottom margin in pixels.
+	 */
+	unsigned int bottom;
+
+	/**
+	 * @left: Left margin in pixels.
+	 */
+	unsigned int left;
+
+	/**
+	 * @right: Right margin in pixels.
+	 */
+	unsigned int right;
+
+	/**
+	 * @top: Top margin in pixels.
+	 */
+	unsigned int top;
+};
+
+/**
  * struct drm_tv_connector_state - TV connector related states
  * @subconnector: selected subconnector
- * @margins: margins (all margins are expressed in pixels)
- * @margins.left: left margin
- * @margins.right: right margin
- * @margins.top: top margin
- * @margins.bottom: bottom margin
+ * @margins: TV margins
  * @mode: TV mode
  * @brightness: brightness in percent
  * @contrast: contrast in percent
@@ -481,12 +505,7 @@ int drm_display_info_set_bus_formats(struct drm_display_info *info,
  */
 struct drm_tv_connector_state {
 	enum drm_mode_subconnector subconnector;
-	struct {
-		unsigned int left;
-		unsigned int right;
-		unsigned int top;
-		unsigned int bottom;
-	} margins;
+	struct drm_connector_tv_margins margins;
 	unsigned int mode;
 	unsigned int brightness;
 	unsigned int contrast;
@@ -517,6 +536,15 @@ struct drm_connector_state {
 	 * Used by the atomic helpers to select the encoder, through the
 	 * &drm_connector_helper_funcs.atomic_best_encoder or
 	 * &drm_connector_helper_funcs.best_encoder callbacks.
+	 *
+	 * This is also used in the atomic helpers to map encoders to their
+	 * current and previous connectors, see
+	 * &drm_atomic_get_old_connector_for_encoder() and
+	 * &drm_atomic_get_new_connector_for_encoder().
+	 *
+	 * NOTE: Atomic drivers must fill this out (either themselves or through
+	 * helpers), for otherwise the GETCONNECTOR and GETENCODER IOCTLs will
+	 * not return correct data to userspace.
 	 */
 	struct drm_encoder *best_encoder;
 
@@ -540,6 +568,20 @@ struct drm_connector_state {
 	struct drm_tv_connector_state tv;
 
 	/**
+	 * @self_refresh_aware:
+	 *
+	 * This tracks whether a connector is aware of the self refresh state.
+	 * It should be set to true for those connector implementations which
+	 * understand the self refresh state. This is needed since the crtc
+	 * registers the self refresh helpers and it doesn't know if the
+	 * connectors downstream have implemented self refresh entry/exit.
+	 *
+	 * Drivers should set this to true in atomic_check if they know how to
+	 * handle self_refresh requests.
+	 */
+	bool self_refresh_aware;
+
+	/**
 	 * @picture_aspect_ratio: Connector property to control the
 	 * HDMI infoframe aspect ratio setting.
 	 *
@@ -599,6 +641,12 @@ struct drm_connector_state {
 	 * and the connector bpc limitations obtained from edid.
 	 */
 	u8 max_bpc;
+
+	/**
+	 * @hdr_output_metadata:
+	 * DRM blob property for HDR output metadata
+	 */
+	struct drm_property_blob *hdr_output_metadata;
 };
 
 /**
@@ -894,19 +942,123 @@ struct drm_connector_funcs {
 				   const struct drm_connector_state *state);
 };
 
-/* mode specified on the command line */
+/**
+ * struct drm_cmdline_mode - DRM Mode passed through the kernel command-line
+ *
+ * Each connector can have an initial mode with additional options
+ * passed through the kernel command line. This structure allows to
+ * express those parameters and will be filled by the command-line
+ * parser.
+ */
 struct drm_cmdline_mode {
+	/**
+	 * @name:
+	 *
+	 * Name of the mode.
+	 */
+	char name[DRM_DISPLAY_MODE_LEN];
+
+	/**
+	 * @specified:
+	 *
+	 * Has a mode been read from the command-line?
+	 */
 	bool specified;
+
+	/**
+	 * @refresh_specified:
+	 *
+	 * Did the mode have a preferred refresh rate?
+	 */
 	bool refresh_specified;
+
+	/**
+	 * @bpp_specified:
+	 *
+	 * Did the mode have a preferred BPP?
+	 */
 	bool bpp_specified;
-	int xres, yres;
+
+	/**
+	 * @xres:
+	 *
+	 * Active resolution on the X axis, in pixels.
+	 */
+	int xres;
+
+	/**
+	 * @yres:
+	 *
+	 * Active resolution on the Y axis, in pixels.
+	 */
+	int yres;
+
+	/**
+	 * @bpp:
+	 *
+	 * Bits per pixels for the mode.
+	 */
 	int bpp;
+
+	/**
+	 * @refresh:
+	 *
+	 * Refresh rate, in Hertz.
+	 */
 	int refresh;
+
+	/**
+	 * @rb:
+	 *
+	 * Do we need to use reduced blanking?
+	 */
 	bool rb;
+
+	/**
+	 * @interlace:
+	 *
+	 * The mode is interlaced.
+	 */
 	bool interlace;
+
+	/**
+	 * @cvt:
+	 *
+	 * The timings will be calculated using the VESA Coordinated
+	 * Video Timings instead of looking up the mode from a table.
+	 */
 	bool cvt;
+
+	/**
+	 * @margins:
+	 *
+	 * Add margins to the mode calculation (1.8% of xres rounded
+	 * down to 8 pixels and 1.8% of yres).
+	 */
 	bool margins;
+
+	/**
+	 * @force:
+	 *
+	 * Ignore the hotplug state of the connector, and force its
+	 * state to one of the DRM_FORCE_* values.
+	 */
 	enum drm_connector_force force;
+
+	/**
+	 * @rotation_reflection:
+	 *
+	 * Initial rotation and reflection of the mode setup from the
+	 * command line. See DRM_MODE_ROTATE_* and
+	 * DRM_MODE_REFLECT_*. The only rotations supported are
+	 * DRM_MODE_ROTATE_0 and DRM_MODE_ROTATE_180.
+	 */
+	unsigned int rotation_reflection;
+
+	/**
+	 * @tv_margins: TV margins to apply to the mode.
+	 */
+	struct drm_connector_tv_margins tv_margins;
 };
 
 /**
@@ -1062,12 +1214,6 @@ struct drm_connector {
 	struct drm_property *vrr_capable_property;
 
 	/**
-	 * @content_protection_property: DRM ENUM property for content
-	 * protection. See drm_connector_attach_content_protection_property().
-	 */
-	struct drm_property *content_protection_property;
-
-	/**
 	 * @colorspace_property: Connector property to set the suitable
 	 * colorspace supported by the sink.
 	 */
@@ -1239,6 +1385,9 @@ struct drm_connector {
 	 * &drm_mode_config.connector_free_work.
 	 */
 	struct llist_node free_node;
+
+	/** @hdr_sink_metadata: HDR Metadata Information read from sink */
+	struct hdr_sink_metadata hdr_sink_metadata;
 };
 
 #define obj_to_connector(x) container_of(x, struct drm_connector, base)
@@ -1345,8 +1494,6 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector,
 					       u32 scaling_mode_mask);
 int drm_connector_attach_vrr_capable_property(
 		struct drm_connector *connector);
-int drm_connector_attach_content_protection_property(
-		struct drm_connector *connector);
 int drm_mode_create_aspect_ratio_property(struct drm_device *dev);
 int drm_mode_create_colorspace_property(struct drm_connector *connector);
 int drm_mode_create_content_type_property(struct drm_device *dev);
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 58ad983d7cd6..128d8b210621 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -39,6 +39,7 @@
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_modes.h>
 #include <drm/drm_connector.h>
+#include <drm/drm_device.h>
 #include <drm/drm_property.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
@@ -53,6 +54,7 @@ struct drm_mode_set;
 struct drm_file;
 struct drm_clip_rect;
 struct drm_printer;
+struct drm_self_refresh_data;
 struct device_node;
 struct dma_fence;
 struct edid;
@@ -300,6 +302,17 @@ struct drm_crtc_state {
 	bool vrr_enabled;
 
 	/**
+	 * @self_refresh_active:
+	 *
+	 * Used by the self refresh helpers to denote when a self refresh
+	 * transition is occurring. This will be set on enable/disable callbacks
+	 * when self refresh is being enabled or disabled. In some cases, it may
+	 * not be desirable to fully shut off the crtc during self refresh.
+	 * CRTC's can inspect this flag and determine the best course of action.
+	 */
+	bool self_refresh_active;
+
+	/**
 	 * @event:
 	 *
 	 * Optional pointer to a DRM event to signal upon completion of the
@@ -1087,6 +1100,13 @@ struct drm_crtc {
 	 * The name of the CRTC's fence timeline.
 	 */
 	char timeline_name[32];
+
+	/**
+	 * @self_refresh_data: Holds the state for the self refresh helpers
+	 *
+	 * Initialized via drm_self_refresh_helper_register().
+	 */
+	struct drm_self_refresh_data *self_refresh_data;
 };
 
 /**
diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h
index ac0f75df1ac9..7501e323d383 100644
--- a/include/drm/drm_debugfs.h
+++ b/include/drm/drm_debugfs.h
@@ -32,6 +32,8 @@
 #ifndef _DRM_DEBUGFS_H_
 #define _DRM_DEBUGFS_H_
 
+#include <linux/types.h>
+#include <linux/seq_file.h>
 /**
  * struct drm_info_list - debugfs info list entry
  *
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 7f9ef709b2b6..1acfc3bbd3fb 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -17,6 +17,7 @@ struct drm_vblank_crtc;
 struct drm_sg_mem;
 struct drm_local_map;
 struct drm_vma_offset_manager;
+struct drm_vram_mm;
 struct drm_fb_helper;
 
 struct inode;
@@ -286,6 +287,9 @@ struct drm_device {
 	/** @vma_offset_manager: GEM information */
 	struct drm_vma_offset_manager *vma_offset_manager;
 
+	/** @vram_mm: VRAM MM memory manager */
+	struct drm_vram_mm *vram_mm;
+
 	/**
 	 * @switch_power_state:
 	 *
diff --git a/include/drm/drm_displayid.h b/include/drm/drm_displayid.h
index c0d4df6a606f..9d3b745c3107 100644
--- a/include/drm/drm_displayid.h
+++ b/include/drm/drm_displayid.h
@@ -40,6 +40,7 @@
 #define DATA_BLOCK_DISPLAY_INTERFACE 0x0f
 #define DATA_BLOCK_STEREO_DISPLAY_INTERFACE 0x10
 #define DATA_BLOCK_TILED_DISPLAY 0x12
+#define DATA_BLOCK_CTA 0x81
 
 #define DATA_BLOCK_VENDOR_SPECIFIC 0x7f
 
@@ -90,4 +91,13 @@ struct displayid_detailed_timing_block {
 	struct displayid_block base;
 	struct displayid_detailed_timings_1 timings[0];
 };
+
+#define for_each_displayid_db(displayid, block, idx, length) \
+	for ((block) = (struct displayid_block *)&(displayid)[idx]; \
+	     (idx) + sizeof(struct displayid_block) <= (length) && \
+	     (idx) + sizeof(struct displayid_block) + (block)->num_bytes <= (length) && \
+	     (block)->num_bytes > 0; \
+	     (idx) += (block)->num_bytes + sizeof(struct displayid_block), \
+	     (block) = (struct displayid_block *)&(displayid)[idx])
+
 #endif
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index 97ce790a5b5a..7e52eb81284a 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -1083,17 +1083,30 @@ struct dp_sdp_header {
 #define EDP_SDP_HEADER_VALID_PAYLOAD_BYTES	0x1F
 #define DP_SDP_PPS_HEADER_PAYLOAD_BYTES_MINUS_1 0x7F
 
-struct edp_vsc_psr {
+/**
+ * struct dp_sdp - DP secondary data packet
+ * @sdp_header: DP secondary data packet header
+ * @db: DP secondaray data packet data blocks
+ * VSC SDP Payload for PSR
+ * db[0]: Stereo Interface
+ * db[1]: 0 - PSR State; 1 - Update RFB; 2 - CRC Valid
+ * db[2]: CRC value bits 7:0 of the R or Cr component
+ * db[3]: CRC value bits 15:8 of the R or Cr component
+ * db[4]: CRC value bits 7:0 of the G or Y component
+ * db[5]: CRC value bits 15:8 of the G or Y component
+ * db[6]: CRC value bits 7:0 of the B or Cb component
+ * db[7]: CRC value bits 15:8 of the B or Cb component
+ * db[8] - db[31]: Reserved
+ * VSC SDP Payload for Pixel Encoding/Colorimetry Format
+ * db[0] - db[15]: Reserved
+ * db[16]: Pixel Encoding and Colorimetry Formats
+ * db[17]: Dynamic Range and Component Bit Depth
+ * db[18]: Content Type
+ * db[19] - db[31]: Reserved
+ */
+struct dp_sdp {
 	struct dp_sdp_header sdp_header;
-	u8 DB0; /* Stereo Interface */
-	u8 DB1; /* 0 - PSR State; 1 - Update RFB; 2 - CRC Valid */
-	u8 DB2; /* CRC value bits 7:0 of the R or Cr component */
-	u8 DB3; /* CRC value bits 15:8 of the R or Cr component */
-	u8 DB4; /* CRC value bits 7:0 of the G or Y component */
-	u8 DB5; /* CRC value bits 15:8 of the G or Y component */
-	u8 DB6; /* CRC value bits 7:0 of the B or Cb component */
-	u8 DB7; /* CRC value bits 15:8 of the B or Cb component */
-	u8 DB8_31[24]; /* Reserved */
+	u8 db[32];
 } __packed;
 
 #define EDP_VSC_PSR_STATE_ACTIVE	(1<<0)
@@ -1401,6 +1414,13 @@ enum drm_dp_quirk {
 	 * driver still need to implement proper handling for such device.
 	 */
 	DP_DPCD_QUIRK_NO_PSR,
+	/**
+	 * @DP_DPCD_QUIRK_NO_SINK_COUNT:
+	 *
+	 * The device does not set SINK_COUNT to a non-zero value.
+	 * The driver should ignore SINK_COUNT during detection.
+	 */
+	DP_DPCD_QUIRK_NO_SINK_COUNT,
 };
 
 /**
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 9d3b5b93102c..b9719418c3d2 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -25,6 +25,7 @@
 
 #include <linux/types.h>
 #include <linux/hdmi.h>
+#include <drm/drm_mode.h>
 
 struct drm_device;
 struct i2c_adapter;
@@ -176,21 +177,23 @@ struct detailed_timing {
 #define DRM_EDID_INPUT_BLANK_TO_BLACK  (1 << 4)
 #define DRM_EDID_INPUT_VIDEO_LEVEL     (3 << 5)
 #define DRM_EDID_INPUT_DIGITAL         (1 << 7)
-#define DRM_EDID_DIGITAL_DEPTH_MASK    (7 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_UNDEF   (0 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_6       (1 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_8       (2 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_10      (3 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_12      (4 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_14      (5 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_16      (6 << 4)
-#define DRM_EDID_DIGITAL_DEPTH_RSVD    (7 << 4)
-#define DRM_EDID_DIGITAL_TYPE_UNDEF    (0)
-#define DRM_EDID_DIGITAL_TYPE_DVI      (1)
-#define DRM_EDID_DIGITAL_TYPE_HDMI_A   (2)
-#define DRM_EDID_DIGITAL_TYPE_HDMI_B   (3)
-#define DRM_EDID_DIGITAL_TYPE_MDDI     (4)
-#define DRM_EDID_DIGITAL_TYPE_DP       (5)
+#define DRM_EDID_DIGITAL_DEPTH_MASK    (7 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_UNDEF   (0 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_6       (1 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_8       (2 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_10      (3 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_12      (4 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_14      (5 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_16      (6 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_DEPTH_RSVD    (7 << 4) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_MASK     (7 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_UNDEF    (0 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_DVI      (1 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_HDMI_A   (2 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_HDMI_B   (3 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_MDDI     (4 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_TYPE_DP       (5 << 0) /* 1.4 */
+#define DRM_EDID_DIGITAL_DFP_1_X       (1 << 0) /* 1.3 */
 
 #define DRM_EDID_FEATURE_DEFAULT_GTF      (1 << 0)
 #define DRM_EDID_FEATURE_PREFERRED_TIMING (1 << 1)
@@ -370,6 +373,10 @@ drm_hdmi_avi_infoframe_quant_range(struct hdmi_avi_infoframe *frame,
 				   const struct drm_display_mode *mode,
 				   enum hdmi_quantization_range rgb_quant_range);
 
+int
+drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame,
+				    const struct drm_connector_state *conn_state);
+
 /**
  * drm_eld_mnl - Get ELD monitor name length in bytes.
  * @eld: pointer to an eld memory structure with mnl set
@@ -471,6 +478,7 @@ struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
 				     struct i2c_adapter *adapter);
 struct edid *drm_edid_duplicate(const struct edid *edid);
 int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
+int drm_add_override_edid_modes(struct drm_connector *connector);
 
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
 enum hdmi_picture_aspect drm_get_cea_aspect_ratio(const u8 video_code);
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 40af2866c26a..c8a8ae2a678a 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -43,17 +43,6 @@ enum mode_set_atomic {
 	ENTER_ATOMIC_MODE_SET,
 };
 
-struct drm_fb_offset {
-	int x, y;
-};
-
-struct drm_fb_helper_crtc {
-	struct drm_mode_set mode_set;
-	struct drm_display_mode *desired_mode;
-	int x, y;
-	int rotation;
-};
-
 /**
  * struct drm_fb_helper_surface_size - describes fbdev size and scanout surface size
  * @fb_width: fbdev width
@@ -104,18 +93,10 @@ struct drm_fb_helper_funcs {
 			struct drm_fb_helper_surface_size *sizes);
 };
 
-struct drm_fb_helper_connector {
-	struct drm_connector *connector;
-};
-
 /**
  * struct drm_fb_helper - main structure to emulate fbdev on top of KMS
  * @fb: Scanout framebuffer object
  * @dev: DRM device
- * @crtc_count: number of possible CRTCs
- * @crtc_info: per-CRTC helper state (mode, x/y offset, etc)
- * @connector_count: number of connected connectors
- * @connector_info_alloc_count: size of connector_info
  * @funcs: driver callbacks for fb helper
  * @fbdev: emulated fbdev device info struct
  * @pseudo_palette: fake palette of 16 colors
@@ -147,24 +128,6 @@ struct drm_fb_helper {
 
 	struct drm_framebuffer *fb;
 	struct drm_device *dev;
-	int crtc_count;
-	struct drm_fb_helper_crtc *crtc_info;
-	int connector_count;
-	int connector_info_alloc_count;
-	/**
-	 * @sw_rotations:
-	 * Bitmask of all rotations requested for panel-orientation which
-	 * could not be handled in hardware. If only one bit is set
-	 * fbdev->fbcon_rotate_hint gets set to the requested rotation.
-	 */
-	int sw_rotations;
-	/**
-	 * @connector_info:
-	 *
-	 * Array of per-connector information. Do not iterate directly, but use
-	 * drm_fb_helper_for_each_connector.
-	 */
-	struct drm_fb_helper_connector **connector_info;
 	const struct drm_fb_helper_funcs *funcs;
 	struct fb_info *fbdev;
 	u32 pseudo_palette[17];
@@ -304,18 +267,8 @@ int drm_fb_helper_ioctl(struct fb_info *info, unsigned int cmd,
 
 int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
 int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel);
-int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper);
 int drm_fb_helper_debug_enter(struct fb_info *info);
 int drm_fb_helper_debug_leave(struct fb_info *info);
-struct drm_display_mode *
-drm_has_preferred_mode(struct drm_fb_helper_connector *fb_connector,
-			int width, int height);
-struct drm_display_mode *
-drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn);
-
-int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_connector *connector);
-int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
-				       struct drm_connector *connector);
 
 int drm_fb_helper_fbdev_setup(struct drm_device *dev,
 			      struct drm_fb_helper *fb_helper,
@@ -490,12 +443,6 @@ static inline int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper,
 	return 0;
 }
 
-static inline int
-drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
-{
-	return 0;
-}
-
 static inline int drm_fb_helper_debug_enter(struct fb_info *info)
 {
 	return 0;
@@ -506,34 +453,6 @@ static inline int drm_fb_helper_debug_leave(struct fb_info *info)
 	return 0;
 }
 
-static inline struct drm_display_mode *
-drm_has_preferred_mode(struct drm_fb_helper_connector *fb_connector,
-		       int width, int height)
-{
-	return NULL;
-}
-
-static inline struct drm_display_mode *
-drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn,
-		      int width, int height)
-{
-	return NULL;
-}
-
-static inline int
-drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper,
-				struct drm_connector *connector)
-{
-	return 0;
-}
-
-static inline int
-drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
-				   struct drm_connector *connector)
-{
-	return 0;
-}
-
 static inline int
 drm_fb_helper_fbdev_setup(struct drm_device *dev,
 			  struct drm_fb_helper *fb_helper,
@@ -575,6 +494,27 @@ drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
 
 #endif
 
+/* TODO: There's a todo entry to remove these three */
+static inline int
+drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
+{
+	return 0;
+}
+
+static inline int
+drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper,
+				struct drm_connector *connector)
+{
+	return 0;
+}
+
+static inline int
+drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
+				   struct drm_connector *connector)
+{
+	return 0;
+}
+
 /**
  * drm_fb_helper_remove_conflicting_framebuffers - remove firmware-configured framebuffers
  * @a: memory range, users of which are to be removed
diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h
index b3d9d88ab290..306d1efeb5e0 100644
--- a/include/drm/drm_fourcc.h
+++ b/include/drm/drm_fourcc.h
@@ -260,6 +260,50 @@ drm_format_info_is_yuv_sampling_444(const struct drm_format_info *info)
 	return info->is_yuv && info->hsub == 1 && info->vsub == 1;
 }
 
+/**
+ * drm_format_info_plane_width - width of the plane given the first plane
+ * @info: pixel format info
+ * @width: width of the first plane
+ * @plane: plane index
+ *
+ * Returns:
+ * The width of @plane, given that the width of the first plane is @width.
+ */
+static inline
+int drm_format_info_plane_width(const struct drm_format_info *info, int width,
+				int plane)
+{
+	if (!info || plane >= info->num_planes)
+		return 0;
+
+	if (plane == 0)
+		return width;
+
+	return width / info->hsub;
+}
+
+/**
+ * drm_format_info_plane_height - height of the plane given the first plane
+ * @info: pixel format info
+ * @height: height of the first plane
+ * @plane: plane index
+ *
+ * Returns:
+ * The height of @plane, given that the height of the first plane is @height.
+ */
+static inline
+int drm_format_info_plane_height(const struct drm_format_info *info, int height,
+				 int plane)
+{
+	if (!info || plane >= info->num_planes)
+		return 0;
+
+	if (plane == 0)
+		return height;
+
+	return height / info->vsub;
+}
+
 const struct drm_format_info *__drm_format_info(u32 format);
 const struct drm_format_info *drm_format_info(u32 format);
 const struct drm_format_info *
@@ -268,12 +312,6 @@ drm_get_format_info(struct drm_device *dev,
 uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth);
 uint32_t drm_driver_legacy_fb_format(struct drm_device *dev,
 				     uint32_t bpp, uint32_t depth);
-int drm_format_num_planes(uint32_t format);
-int drm_format_plane_cpp(uint32_t format, int plane);
-int drm_format_horz_chroma_subsampling(uint32_t format);
-int drm_format_vert_chroma_subsampling(uint32_t format);
-int drm_format_plane_width(int width, uint32_t format, int plane);
-int drm_format_plane_height(int height, uint32_t format, int plane);
 unsigned int drm_format_info_block_width(const struct drm_format_info *info,
 					 int plane);
 unsigned int drm_format_info_block_height(const struct drm_format_info *info,
diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index c23016748e3f..c0e0256e3e98 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -87,6 +87,9 @@ struct drm_framebuffer_funcs {
 	 * for more information as all the semantics and arguments have a one to
 	 * one mapping on this function.
 	 *
+	 * Atomic drivers should use drm_atomic_helper_dirtyfb() to implement
+	 * this hook.
+	 *
 	 * RETURNS:
 	 *
 	 * 0 on success or a negative error code on failure.
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 5047c7ee25f5..a9121fe66ea2 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -401,9 +401,4 @@ int drm_gem_dumb_destroy(struct drm_file *file,
 			 struct drm_device *dev,
 			 uint32_t handle);
 
-int drm_gem_pin(struct drm_gem_object *obj);
-void drm_gem_unpin(struct drm_gem_object *obj);
-void *drm_gem_vmap(struct drm_gem_object *obj);
-void drm_gem_vunmap(struct drm_gem_object *obj, void *vaddr);
-
 #endif /* __DRM_GEM_H__ */
diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
new file mode 100644
index 000000000000..9581ea0a4f7e
--- /dev/null
+++ b/include/drm/drm_gem_vram_helper.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef DRM_GEM_VRAM_HELPER_H
+#define DRM_GEM_VRAM_HELPER_H
+
+#include <drm/drm_gem.h>
+#include <drm/ttm/ttm_bo_api.h>
+#include <drm/ttm/ttm_placement.h>
+#include <linux/kernel.h> /* for container_of() */
+
+struct drm_mode_create_dumb;
+struct drm_vram_mm_funcs;
+struct filp;
+struct vm_area_struct;
+
+#define DRM_GEM_VRAM_PL_FLAG_VRAM	TTM_PL_FLAG_VRAM
+#define DRM_GEM_VRAM_PL_FLAG_SYSTEM	TTM_PL_FLAG_SYSTEM
+
+/*
+ * Buffer-object helpers
+ */
+
+/**
+ * struct drm_gem_vram_object - GEM object backed by VRAM
+ * @gem:	GEM object
+ * @bo:		TTM buffer object
+ * @kmap:	Mapping information for @bo
+ * @placement:	TTM placement information. Supported placements are \
+	%TTM_PL_VRAM and %TTM_PL_SYSTEM
+ * @placements:	TTM placement information.
+ * @pin_count:	Pin counter
+ *
+ * The type struct drm_gem_vram_object represents a GEM object that is
+ * backed by VRAM. It can be used for simple framebuffer devices with
+ * dedicated memory. The buffer object can be evicted to system memory if
+ * video memory becomes scarce.
+ */
+struct drm_gem_vram_object {
+	struct drm_gem_object gem;
+	struct ttm_buffer_object bo;
+	struct ttm_bo_kmap_obj kmap;
+
+	/* Supported placements are %TTM_PL_VRAM and %TTM_PL_SYSTEM */
+	struct ttm_placement placement;
+	struct ttm_place placements[2];
+
+	int pin_count;
+};
+
+/**
+ * Returns the container of type &struct drm_gem_vram_object
+ * for field bo.
+ * @bo:		the VRAM buffer object
+ * Returns:	The containing GEM VRAM object
+ */
+static inline struct drm_gem_vram_object *drm_gem_vram_of_bo(
+	struct ttm_buffer_object *bo)
+{
+	return container_of(bo, struct drm_gem_vram_object, bo);
+}
+
+/**
+ * Returns the container of type &struct drm_gem_vram_object
+ * for field gem.
+ * @gem:	the GEM object
+ * Returns:	The containing GEM VRAM object
+ */
+static inline struct drm_gem_vram_object *drm_gem_vram_of_gem(
+	struct drm_gem_object *gem)
+{
+	return container_of(gem, struct drm_gem_vram_object, gem);
+}
+
+struct drm_gem_vram_object *drm_gem_vram_create(struct drm_device *dev,
+						struct ttm_bo_device *bdev,
+						size_t size,
+						unsigned long pg_align,
+						bool interruptible);
+void drm_gem_vram_put(struct drm_gem_vram_object *gbo);
+u64 drm_gem_vram_mmap_offset(struct drm_gem_vram_object *gbo);
+s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo);
+int drm_gem_vram_pin(struct drm_gem_vram_object *gbo, unsigned long pl_flag);
+int drm_gem_vram_unpin(struct drm_gem_vram_object *gbo);
+void *drm_gem_vram_kmap(struct drm_gem_vram_object *gbo, bool map,
+			bool *is_iomem);
+void drm_gem_vram_kunmap(struct drm_gem_vram_object *gbo);
+
+int drm_gem_vram_fill_create_dumb(struct drm_file *file,
+				  struct drm_device *dev,
+				  struct ttm_bo_device *bdev,
+				  unsigned long pg_align,
+				  bool interruptible,
+				  struct drm_mode_create_dumb *args);
+
+/*
+ * Helpers for struct ttm_bo_driver
+ */
+
+void drm_gem_vram_bo_driver_evict_flags(struct ttm_buffer_object *bo,
+					struct ttm_placement *pl);
+
+int drm_gem_vram_bo_driver_verify_access(struct ttm_buffer_object *bo,
+					 struct file *filp);
+
+extern const struct drm_vram_mm_funcs drm_gem_vram_mm_funcs;
+
+/*
+ * Helpers for struct drm_driver
+ */
+
+void drm_gem_vram_driver_gem_free_object_unlocked(struct drm_gem_object *gem);
+int drm_gem_vram_driver_dumb_create(struct drm_file *file,
+				    struct drm_device *dev,
+				    struct drm_mode_create_dumb *args);
+int drm_gem_vram_driver_dumb_mmap_offset(struct drm_file *file,
+					 struct drm_device *dev,
+					 uint32_t handle, uint64_t *offset);
+
+/**
+ * define DRM_GEM_VRAM_DRIVER - default callback functions for \
+	&struct drm_driver
+ *
+ * Drivers that use VRAM MM and GEM VRAM can use this macro to initialize
+ * &struct drm_driver with default functions.
+ */
+#define DRM_GEM_VRAM_DRIVER \
+	.gem_free_object_unlocked = \
+		drm_gem_vram_driver_gem_free_object_unlocked, \
+	.dumb_create		  = drm_gem_vram_driver_dumb_create, \
+	.dumb_map_offset	  = drm_gem_vram_driver_dumb_mmap_offset
+
+/*
+ * PRIME helpers for struct drm_driver
+ */
+
+int drm_gem_vram_driver_gem_prime_pin(struct drm_gem_object *obj);
+void drm_gem_vram_driver_gem_prime_unpin(struct drm_gem_object *obj);
+void *drm_gem_vram_driver_gem_prime_vmap(struct drm_gem_object *obj);
+void drm_gem_vram_driver_gem_prime_vunmap(struct drm_gem_object *obj,
+					  void *vaddr);
+int drm_gem_vram_driver_gem_prime_mmap(struct drm_gem_object *obj,
+				       struct vm_area_struct *vma);
+
+#define DRM_GEM_VRAM_DRIVER_PRIME \
+	.gem_prime_export = drm_gem_prime_export, \
+	.gem_prime_import = drm_gem_prime_import, \
+	.gem_prime_pin	  = drm_gem_vram_driver_gem_prime_pin, \
+	.gem_prime_unpin  = drm_gem_vram_driver_gem_prime_unpin, \
+	.gem_prime_vmap	  = drm_gem_vram_driver_gem_prime_vmap, \
+	.gem_prime_vunmap = drm_gem_vram_driver_gem_prime_vunmap, \
+	.gem_prime_mmap	  = drm_gem_vram_driver_gem_prime_mmap
+
+#endif
diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index f243408ecf26..13771a496e2b 100644
--- a/include/drm/drm_hdcp.h
+++ b/include/drm/drm_hdcp.h
@@ -252,17 +252,44 @@ struct hdcp2_rep_stream_ready {
  * host format and back
  */
 static inline
-u32 drm_hdcp2_seq_num_to_u32(u8 seq_num[HDCP_2_2_SEQ_NUM_LEN])
+u32 drm_hdcp_be24_to_cpu(const u8 seq_num[HDCP_2_2_SEQ_NUM_LEN])
 {
 	return (u32)(seq_num[2] | seq_num[1] << 8 | seq_num[0] << 16);
 }
 
 static inline
-void drm_hdcp2_u32_to_seq_num(u8 seq_num[HDCP_2_2_SEQ_NUM_LEN], u32 val)
+void drm_hdcp_cpu_to_be24(u8 seq_num[HDCP_2_2_SEQ_NUM_LEN], u32 val)
 {
 	seq_num[0] = val >> 16;
 	seq_num[1] = val >> 8;
 	seq_num[2] = val;
 }
 
+#define DRM_HDCP_SRM_GEN1_MAX_BYTES		(5 * 1024)
+#define DRM_HDCP_1_4_SRM_ID			0x8
+#define DRM_HDCP_SRM_ID_MASK			(0xF << 4)
+#define DRM_HDCP_1_4_VRL_LENGTH_SIZE		3
+#define DRM_HDCP_1_4_DCP_SIG_SIZE		40
+#define DRM_HDCP_2_SRM_ID			0x9
+#define DRM_HDCP_2_INDICATOR			0x1
+#define DRM_HDCP_2_INDICATOR_MASK		0xF
+#define DRM_HDCP_2_VRL_LENGTH_SIZE		3
+#define DRM_HDCP_2_DCP_SIG_SIZE			384
+#define DRM_HDCP_2_NO_OF_DEV_PLUS_RESERVED_SZ	4
+#define DRM_HDCP_2_KSV_COUNT_2_LSBITS(byte)	(((byte) & 0xC) >> 6)
+
+struct hdcp_srm_header {
+	u8 srm_id;
+	u8 reserved;
+	__be16 srm_version;
+	u8 srm_gen_no;
+} __packed;
+
+struct drm_device;
+struct drm_connector;
+
+bool drm_hdcp_check_ksvs_revoked(struct drm_device *dev,
+				 u8 *ksvs, u32 ksv_count);
+int drm_connector_attach_content_protection_property(
+		struct drm_connector *connector);
 #endif
diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h
index 2182a56ac421..58dc0c04bf99 100644
--- a/include/drm/drm_legacy.h
+++ b/include/drm/drm_legacy.h
@@ -1,11 +1,5 @@
 #ifndef __DRM_DRM_LEGACY_H__
 #define __DRM_DRM_LEGACY_H__
-
-#include <drm/drm_auth.h>
-#include <drm/drm_hashtab.h>
-
-struct drm_device;
-
 /*
  * Legacy driver interfaces for the Direct Rendering Manager
  *
@@ -39,6 +33,12 @@ struct drm_device;
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <drm/drm.h>
+#include <drm/drm_auth.h>
+#include <drm/drm_hashtab.h>
+
+struct drm_device;
+struct file;
 
 /*
  * Legacy Support for palateontologic DRM drivers
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 7f60e8eb269a..759d462d028b 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -836,6 +836,19 @@ struct drm_mode_config {
 	 */
 	struct drm_property *writeback_out_fence_ptr_property;
 
+	/**
+	 * @hdr_output_metadata_property: Connector property containing hdr
+	 * metatada. This will be provided by userspace compositors based
+	 * on HDR content
+	 */
+	struct drm_property *hdr_output_metadata_property;
+
+	/**
+	 * @content_protection_property: DRM ENUM property for content
+	 * protection. See drm_connector_attach_content_protection_property().
+	 */
+	struct drm_property *content_protection_property;
+
 	/* dumb ioctl parameters */
 	uint32_t preferred_depth, prefer_shadow;
 
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index f7bbd0b0ecd1..6b18c8adfe9d 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -680,6 +680,52 @@ struct drm_encoder_helper_funcs {
 					    struct drm_connector *connector);
 
 	/**
+	 * @atomic_disable:
+	 *
+	 * This callback should be used to disable the encoder. With the atomic
+	 * drivers it is called before this encoder's CRTC has been shut off
+	 * using their own &drm_crtc_helper_funcs.atomic_disable hook. If that
+	 * sequence is too simple drivers can just add their own driver private
+	 * encoder hooks and call them from CRTC's callback by looping over all
+	 * encoders connected to it using for_each_encoder_on_crtc().
+	 *
+	 * This callback is a variant of @disable that provides the atomic state
+	 * to the driver. If @atomic_disable is implemented, @disable is not
+	 * called by the helpers.
+	 *
+	 * This hook is only used by atomic helpers. Atomic drivers don't need
+	 * to implement it if there's no need to disable anything at the encoder
+	 * level. To ensure that runtime PM handling (using either DPMS or the
+	 * new "ACTIVE" property) works @atomic_disable must be the inverse of
+	 * @atomic_enable.
+	 */
+	void (*atomic_disable)(struct drm_encoder *encoder,
+			       struct drm_atomic_state *state);
+
+	/**
+	 * @atomic_enable:
+	 *
+	 * This callback should be used to enable the encoder. It is called
+	 * after this encoder's CRTC has been enabled using their own
+	 * &drm_crtc_helper_funcs.atomic_enable hook. If that sequence is
+	 * too simple drivers can just add their own driver private encoder
+	 * hooks and call them from CRTC's callback by looping over all encoders
+	 * connected to it using for_each_encoder_on_crtc().
+	 *
+	 * This callback is a variant of @enable that provides the atomic state
+	 * to the driver. If @atomic_enable is implemented, @enable is not
+	 * called by the helpers.
+	 *
+	 * This hook is only used by atomic helpers, it is the opposite of
+	 * @atomic_disable. Atomic drivers don't need to implement it if there's
+	 * no need to enable anything at the encoder level. To ensure that
+	 * runtime PM handling works @atomic_enable must be the inverse of
+	 * @atomic_disable.
+	 */
+	void (*atomic_enable)(struct drm_encoder *encoder,
+			      struct drm_atomic_state *state);
+
+	/**
 	 * @disable:
 	 *
 	 * This callback should be used to disable the encoder. With the atomic
@@ -695,6 +741,9 @@ struct drm_encoder_helper_funcs {
 	 * handling (using either DPMS or the new "ACTIVE" property) works
 	 * @disable must be the inverse of @enable for atomic drivers.
 	 *
+	 * For atomic drivers also consider @atomic_disable and save yourself
+	 * from having to read the NOTE below!
+	 *
 	 * NOTE:
 	 *
 	 * With legacy CRTC helpers there's a big semantic difference between
@@ -719,11 +768,11 @@ struct drm_encoder_helper_funcs {
 	 * hooks and call them from CRTC's callback by looping over all encoders
 	 * connected to it using for_each_encoder_on_crtc().
 	 *
-	 * This hook is used only by atomic helpers, for symmetry with @disable.
-	 * Atomic drivers don't need to implement it if there's no need to
-	 * enable anything at the encoder level. To ensure that runtime PM handling
-	 * (using either DPMS or the new "ACTIVE" property) works
-	 * @enable must be the inverse of @disable for atomic drivers.
+	 * This hook is only used by atomic helpers, it is the opposite of
+	 * @disable. Atomic drivers don't need to implement it if there's no
+	 * need to enable anything at the encoder level. To ensure that
+	 * runtime PM handling (using either DPMS or the new "ACTIVE" property)
+	 * works @enable must be the inverse of @disable for atomic drivers.
 	 */
 	void (*enable)(struct drm_encoder *encoder);
 
@@ -979,7 +1028,7 @@ struct drm_connector_helper_funcs {
 	 * deadlock.
 	 */
 	int (*atomic_check)(struct drm_connector *connector,
-			    struct drm_connector_state *state);
+			    struct drm_atomic_state *state);
 
 	/**
 	 * @atomic_commit:
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index 6078c700d9ba..cd5903ad33f7 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -69,7 +69,7 @@ struct drm_plane_state {
 	 *
 	 * Optional fence to wait for before scanning out @fb. The core atomic
 	 * code will set this when userspace is using explicit fencing. Do not
-	 * write this directly for a driver's implicit fence, use
+	 * write this field directly for a driver's implicit fence, use
 	 * drm_atomic_set_fence_for_plane() to ensure that an explicit fence is
 	 * preserved.
 	 *
diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
index 3a4247319e63..a5d6f2f3e430 100644
--- a/include/drm/drm_print.h
+++ b/include/drm/drm_print.h
@@ -32,6 +32,8 @@
 #include <linux/device.h>
 #include <linux/debugfs.h>
 
+#include <drm/drm.h>
+
 /**
  * DOC: print
  *
diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h
new file mode 100644
index 000000000000..397a583ccca7
--- /dev/null
+++ b/include/drm/drm_self_refresh_helper.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2019 Google, Inc.
+ *
+ * Authors:
+ * Sean Paul <seanpaul@chromium.org>
+ */
+#ifndef DRM_SELF_REFRESH_HELPER_H_
+#define DRM_SELF_REFRESH_HELPER_H_
+
+struct drm_atomic_state;
+struct drm_crtc;
+
+void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
+
+int drm_self_refresh_helper_init(struct drm_crtc *crtc,
+				 unsigned int entry_delay_ms);
+
+void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc);
+#endif
diff --git a/include/drm/drm_vram_mm_helper.h b/include/drm/drm_vram_mm_helper.h
new file mode 100644
index 000000000000..a8ffd8599b08
--- /dev/null
+++ b/include/drm/drm_vram_mm_helper.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef DRM_VRAM_MM_HELPER_H
+#define DRM_VRAM_MM_HELPER_H
+
+#include <drm/ttm/ttm_bo_driver.h>
+
+struct drm_device;
+
+/**
+ * struct drm_vram_mm_funcs - Callback functions for &struct drm_vram_mm
+ * @evict_flags:	Provides an implementation for struct \
+	&ttm_bo_driver.evict_flags
+ * @verify_access:	Provides an implementation for \
+	struct &ttm_bo_driver.verify_access
+ *
+ * These callback function integrate VRAM MM with TTM buffer objects. New
+ * functions can be added if necessary.
+ */
+struct drm_vram_mm_funcs {
+	void (*evict_flags)(struct ttm_buffer_object *bo,
+			    struct ttm_placement *placement);
+	int (*verify_access)(struct ttm_buffer_object *bo, struct file *filp);
+};
+
+/**
+ * struct drm_vram_mm - An instance of VRAM MM
+ * @vram_base:	Base address of the managed video memory
+ * @vram_size:	Size of the managed video memory in bytes
+ * @bdev:	The TTM BO device.
+ * @funcs:	TTM BO functions
+ *
+ * The fields &struct drm_vram_mm.vram_base and
+ * &struct drm_vram_mm.vrm_size are managed by VRAM MM, but are
+ * available for public read access. Use the field
+ * &struct drm_vram_mm.bdev to access the TTM BO device.
+ */
+struct drm_vram_mm {
+	uint64_t vram_base;
+	size_t vram_size;
+
+	struct ttm_bo_device bdev;
+
+	const struct drm_vram_mm_funcs *funcs;
+};
+
+/**
+ * drm_vram_mm_of_bdev() - \
+	Returns the container of type &struct ttm_bo_device for field bdev.
+ * @bdev:	the TTM BO device
+ *
+ * Returns:
+ * The containing instance of &struct drm_vram_mm
+ */
+static inline struct drm_vram_mm *drm_vram_mm_of_bdev(
+	struct ttm_bo_device *bdev)
+{
+	return container_of(bdev, struct drm_vram_mm, bdev);
+}
+
+int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
+		     uint64_t vram_base, size_t vram_size,
+		     const struct drm_vram_mm_funcs *funcs);
+void drm_vram_mm_cleanup(struct drm_vram_mm *vmm);
+
+int drm_vram_mm_mmap(struct file *filp, struct vm_area_struct *vma,
+		     struct drm_vram_mm *vmm);
+
+/*
+ * Helpers for integration with struct drm_device
+ */
+
+struct drm_vram_mm *drm_vram_helper_alloc_mm(
+	struct drm_device *dev, uint64_t vram_base, size_t vram_size,
+	const struct drm_vram_mm_funcs *funcs);
+void drm_vram_helper_release_mm(struct drm_device *dev);
+
+/*
+ * Helpers for &struct file_operations
+ */
+
+int drm_vram_mm_file_operations_mmap(
+	struct file *filp, struct vm_area_struct *vma);
+
+/**
+ * define DRM_VRAM_MM_FILE_OPERATIONS - default callback functions for \
+	&struct file_operations
+ *
+ * Drivers that use VRAM MM can use this macro to initialize
+ * &struct file_operations with default functions.
+ */
+#define DRM_VRAM_MM_FILE_OPERATIONS \
+	.llseek		= no_llseek, \
+	.read		= drm_read, \
+	.poll		= drm_poll, \
+	.unlocked_ioctl = drm_ioctl, \
+	.compat_ioctl	= drm_compat_ioctl, \
+	.mmap		= drm_vram_mm_file_operations_mmap, \
+	.open		= drm_open, \
+	.release	= drm_release \
+
+#endif
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0daca4d8dad9..57b4121c750a 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -167,9 +167,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  * @sched: the scheduler instance on which this job is scheduled.
  * @s_fence: contains the fences for the scheduling of job.
  * @finish_cb: the callback for the finished fence.
- * @finish_work: schedules the function @drm_sched_job_finish once the job has
- *               finished to remove the job from the
- *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
@@ -188,7 +185,6 @@ struct drm_sched_job {
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 	struct dma_fence_cb		finish_cb;
-	struct work_struct		finish_work;
 	struct list_head		node;
 	uint64_t			id;
 	atomic_t			karma;
@@ -263,6 +259,7 @@ struct drm_sched_backend_ops {
  *              guilty and it will be considered for scheduling further.
  * @num_jobs: the number of jobs in queue in the scheduler
  * @ready: marks if the underlying HW is ready to work
+ * @free_guilty: A hit to time out handler to free the guilty job.
  *
  * One scheduler is implemented for each hardware ring.
  */
@@ -283,6 +280,7 @@ struct drm_gpu_scheduler {
 	int				hang_limit;
 	atomic_t                        num_jobs;
 	bool			ready;
+	bool				free_guilty;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
@@ -296,7 +294,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		       void *owner);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
-void drm_sched_stop(struct drm_gpu_scheduler *sched);
+void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
 void drm_sched_increase_karma(struct drm_sched_job *bad);
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 6477da22af28..6d60ea68c171 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -559,7 +559,6 @@
 #define INTEL_ICL_PORT_F_IDS(info) \
 	INTEL_VGA_DEVICE(0x8A50, info), \
 	INTEL_VGA_DEVICE(0x8A5C, info), \
-	INTEL_VGA_DEVICE(0x8A5D, info), \
 	INTEL_VGA_DEVICE(0x8A59, info),	\
 	INTEL_VGA_DEVICE(0x8A58, info),	\
 	INTEL_VGA_DEVICE(0x8A52, info), \
@@ -573,7 +572,8 @@
 
 #define INTEL_ICL_11_IDS(info) \
 	INTEL_ICL_PORT_F_IDS(info), \
-	INTEL_VGA_DEVICE(0x8A51, info)
+	INTEL_VGA_DEVICE(0x8A51, info), \
+	INTEL_VGA_DEVICE(0x8A5D, info)
 
 /* EHL */
 #define INTEL_EHL_IDS(info) \
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 49d9cdfc58f2..435d02f719a8 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -768,4 +768,14 @@ int ttm_bo_swapout(struct ttm_bo_global *glob,
 			struct ttm_operation_ctx *ctx);
 void ttm_bo_swapout_all(struct ttm_bo_device *bdev);
 int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo);
+
+/* Default number of pre-faulted pages in the TTM fault handler */
+#define TTM_BO_VM_NUM_PREFAULT 16
+
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+			     struct vm_fault *vmf);
+
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+				    pgprot_t prot,
+				    pgoff_t num_prefault);
 #endif
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 129dabbc002d..a2d810a2504d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -442,6 +442,9 @@ extern struct ttm_bo_global {
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
  * @vma_manager: Address space manager
+ * @vm_ops: Pointer to the struct vm_operations_struct used for this
+ * device's VM operations. The driver may override this before the first
+ * mmap() call.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -460,6 +463,7 @@ struct ttm_bo_device {
 	struct ttm_bo_global *glob;
 	struct ttm_bo_driver *driver;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+	const struct vm_operations_struct *vm_ops;
 
 	/*
 	 * Protected by internal locks.
@@ -488,6 +492,8 @@ struct ttm_bo_device {
 	bool no_retry;
 };
 
+extern const struct vm_operations_struct ttm_bo_vm_ops;
+
 /**
  * struct ttm_lru_bulk_move_pos
  *
@@ -767,11 +773,12 @@ static inline int ttm_bo_reserve_slowpath(struct ttm_buffer_object *bo,
  */
 static inline void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
-	if (!(bo->mem.placement & TTM_PL_FLAG_NO_EVICT)) {
-		spin_lock(&bo->bdev->glob->lru_lock);
+	spin_lock(&bo->bdev->glob->lru_lock);
+	if (list_empty(&bo->lru))
 		ttm_bo_add_to_lru(bo);
-		spin_unlock(&bo->bdev->glob->lru_lock);
-	}
+	else
+		ttm_bo_move_to_lru_tail(bo, NULL);
+	spin_unlock(&bo->bdev->glob->lru_lock);
 	reservation_object_unlock(bo->resv);
 }
 
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 621615fa7728..7e46cc678e7e 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -70,6 +70,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
  * @list:    thread private list of ttm_validate_buffer structs.
  * @intr:    should the wait be interruptible
  * @dups:    [out] optional list of duplicates.
+ * @del_lru: true if BOs should be removed from the LRU.
  *
  * Tries to reserve bos pointed to by the list entries for validation.
  * If the function returns 0, all buffers are marked as "unfenced",
@@ -98,7 +99,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 
 extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 				  struct list_head *list, bool intr,
-				  struct list_head *dups);
+				  struct list_head *dups, bool del_lru);
 
 /**
  * function ttm_eu_fence_buffer_objects.
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index d71b079bb021..b4e766e93f6e 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -106,8 +106,6 @@ enum {
 	CFTYPE_WORLD_WRITABLE	= (1 << 4),	/* (DON'T USE FOR NEW FILES) S_IWUGO */
 	CFTYPE_DEBUG		= (1 << 5),	/* create when cgroup_debug */
 
-	CFTYPE_SYMLINKED	= (1 << 6),	/* pointed to by symlink too */
-
 	/* internal flags, do not use outside cgroup core proper */
 	__CFTYPE_ONLY_ON_DFL	= (1 << 16),	/* only on default hierarchy */
 	__CFTYPE_NOT_ON_DFL	= (1 << 17),	/* not on default hierarchy */
@@ -223,6 +221,7 @@ struct css_set {
 	 */
 	struct list_head tasks;
 	struct list_head mg_tasks;
+	struct list_head dying_tasks;
 
 	/* all css_task_iters currently walking this cset */
 	struct list_head task_iters;
@@ -545,7 +544,6 @@ struct cftype {
 	 * end of cftype array.
 	 */
 	char name[MAX_CFTYPE_NAME];
-	char link_name[MAX_CFTYPE_NAME];
 	unsigned long private;
 
 	/*
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c0077adeea83..0297f930a56e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -43,6 +43,9 @@
 /* walk all threaded css_sets in the domain */
 #define CSS_TASK_ITER_THREADED		(1U << 1)
 
+/* internal flags */
+#define CSS_TASK_ITER_SKIPPED		(1U << 16)
+
 /* a css_task_iter should be treated as an opaque object */
 struct css_task_iter {
 	struct cgroup_subsys		*ss;
@@ -57,6 +60,7 @@ struct css_task_iter {
 	struct list_head		*task_pos;
 	struct list_head		*tasks_head;
 	struct list_head		*mg_tasks_head;
+	struct list_head		*dying_tasks_head;
 
 	struct css_set			*cur_cset;
 	struct css_set			*cur_dcset;
@@ -487,7 +491,7 @@ static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
  *
  * Find the css for the (@task, @subsys_id) combination, increment a
  * reference on and return it.  This function is guaranteed to return a
- * valid css.
+ * valid css.  The returned css may already have been offlined.
  */
 static inline struct cgroup_subsys_state *
 task_get_css(struct task_struct *task, int subsys_id)
@@ -497,7 +501,13 @@ task_get_css(struct task_struct *task, int subsys_id)
 	rcu_read_lock();
 	while (true) {
 		css = task_css(task, subsys_id);
-		if (likely(css_tryget_online(css)))
+		/*
+		 * Can't use css_tryget_online() here.  A task which has
+		 * PF_EXITING set may stay associated with an offline css.
+		 * If such task calls this function, css_tryget_online()
+		 * will keep failing.
+		 */
+		if (likely(css_tryget(css)))
 			break;
 		cpu_relax();
 	}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 6a381594608c..5c6062206760 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -101,6 +101,7 @@ enum cpuhp_state {
 	CPUHP_AP_IRQ_BCM2836_STARTING,
 	CPUHP_AP_IRQ_MIPS_GIC_STARTING,
 	CPUHP_AP_ARM_MVEBU_COHERENCY,
+	CPUHP_AP_MICROCODE_LOADER,
 	CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
 	CPUHP_AP_PERF_X86_STARTING,
 	CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
diff --git a/include/linux/device.h b/include/linux/device.h
index e85264fb6616..848fc71c6ba6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -713,6 +713,7 @@ void __iomem *devm_of_iomap(struct device *dev,
 /* allows to add/remove a custom action to devres stack */
 int devm_add_action(struct device *dev, void (*action)(void *), void *data);
 void devm_remove_action(struct device *dev, void (*action)(void *), void *data);
+void devm_release_action(struct device *dev, void (*action)(void *), void *data);
 
 static inline int devm_add_action_or_reset(struct device *dev,
 					   void (*action)(void *), void *data)
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 58725f890b5b..01ad5b942a6f 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -39,19 +39,21 @@ struct dma_buf_attachment;
 
 /**
  * struct dma_buf_ops - operations possible on struct dma_buf
- * @map_atomic: [optional] maps a page from the buffer into kernel address
- *		space, users may not block until the subsequent unmap call.
- *		This callback must not sleep.
- * @unmap_atomic: [optional] unmaps a atomically mapped page from the buffer.
- *		  This Callback must not sleep.
- * @map: [optional] maps a page from the buffer into kernel address space.
- * @unmap: [optional] unmaps a page from the buffer.
  * @vmap: [optional] creates a virtual mapping for the buffer into kernel
  *	  address space. Same restrictions as for vmap and friends apply.
  * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
 	/**
+	  * @cache_sgt_mapping:
+	  *
+	  * If true the framework will cache the first mapping made for each
+	  * attachment. This avoids creating mappings for attachments multiple
+	  * times.
+	  */
+	bool cache_sgt_mapping;
+
+	/**
 	 * @attach:
 	 *
 	 * This is called from dma_buf_attach() to make sure that a given
@@ -205,8 +207,6 @@ struct dma_buf_ops {
 	 * to be restarted.
 	 */
 	int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction);
-	void *(*map)(struct dma_buf *, unsigned long);
-	void (*unmap)(struct dma_buf *, unsigned long, void *);
 
 	/**
 	 * @mmap:
@@ -245,6 +245,31 @@ struct dma_buf_ops {
 	 */
 	int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
 
+	/**
+	 * @map:
+	 *
+	 * Maps a page from the buffer into kernel address space. The page is
+	 * specified by offset into the buffer in PAGE_SIZE units.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * Virtual address pointer where requested page can be accessed. NULL
+	 * on error or when this function is unimplemented by the exporter.
+	 */
+	void *(*map)(struct dma_buf *, unsigned long);
+
+	/**
+	 * @unmap:
+	 *
+	 * Unmaps a page from the buffer. Page offset and address pointer should
+	 * be the same as the one passed to and returned by matching call to map.
+	 *
+	 * This callback is optional.
+	 */
+	void (*unmap)(struct dma_buf *, unsigned long, void *);
+
 	void *(*vmap)(struct dma_buf *);
 	void (*vunmap)(struct dma_buf *, void *vaddr);
 };
@@ -255,10 +280,12 @@ struct dma_buf_ops {
  * @file: file pointer used for sharing buffers across, and for refcounting.
  * @attachments: list of dma_buf_attachment that denotes all devices attached.
  * @ops: dma_buf_ops associated with this buffer object.
- * @lock: used internally to serialize list manipulation, attach/detach and vmap/unmap
+ * @lock: used internally to serialize list manipulation, attach/detach and
+ *        vmap/unmap, and accesses to name
  * @vmapping_counter: used internally to refcnt the vmaps
  * @vmap_ptr: the current vmap ptr if vmapping_counter > 0
  * @exp_name: name of the exporter; useful for debugging.
+ * @name: userspace-provided name; useful for accounting and debugging.
  * @owner: pointer to exporter module; used for refcounting when exporter is a
  *         kernel module.
  * @list_node: node for dma_buf accounting and debugging.
@@ -286,6 +313,7 @@ struct dma_buf {
 	unsigned vmapping_counter;
 	void *vmap_ptr;
 	const char *exp_name;
+	const char *name;
 	struct module *owner;
 	struct list_head list_node;
 	void *priv;
@@ -307,6 +335,8 @@ struct dma_buf {
  * @dmabuf: buffer for this attachment.
  * @dev: device attached to the buffer.
  * @node: list of dma_buf_attachment.
+ * @sgt: cached mapping.
+ * @dir: direction of cached mapping.
  * @priv: exporter specific attachment data.
  *
  * This structure holds the attachment information between the dma_buf buffer
@@ -322,6 +352,8 @@ struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
 	struct device *dev;
 	struct list_head node;
+	struct sg_table *sgt;
+	enum dma_data_direction dir;
 	void *priv;
 };
 
diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h
index dd0a452373e7..a337313e064f 100644
--- a/include/linux/genalloc.h
+++ b/include/linux/genalloc.h
@@ -75,6 +75,7 @@ struct gen_pool_chunk {
 	struct list_head next_chunk;	/* next chunk in pool */
 	atomic_long_t avail;
 	phys_addr_t phys_addr;		/* physical starting address of memory chunk */
+	void *owner;			/* private data to retrieve at alloc time */
 	unsigned long start_addr;	/* start address of memory chunk */
 	unsigned long end_addr;		/* end address of memory chunk (inclusive) */
 	unsigned long bits[0];		/* bitmap for allocating memory chunk */
@@ -96,8 +97,15 @@ struct genpool_data_fixed {
 
 extern struct gen_pool *gen_pool_create(int, int);
 extern phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long);
-extern int gen_pool_add_virt(struct gen_pool *, unsigned long, phys_addr_t,
-			     size_t, int);
+extern int gen_pool_add_owner(struct gen_pool *, unsigned long, phys_addr_t,
+			     size_t, int, void *);
+
+static inline int gen_pool_add_virt(struct gen_pool *pool, unsigned long addr,
+		phys_addr_t phys, size_t size, int nid)
+{
+	return gen_pool_add_owner(pool, addr, phys, size, nid, NULL);
+}
+
 /**
  * gen_pool_add - add a new chunk of special memory to the pool
  * @pool: pool to add new memory chunk to
@@ -116,12 +124,47 @@ static inline int gen_pool_add(struct gen_pool *pool, unsigned long addr,
 	return gen_pool_add_virt(pool, addr, -1, size, nid);
 }
 extern void gen_pool_destroy(struct gen_pool *);
-extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
-extern unsigned long gen_pool_alloc_algo(struct gen_pool *, size_t,
-		genpool_algo_t algo, void *data);
+unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
+		genpool_algo_t algo, void *data, void **owner);
+
+static inline unsigned long gen_pool_alloc_owner(struct gen_pool *pool,
+		size_t size, void **owner)
+{
+	return gen_pool_alloc_algo_owner(pool, size, pool->algo, pool->data,
+			owner);
+}
+
+static inline unsigned long gen_pool_alloc_algo(struct gen_pool *pool,
+		size_t size, genpool_algo_t algo, void *data)
+{
+	return gen_pool_alloc_algo_owner(pool, size, algo, data, NULL);
+}
+
+/**
+ * gen_pool_alloc - allocate special memory from the pool
+ * @pool: pool to allocate from
+ * @size: number of bytes to allocate from the pool
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses the pool allocation function (with first-fit algorithm by default).
+ * Can not be used in NMI handler on architectures without
+ * NMI-safe cmpxchg implementation.
+ */
+static inline unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
+{
+	return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
+}
+
 extern void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size,
 		dma_addr_t *dma);
-extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
+extern void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr,
+		size_t size, void **owner);
+static inline void gen_pool_free(struct gen_pool *pool, unsigned long addr,
+                size_t size)
+{
+	gen_pool_free_owner(pool, addr, size, NULL);
+}
+
 extern void gen_pool_for_each_chunk(struct gen_pool *,
 	void (*)(struct gen_pool *, struct gen_pool_chunk *, void *), void *);
 extern size_t gen_pool_avail(struct gen_pool *);
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index 927ad6451105..9918a6c910c5 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -47,6 +47,7 @@ enum hdmi_infoframe_type {
 	HDMI_INFOFRAME_TYPE_AVI = 0x82,
 	HDMI_INFOFRAME_TYPE_SPD = 0x83,
 	HDMI_INFOFRAME_TYPE_AUDIO = 0x84,
+	HDMI_INFOFRAME_TYPE_DRM = 0x87,
 };
 
 #define HDMI_IEEE_OUI 0x000c03
@@ -55,6 +56,7 @@ enum hdmi_infoframe_type {
 #define HDMI_AVI_INFOFRAME_SIZE    13
 #define HDMI_SPD_INFOFRAME_SIZE    25
 #define HDMI_AUDIO_INFOFRAME_SIZE  10
+#define HDMI_DRM_INFOFRAME_SIZE    26
 
 #define HDMI_INFOFRAME_SIZE(type)	\
 	(HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE)
@@ -152,6 +154,17 @@ enum hdmi_content_type {
 	HDMI_CONTENT_TYPE_GAME,
 };
 
+enum hdmi_metadata_type {
+	HDMI_STATIC_METADATA_TYPE1 = 1,
+};
+
+enum hdmi_eotf {
+	HDMI_EOTF_TRADITIONAL_GAMMA_SDR,
+	HDMI_EOTF_TRADITIONAL_GAMMA_HDR,
+	HDMI_EOTF_SMPTE_ST2084,
+	HDMI_EOTF_BT_2100_HLG,
+};
+
 struct hdmi_avi_infoframe {
 	enum hdmi_infoframe_type type;
 	unsigned char version;
@@ -175,12 +188,37 @@ struct hdmi_avi_infoframe {
 	unsigned short right_bar;
 };
 
+/* DRM Infoframe as per CTA 861.G spec */
+struct hdmi_drm_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+	enum hdmi_eotf eotf;
+	enum hdmi_metadata_type metadata_type;
+	struct {
+		u16 x, y;
+	} display_primaries[3];
+	struct {
+		u16 x, y;
+	} white_point;
+	u16 max_display_mastering_luminance;
+	u16 min_display_mastering_luminance;
+	u16 max_cll;
+	u16 max_fall;
+};
+
 int hdmi_avi_infoframe_init(struct hdmi_avi_infoframe *frame);
 ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer,
 				size_t size);
 ssize_t hdmi_avi_infoframe_pack_only(const struct hdmi_avi_infoframe *frame,
 				     void *buffer, size_t size);
 int hdmi_avi_infoframe_check(struct hdmi_avi_infoframe *frame);
+int hdmi_drm_infoframe_init(struct hdmi_drm_infoframe *frame);
+ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame, void *buffer,
+				size_t size);
+ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame,
+				     void *buffer, size_t size);
+int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame);
 
 enum hdmi_spd_sdi {
 	HDMI_SPD_SDI_UNKNOWN,
@@ -320,6 +358,33 @@ struct hdmi_vendor_infoframe {
 	unsigned int s3d_ext_data;
 };
 
+/* HDR Metadata as per 861.G spec */
+struct hdr_static_metadata {
+	__u8 eotf;
+	__u8 metadata_type;
+	__u16 max_cll;
+	__u16 max_fall;
+	__u16 min_cll;
+};
+
+/**
+ * struct hdr_sink_metadata - HDR sink metadata
+ *
+ * Metadata Information read from Sink's EDID
+ */
+struct hdr_sink_metadata {
+	/**
+	 * @metadata_type: Static_Metadata_Descriptor_ID.
+	 */
+	__u32 metadata_type;
+	/**
+	 * @hdmi_type1: HDR Metadata Infoframe.
+	 */
+	union {
+		struct hdr_static_metadata hdmi_type1;
+	};
+};
+
 int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame);
 ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame,
 				   void *buffer, size_t size);
@@ -344,6 +409,7 @@ union hdmi_vendor_any_infoframe {
  * @spd: spd infoframe
  * @vendor: union of all vendor infoframes
  * @audio: audio infoframe
+ * @drm: Dynamic Range and Mastering infoframe
  *
  * This is used by the generic pack function. This works since all infoframes
  * have the same header which also indicates which type of infoframe should be
@@ -355,6 +421,7 @@ union hdmi_infoframe {
 	struct hdmi_spd_infoframe spd;
 	union hdmi_vendor_any_infoframe vendor;
 	struct hdmi_audio_infoframe audio;
+	struct hdmi_drm_infoframe drm;
 };
 
 ssize_t hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer,
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index cfff30b9a62e..e6eea45e1154 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -297,6 +297,8 @@ struct host1x_device {
 	struct list_head clients;
 
 	bool registered;
+
+	struct device_dma_parameters dma_parms;
 };
 
 static inline struct host1x_device *to_host1x_device(struct device *dev)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index edf9e8f32d70..1dcb763bb610 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -117,9 +117,12 @@ struct memcg_shrinker_map {
 struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
 
+	/* Legacy local VM stats */
+	struct lruvec_stat __percpu *lruvec_stat_local;
+
+	/* Subtree VM stats (batched updates) */
 	struct lruvec_stat __percpu *lruvec_stat_cpu;
 	atomic_long_t		lruvec_stat[NR_VM_NODE_STAT_ITEMS];
-	atomic_long_t		lruvec_stat_local[NR_VM_NODE_STAT_ITEMS];
 
 	unsigned long		lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 
@@ -265,17 +268,18 @@ struct mem_cgroup {
 	atomic_t		moving_account;
 	struct task_struct	*move_lock_task;
 
-	/* memory.stat */
+	/* Legacy local VM stats and events */
+	struct memcg_vmstats_percpu __percpu *vmstats_local;
+
+	/* Subtree VM stats and events (batched updates) */
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
 	MEMCG_PADDING(_pad2_);
 
 	atomic_long_t		vmstats[MEMCG_NR_STAT];
-	atomic_long_t		vmstats_local[MEMCG_NR_STAT];
-
 	atomic_long_t		vmevents[NR_VM_EVENT_ITEMS];
-	atomic_long_t		vmevents_local[NR_VM_EVENT_ITEMS];
 
+	/* memory.events */
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
 
 	unsigned long		socket_pressure;
@@ -567,7 +571,11 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
 static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg,
 						   int idx)
 {
-	long x = atomic_long_read(&memcg->vmstats_local[idx]);
+	long x = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		x += per_cpu(memcg->vmstats_local->stat[idx], cpu);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -641,13 +649,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 						    enum node_stat_item idx)
 {
 	struct mem_cgroup_per_node *pn;
-	long x;
+	long x = 0;
+	int cpu;
 
 	if (mem_cgroup_disabled())
 		return node_page_state(lruvec_pgdat(lruvec), idx);
 
 	pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-	x = atomic_long_read(&pn->lruvec_stat_local[idx]);
+	for_each_possible_cpu(cpu)
+		x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f0628660d541..1732dea030b2 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -81,6 +81,7 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
  * @res: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
  * @kill: callback to transition @ref to the dead state
+ * @cleanup: callback to wait for @ref to be idle and reap it
  * @dev: host device of the mapping for debug
  * @data: private data pointer for page_free()
  * @type: memory type: see MEMORY_* in memory_hotplug.h
@@ -92,6 +93,7 @@ struct dev_pagemap {
 	struct resource res;
 	struct percpu_ref *ref;
 	void (*kill)(struct percpu_ref *ref);
+	void (*cleanup)(struct percpu_ref *ref);
 	struct device *dev;
 	void *data;
 	enum memory_type type;
@@ -100,6 +102,7 @@ struct dev_pagemap {
 
 #ifdef CONFIG_ZONE_DEVICE
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 		struct dev_pagemap *pgmap);
 
@@ -118,6 +121,11 @@ static inline void *devm_memremap_pages(struct device *dev,
 	return ERR_PTR(-ENXIO);
 }
 
+static inline void devm_memunmap_pages(struct device *dev,
+		struct dev_pagemap *pgmap)
+{
+}
+
 static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 		struct dev_pagemap *pgmap)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd0b5f4e1e45..798cdda9560e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2686,7 +2686,24 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
 			       unsigned long size, pte_fn_t fn, void *data);
 
-
+struct pfn_range_apply;
+typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
+			 struct pfn_range_apply *closure);
+struct pfn_range_apply {
+	struct mm_struct *mm;
+	pter_fn_t ptefn;
+	unsigned int alloc;
+};
+extern int apply_to_pfn_range(struct pfn_range_apply *closure,
+			      unsigned long address, unsigned long size);
+unsigned long apply_as_wrprotect(struct address_space *mapping,
+				 pgoff_t first_index, pgoff_t nr);
+unsigned long apply_as_clean(struct address_space *mapping,
+			     pgoff_t first_index, pgoff_t nr,
+			     pgoff_t bitmap_pgoff,
+			     unsigned long *bitmap,
+			     pgoff_t *start,
+			     pgoff_t *end);
 #ifdef CONFIG_PAGE_POISONING
 extern bool page_poisoning_enabled(void);
 extern void kernel_poison_pages(struct page *page, int numpages, int enable);
diff --git a/include/linux/reservation.h b/include/linux/reservation.h
index ee750765cc94..644a22dbe53b 100644
--- a/include/linux/reservation.h
+++ b/include/linux/reservation.h
@@ -216,8 +216,12 @@ reservation_object_unlock(struct reservation_object *obj)
 {
 #ifdef CONFIG_DEBUG_MUTEXES
 	/* Test shared fence slot reservation */
-	if (obj->fence)
-		obj->fence->shared_max = obj->fence->shared_count;
+	if (rcu_access_pointer(obj->fence)) {
+		struct reservation_object_list *fence =
+			reservation_object_get_list(obj);
+
+		fence->shared_max = fence->shared_count;
+	}
 #endif
 	ww_mutex_unlock(&obj->lock);
 }
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a3fda9f024c3..4a7944078cc3 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -54,6 +54,10 @@ static inline void mmdrop(struct mm_struct *mm)
  * followed by taking the mmap_sem for writing before modifying the
  * vmas or anything the coredump pretends not to change from under it.
  *
+ * It also has to be called when mmgrab() is used in the context of
+ * the process, but then the mm_count refcount is transferred outside
+ * the context of the process to run down_write() on that pinned mm.
+ *
  * NOTE: find_extend_vma() called from GUP context is the only place
  * that can modify the "mm" (notably the vm_start/end) under mmap_sem
  * for reading and outside the context of the process, so it is also
diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 3b67c93ff101..3d174e20aa53 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -49,6 +49,7 @@ enum sof_ipc_dai_type {
 	SOF_DAI_INTEL_SSP,		/**< Intel SSP */
 	SOF_DAI_INTEL_DMIC,		/**< Intel DMIC */
 	SOF_DAI_INTEL_HDA,		/**< Intel HD/A */
+	SOF_DAI_INTEL_SOUNDWIRE,	/**< Intel SoundWire */
 };
 
 /* general purpose DAI configuration */
diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h
index ccb6a004b37b..1efcf7b18ec2 100644
--- a/include/sound/sof/header.h
+++ b/include/sound/sof/header.h
@@ -48,6 +48,7 @@
 #define SOF_IPC_FW_READY			SOF_GLB_TYPE(0x7U)
 #define SOF_IPC_GLB_DAI_MSG			SOF_GLB_TYPE(0x8U)
 #define SOF_IPC_GLB_TRACE_MSG			SOF_GLB_TYPE(0x9U)
+#define SOF_IPC_GLB_GDB_DEBUG                   SOF_GLB_TYPE(0xAU)
 
 /*
  * DSP Command Message Types
@@ -78,6 +79,7 @@
 #define SOF_IPC_COMP_GET_VALUE			SOF_CMD_TYPE(0x002)
 #define SOF_IPC_COMP_SET_DATA			SOF_CMD_TYPE(0x003)
 #define SOF_IPC_COMP_GET_DATA			SOF_CMD_TYPE(0x004)
+#define SOF_IPC_COMP_NOTIFICATION		SOF_CMD_TYPE(0x005)
 
 /* DAI messages */
 #define SOF_IPC_DAI_CONFIG			SOF_CMD_TYPE(0x001)
@@ -153,6 +155,27 @@ struct sof_ipc_compound_hdr {
 	uint32_t count;		/**< count of 0 means end of compound sequence */
 }  __packed;
 
+/**
+ * OOPS header architecture specific data.
+ */
+struct sof_ipc_dsp_oops_arch_hdr {
+	uint32_t arch;		/* Identifier of architecture */
+	uint32_t totalsize;	/* Total size of oops message */
+}  __packed;
+
+/**
+ * OOPS header platform specific data.
+ */
+struct sof_ipc_dsp_oops_plat_hdr {
+	uint32_t configidhi;	/* ConfigID hi 32bits */
+	uint32_t configidlo;	/* ConfigID lo 32bits */
+	uint32_t numaregs;	/* Special regs num */
+	uint32_t stackoffset;	/* Offset to stack pointer from beginning of
+				 * oops message
+				 */
+	uint32_t stackptr;	/* Stack ptr */
+}  __packed;
+
 /** @}*/
 
 #endif
diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h
index 21dae04d8183..16528d2b4a50 100644
--- a/include/sound/sof/info.h
+++ b/include/sound/sof/info.h
@@ -18,6 +18,14 @@
 
 #define SOF_IPC_MAX_ELEMS	16
 
+/*
+ * Firmware boot info flag bits (64-bit)
+ */
+#define SOF_IPC_INFO_BUILD		BIT(0)
+#define SOF_IPC_INFO_LOCKS		BIT(1)
+#define SOF_IPC_INFO_LOCKSV		BIT(2)
+#define SOF_IPC_INFO_GDB		BIT(3)
+
 /* extended data types that can be appended onto end of sof_ipc_fw_ready */
 enum sof_ipc_ext_data {
 	SOF_IPC_EXT_DMA_BUFFER = 0,
@@ -49,16 +57,8 @@ struct sof_ipc_fw_ready {
 	uint32_t hostbox_size;
 	struct sof_ipc_fw_version version;
 
-	/* Miscellaneous debug flags showing build/debug features enabled */
-	union {
-		uint64_t reserved;
-		struct {
-			uint64_t build:1;
-			uint64_t locks:1;
-			uint64_t locks_verbose:1;
-			uint64_t gdb:1;
-		} bits;
-	} debug;
+	/* Miscellaneous flags */
+	uint64_t flags;
 
 	/* reserved for future use */
 	uint32_t reserved[4];
diff --git a/include/sound/sof/xtensa.h b/include/sound/sof/xtensa.h
index a7189984000d..d25c764b10e8 100644
--- a/include/sound/sof/xtensa.h
+++ b/include/sound/sof/xtensa.h
@@ -17,7 +17,8 @@
 
 /* Xtensa Firmware Oops data */
 struct sof_ipc_dsp_oops_xtensa {
-	struct sof_ipc_hdr hdr;
+	struct sof_ipc_dsp_oops_arch_hdr arch_hdr;
+	struct sof_ipc_dsp_oops_plat_hdr plat_hdr;
 	uint32_t exccause;
 	uint32_t excvaddr;
 	uint32_t ps;
@@ -38,7 +39,11 @@ struct sof_ipc_dsp_oops_xtensa {
 	uint32_t intenable;
 	uint32_t interrupt;
 	uint32_t sar;
-	uint32_t stack;
+	uint32_t debugcause;
+	uint32_t windowbase;
+	uint32_t windowstart;
+	uint32_t excsave1;
+	uint32_t ar[];
 }  __packed;
 
 #endif
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 661d73f9a919..8a5b2f8f8eb9 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -50,6 +50,7 @@ typedef unsigned int drm_handle_t;
 
 #else /* One of the BSDs */
 
+#include <stdint.h>
 #include <sys/ioccom.h>
 #include <sys/types.h>
 typedef int8_t   __s8;
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 83cd1636b9be..5ab331e5dc23 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -33,6 +33,15 @@
 extern "C" {
 #endif
 
+/**
+ * DOC: overview
+ *
+ * DRM exposes many UAPI and structure definition to have a consistent
+ * and standardized interface with user.
+ * Userspace can refer to these structure definitions and UAPI formats
+ * to communicate to driver
+ */
+
 #define DRM_CONNECTOR_NAME_LEN	32
 #define DRM_DISPLAY_MODE_LEN	32
 #define DRM_PROP_NAME_LEN	32
@@ -630,6 +639,92 @@ struct drm_color_lut {
 	__u16 reserved;
 };
 
+/**
+ * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data.
+ *
+ * HDR Metadata Infoframe as per CTA 861.G spec. This is expected
+ * to match exactly with the spec.
+ *
+ * Userspace is expected to pass the metadata information as per
+ * the format described in this structure.
+ */
+struct hdr_metadata_infoframe {
+	/**
+	 * @eotf: Electro-Optical Transfer Function (EOTF)
+	 * used in the stream.
+	 */
+	__u8 eotf;
+	/**
+	 * @metadata_type: Static_Metadata_Descriptor_ID.
+	 */
+	__u8 metadata_type;
+	/**
+	 * @display_primaries: Color Primaries of the Data.
+	 * These are coded as unsigned 16-bit values in units of
+	 * 0.00002, where 0x0000 represents zero and 0xC350
+	 * represents 1.0000.
+	 * @display_primaries.x: X cordinate of color primary.
+	 * @display_primaries.y: Y cordinate of color primary.
+	 */
+	struct {
+		__u16 x, y;
+		} display_primaries[3];
+	/**
+	 * @white_point: White Point of Colorspace Data.
+	 * These are coded as unsigned 16-bit values in units of
+	 * 0.00002, where 0x0000 represents zero and 0xC350
+	 * represents 1.0000.
+	 * @white_point.x: X cordinate of whitepoint of color primary.
+	 * @white_point.y: Y cordinate of whitepoint of color primary.
+	 */
+	struct {
+		__u16 x, y;
+		} white_point;
+	/**
+	 * @max_display_mastering_luminance: Max Mastering Display Luminance.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_display_mastering_luminance;
+	/**
+	 * @min_display_mastering_luminance: Min Mastering Display Luminance.
+	 * This value is coded as an unsigned 16-bit value in units of
+	 * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
+	 * represents 6.5535 cd/m2.
+	 */
+	__u16 min_display_mastering_luminance;
+	/**
+	 * @max_cll: Max Content Light Level.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_cll;
+	/**
+	 * @max_fall: Max Frame Average Light Level.
+	 * This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
+	 * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
+	 */
+	__u16 max_fall;
+};
+
+/**
+ * struct hdr_output_metadata - HDR output metadata
+ *
+ * Metadata Information to be passed from userspace
+ */
+struct hdr_output_metadata {
+	/**
+	 * @metadata_type: Static_Metadata_Descriptor_ID.
+	 */
+	__u32 metadata_type;
+	/**
+	 * @hdmi_metadata_type1: HDR Metadata Infoframe.
+	 */
+	union {
+		struct hdr_metadata_infoframe hdmi_metadata_type1;
+	};
+};
+
 #define DRM_MODE_PAGE_FLIP_EVENT 0x01
 #define DRM_MODE_PAGE_FLIP_ASYNC 0x02
 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4
@@ -803,6 +898,10 @@ struct drm_format_modifier {
 };
 
 /**
+ * struct drm_mode_create_blob - Create New block property
+ * @data: Pointer to data to copy.
+ * @length: Length of data to copy.
+ * @blob_id: new property ID.
  * Create a new 'blob' data property, copying length bytes from data pointer,
  * and returning new blob ID.
  */
@@ -816,6 +915,8 @@ struct drm_mode_create_blob {
 };
 
 /**
+ * struct drm_mode_destroy_blob - Destroy user blob
+ * @blob_id: blob_id to destroy
  * Destroy a user-created blob property.
  */
 struct drm_mode_destroy_blob {
@@ -823,6 +924,12 @@ struct drm_mode_destroy_blob {
 };
 
 /**
+ * struct drm_mode_create_lease - Create lease
+ * @object_ids: Pointer to array of object ids.
+ * @object_count: Number of object ids.
+ * @flags: flags for new FD.
+ * @lessee_id: unique identifier for lessee.
+ * @fd: file descriptor to new drm_master file.
  * Lease mode resources, creating another drm_master.
  */
 struct drm_mode_create_lease {
@@ -840,6 +947,10 @@ struct drm_mode_create_lease {
 };
 
 /**
+ * struct drm_mode_list_lessees - List lessees
+ * @count_lessees: Number of lessees.
+ * @pad: pad.
+ * @lessees_ptr: Pointer to lessess.
  * List lesses from a drm_master
  */
 struct drm_mode_list_lessees {
@@ -860,6 +971,10 @@ struct drm_mode_list_lessees {
 };
 
 /**
+ * struct drm_mode_get_lease - Get Lease
+ * @count_objects: Number of leased objects.
+ * @pad: pad.
+ * @objects_ptr: Pointer to objects.
  * Get leased objects
  */
 struct drm_mode_get_lease {
@@ -880,6 +995,8 @@ struct drm_mode_get_lease {
 };
 
 /**
+ * struct drm_mode_revoke_lease - Revoke lease
+ * @lessee_id: Unique ID of lessee.
  * Revoke lease
  */
 struct drm_mode_revoke_lease {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a73f5316766..328d05e77d9f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -136,6 +136,8 @@ enum drm_i915_gem_engine_class {
 struct i915_engine_class_instance {
 	__u16 engine_class; /* see enum drm_i915_gem_engine_class */
 	__u16 engine_instance;
+#define I915_ENGINE_CLASS_INVALID_NONE -1
+#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
 };
 
 /**
@@ -355,6 +357,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -415,6 +419,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -598,6 +604,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1120,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT		(1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1464,8 +1485,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
@@ -1507,6 +1529,41 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
+
+/*
+ * I915_CONTEXT_PARAM_ENGINES:
+ *
+ * Bind this context to operate on this subset of available engines. Henceforth,
+ * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as
+ * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0]
+ * and upwards. Slots 0...N are filled in using the specified (class, instance).
+ * Use
+ *	engine_class: I915_ENGINE_CLASS_INVALID,
+ *	engine_instance: I915_ENGINE_CLASS_INVALID_NONE
+ * to specify a gap in the array that can be filled in later, e.g. by a
+ * virtual engine used for load balancing.
+ *
+ * Setting the number of engines bound to the context to 0, by passing a zero
+ * sized argument, will revert back to default settings.
+ *
+ * See struct i915_context_param_engines.
+ *
+ * Extensions:
+ *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
+ *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ */
+#define I915_CONTEXT_PARAM_ENGINES	0xa
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
@@ -1540,9 +1597,10 @@ struct drm_i915_gem_context_param_sseu {
 	struct i915_engine_class_instance engine;
 
 	/*
-	 * Unused for now. Must be cleared to zero.
+	 * Unknown flags must be cleared to zero.
 	 */
 	__u32 flags;
+#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0)
 
 	/*
 	 * Mask of slices to enable for the context. Valid values are a subset
@@ -1570,12 +1628,115 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+/*
+ * i915_context_engines_load_balance:
+ *
+ * Enable load balancing across this set of engines.
+ *
+ * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when
+ * used will proxy the execbuffer request onto one of the set of engines
+ * in such a way as to distribute the load evenly across the set.
+ *
+ * The set of engines must be compatible (e.g. the same HW class) as they
+ * will share the same logical GPU context and ring.
+ *
+ * To intermix rendering with the virtual engine and direct rendering onto
+ * the backing engines (bypassing the load balancing proxy), the context must
+ * be defined to use a single timeline for all engines.
+ */
+struct i915_context_engines_load_balance {
+	struct i915_user_extension base;
+
+	__u16 engine_index;
+	__u16 num_siblings;
+	__u32 flags; /* all undefined flags must be zero */
+
+	__u64 mbz64; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \
+	struct i915_user_extension base; \
+	__u16 engine_index; \
+	__u16 num_siblings; \
+	__u32 flags; \
+	__u64 mbz64; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/*
+ * i915_context_engines_bond:
+ *
+ * Constructed bonded pairs for execution within a virtual engine.
+ *
+ * All engines are equal, but some are more equal than others. Given
+ * the distribution of resources in the HW, it may be preferable to run
+ * a request on a given subset of engines in parallel to a request on a
+ * specific engine. We enable this selection of engines within a virtual
+ * engine by specifying bonding pairs, for any given master engine we will
+ * only execute on one of the corresponding siblings within the virtual engine.
+ *
+ * To execute a request in parallel on the master engine and a sibling requires
+ * coordination with a I915_EXEC_FENCE_SUBMIT.
+ */
+struct i915_context_engines_bond {
+	struct i915_user_extension base;
+
+	struct i915_engine_class_instance master;
+
+	__u16 virtual_index; /* index of virtual engine in ctx->engines[] */
+	__u16 num_bonds;
+
+	__u64 flags; /* all undefined flags must be zero */
+	__u64 mbz64[4]; /* reserved for future use; must be zero */
+
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \
+	struct i915_user_extension base; \
+	struct i915_engine_class_instance master; \
+	__u16 virtual_index; \
+	__u16 num_bonds; \
+	__u64 flags; \
+	__u64 mbz64[4]; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+struct i915_context_param_engines {
+	__u64 extensions; /* linked chain of extension blocks, 0 terminates */
+#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
+#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+	struct i915_engine_class_instance engines[0];
+} __attribute__((packed));
+
+#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \
+	__u64 extensions; \
+	struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
 struct drm_i915_gem_context_create_ext_setparam {
 #define I915_CONTEXT_CREATE_EXT_SETPARAM 0
 	struct i915_user_extension base;
 	struct drm_i915_gem_context_param param;
 };
 
+struct drm_i915_gem_context_create_ext_clone {
+#define I915_CONTEXT_CREATE_EXT_CLONE 1
+	struct i915_user_extension base;
+	__u32 clone_id;
+	__u32 flags;
+#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
+#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
+#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
+#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
+#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
+#define I915_CONTEXT_CLONE_VM		(1u << 5)
+#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
+	__u64 rsvd;
+};
+
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;
 	__u32 pad;
@@ -1821,6 +1982,7 @@ struct drm_i915_perf_oa_config {
 struct drm_i915_query_item {
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
+#define DRM_I915_QUERY_ENGINE_INFO	2
 /* Must be kept compact -- no holes and well documented */
 
 	/*
@@ -1919,6 +2081,47 @@ struct drm_i915_query_topology_info {
 	__u8 data[];
 };
 
+/**
+ * struct drm_i915_engine_info
+ *
+ * Describes one engine and it's capabilities as known to the driver.
+ */
+struct drm_i915_engine_info {
+	/** Engine class and instance. */
+	struct i915_engine_class_instance engine;
+
+	/** Reserved field. */
+	__u32 rsvd0;
+
+	/** Engine flags. */
+	__u64 flags;
+
+	/** Capabilities of this engine. */
+	__u64 capabilities;
+#define I915_VIDEO_CLASS_CAPABILITY_HEVC		(1 << 0)
+#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC	(1 << 1)
+
+	/** Reserved fields. */
+	__u64 rsvd1[4];
+};
+
+/**
+ * struct drm_i915_query_engine_info
+ *
+ * Engine info query enumerates all engines known to the driver by filling in
+ * an array of struct drm_i915_engine_info structures.
+ */
+struct drm_i915_query_engine_info {
+	/** Number of struct drm_i915_engine_info structs following. */
+	__u32 num_engines;
+
+	/** MBZ */
+	__u32 rsvd[3];
+
+	/** Marker for drm_i915_engine_info structures. */
+	struct drm_i915_engine_info engines[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index a52e0283b90d..b5d370638846 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -18,6 +18,8 @@ extern "C" {
 #define DRM_PANFROST_MMAP_BO			0x03
 #define DRM_PANFROST_GET_PARAM			0x04
 #define DRM_PANFROST_GET_BO_OFFSET		0x05
+#define DRM_PANFROST_PERFCNT_ENABLE		0x06
+#define DRM_PANFROST_PERFCNT_DUMP		0x07
 
 #define DRM_IOCTL_PANFROST_SUBMIT		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
 #define DRM_IOCTL_PANFROST_WAIT_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -26,6 +28,15 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_GET_PARAM		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, struct drm_panfrost_get_param)
 #define DRM_IOCTL_PANFROST_GET_BO_OFFSET	DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, struct drm_panfrost_get_bo_offset)
 
+/*
+ * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
+ * param is set to true.
+ * All these ioctl(s) are subject to deprecation, so please don't rely on
+ * them for anything but debugging purpose.
+ */
+#define DRM_IOCTL_PANFROST_PERFCNT_ENABLE	DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_ENABLE, struct drm_panfrost_perfcnt_enable)
+#define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
+
 #define PANFROST_JD_REQ_FS (1 << 0)
 /**
  * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
@@ -135,6 +146,19 @@ struct drm_panfrost_get_bo_offset {
 	__u64 offset;
 };
 
+struct drm_panfrost_perfcnt_enable {
+	__u32 enable;
+	/*
+	 * On bifrost we have 2 sets of counters, this parameter defines the
+	 * one to track.
+	 */
+	__u32 counterset;
+};
+
+struct drm_panfrost_perfcnt_dump {
+	__u64 buf_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index ea70669d2138..58fbe48c91e9 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -37,6 +37,7 @@ extern "C" {
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
 #define DRM_V3D_SUBMIT_TFU                        0x06
+#define DRM_V3D_SUBMIT_CSD                        0x07
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -45,6 +46,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
 #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
+#define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -190,6 +192,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
 	DRM_V3D_PARAM_SUPPORTS_TFU,
+	DRM_V3D_PARAM_SUPPORTS_CSD,
 };
 
 struct drm_v3d_get_param {
@@ -230,6 +233,31 @@ struct drm_v3d_submit_tfu {
 	__u32 out_sync;
 };
 
+/* Submits a compute shader for dispatch.  This job will block on any
+ * previous compute shaders submitted on this fd, and any other
+ * synchronization must be performed with in_sync/out_sync.
+ */
+struct drm_v3d_submit_csd {
+	__u32 cfg[7];
+	__u32 coef[4];
+
+	/* Pointer to a u32 array of the BOs that are referenced by the job.
+	 */
+	__u64 bo_handles;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* sync object to block on before running the CSD job.  Each
+	 * CSD job will execute in the order submitted to its FD.
+	 * Synchronization against rendering/TFU jobs or CSD from
+	 * other fds requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the CSD job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 399f58317cff..02cab33f2f25 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -891,11 +891,13 @@ struct drm_vmw_shader_arg {
  *                                      surface.
  * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is
  *                                      given.
+ * @drm_vmw_surface_flag_coherent:      Back surface with coherent memory.
  */
 enum drm_vmw_surface_flags {
 	drm_vmw_surface_flag_shareable = (1 << 0),
 	drm_vmw_surface_flag_scanout = (1 << 1),
-	drm_vmw_surface_flag_create_buffer = (1 << 2)
+	drm_vmw_surface_flag_create_buffer = (1 << 2),
+	drm_vmw_surface_flag_coherent = (1 << 3),
 };
 
 /**
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index d75df5210a4a..dbc7092e04b5 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -35,7 +35,10 @@ struct dma_buf_sync {
 #define DMA_BUF_SYNC_VALID_FLAGS_MASK \
 	(DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
 
+#define DMA_BUF_NAME_LEN	32
+
 #define DMA_BUF_BASE		'b'
 #define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+#define DMA_BUF_SET_NAME	_IOW(DMA_BUF_BASE, 1, const char *)
 
 #endif
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index dc067ed0b72d..070d1bc7e725 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -35,9 +35,10 @@ struct kfd_ioctl_get_version_args {
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
-#define KFD_IOC_QUEUE_TYPE_COMPUTE	0
-#define KFD_IOC_QUEUE_TYPE_SDMA		1
-#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL	2
+#define KFD_IOC_QUEUE_TYPE_COMPUTE		0x0
+#define KFD_IOC_QUEUE_TYPE_SDMA			0x1
+#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL		0x2
+#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI		0x3
 
 #define KFD_MAX_QUEUE_PERCENTAGE	100
 #define KFD_MAX_QUEUE_PRIORITY		15
@@ -338,6 +339,7 @@ struct kfd_ioctl_acquire_vm_args {
 #define KFD_IOC_ALLOC_MEM_FLAGS_GTT		(1 << 1)
 #define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
 #define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
+#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP	(1 << 4)
 /* Allocation flags: attributes/access options */
 #define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
 #define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
@@ -408,6 +410,21 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 	__u32 n_success;		/* to/from KFD */
 };
 
+/* Allocate GWS for specific queue
+ *
+ * @gpu_id:      device identifier
+ * @queue_id:    queue's id that GWS is allocated for
+ * @num_gws:     how many GWS to allocate
+ * @first_gws:   index of the first GWS allocated.
+ *               only support contiguous GWS allocation
+ */
+struct kfd_ioctl_alloc_queue_gws_args {
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 num_gws;		/* to KFD */
+	__u32 first_gws;	/* from KFD */
+};
+
 struct kfd_ioctl_get_dmabuf_info_args {
 	__u64 size;		/* from KFD */
 	__u64 metadata_ptr;	/* to KFD */
@@ -426,6 +443,13 @@ struct kfd_ioctl_import_dmabuf_args {
 	__u32 dmabuf_fd;	/* to KFD */
 };
 
+/* Register offset inside the remapped mmio page
+ */
+enum kfd_mmio_remap {
+	KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
+	KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -520,7 +544,10 @@ struct kfd_ioctl_import_dmabuf_args {
 #define AMDKFD_IOC_IMPORT_DMABUF		\
 		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
 
+#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
+		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1E
+#define AMDKFD_COMMAND_END		0x1F
 
 #endif
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index f8c00045d537..665e18627f78 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -91,5 +91,6 @@
 #define UDF_SUPER_MAGIC		0x15013346
 #define BALLOON_KVM_MAGIC	0x13661366
 #define ZSMALLOC_MAGIC		0x58295829
+#define DMA_BUF_MAGIC		0x444d4142	/* "DMAB" */
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h
index 37e0a90dc9e6..0868eb47acf7 100644
--- a/include/uapi/sound/sof/abi.h
+++ b/include/uapi/sound/sof/abi.h
@@ -26,7 +26,7 @@
 
 /* SOF ABI version major, minor and patch numbers */
 #define SOF_ABI_MAJOR 3
-#define SOF_ABI_MINOR 4
+#define SOF_ABI_MINOR 6
 #define SOF_ABI_PATCH 0
 
 /* SOF ABI version number. Format within 32bit word is MMmmmppp */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 155048b0eca2..bf9dbffd46b1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -215,7 +215,8 @@ static struct cftype cgroup_base_files[];
 
 static int cgroup_apply_control(struct cgroup *cgrp);
 static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
-static void css_task_iter_advance(struct css_task_iter *it);
+static void css_task_iter_skip(struct css_task_iter *it,
+			       struct task_struct *task);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 					      struct cgroup_subsys *ss);
@@ -738,6 +739,7 @@ struct css_set init_css_set = {
 	.dom_cset		= &init_css_set,
 	.tasks			= LIST_HEAD_INIT(init_css_set.tasks),
 	.mg_tasks		= LIST_HEAD_INIT(init_css_set.mg_tasks),
+	.dying_tasks		= LIST_HEAD_INIT(init_css_set.dying_tasks),
 	.task_iters		= LIST_HEAD_INIT(init_css_set.task_iters),
 	.threaded_csets		= LIST_HEAD_INIT(init_css_set.threaded_csets),
 	.cgrp_links		= LIST_HEAD_INIT(init_css_set.cgrp_links),
@@ -843,6 +845,21 @@ static void css_set_update_populated(struct css_set *cset, bool populated)
 		cgroup_update_populated(link->cgrp, populated);
 }
 
+/*
+ * @task is leaving, advance task iterators which are pointing to it so
+ * that they can resume at the next position.  Advancing an iterator might
+ * remove it from the list, use safe walk.  See css_task_iter_skip() for
+ * details.
+ */
+static void css_set_skip_task_iters(struct css_set *cset,
+				    struct task_struct *task)
+{
+	struct css_task_iter *it, *pos;
+
+	list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
+		css_task_iter_skip(it, task);
+}
+
 /**
  * css_set_move_task - move a task from one css_set to another
  * @task: task being moved
@@ -868,22 +885,9 @@ static void css_set_move_task(struct task_struct *task,
 		css_set_update_populated(to_cset, true);
 
 	if (from_cset) {
-		struct css_task_iter *it, *pos;
-
 		WARN_ON_ONCE(list_empty(&task->cg_list));
 
-		/*
-		 * @task is leaving, advance task iterators which are
-		 * pointing to it so that they can resume at the next
-		 * position.  Advancing an iterator might remove it from
-		 * the list, use safe walk.  See css_task_iter_advance*()
-		 * for details.
-		 */
-		list_for_each_entry_safe(it, pos, &from_cset->task_iters,
-					 iters_node)
-			if (it->task_pos == &task->cg_list)
-				css_task_iter_advance(it);
-
+		css_set_skip_task_iters(from_cset, task);
 		list_del_init(&task->cg_list);
 		if (!css_set_populated(from_cset))
 			css_set_update_populated(from_cset, false);
@@ -1210,6 +1214,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	cset->dom_cset = cset;
 	INIT_LIST_HEAD(&cset->tasks);
 	INIT_LIST_HEAD(&cset->mg_tasks);
+	INIT_LIST_HEAD(&cset->dying_tasks);
 	INIT_LIST_HEAD(&cset->task_iters);
 	INIT_LIST_HEAD(&cset->threaded_csets);
 	INIT_HLIST_NODE(&cset->hlist);
@@ -1460,8 +1465,8 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task,
 
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
 
-static char *cgroup_fill_name(struct cgroup *cgrp, const struct cftype *cft,
-			      char *buf, bool write_link_name)
+static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
+			      char *buf)
 {
 	struct cgroup_subsys *ss = cft->ss;
 
@@ -1471,26 +1476,13 @@ static char *cgroup_fill_name(struct cgroup *cgrp, const struct cftype *cft,
 
 		snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
 			 dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
-			 write_link_name ? cft->link_name : cft->name);
+			 cft->name);
 	} else {
-		strscpy(buf, write_link_name ? cft->link_name : cft->name,
-			CGROUP_FILE_NAME_MAX);
+		strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
 	}
 	return buf;
 }
 
-static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
-			      char *buf)
-{
-	return cgroup_fill_name(cgrp, cft, buf, false);
-}
-
-static char *cgroup_link_name(struct cgroup *cgrp, const struct cftype *cft,
-			      char *buf)
-{
-	return cgroup_fill_name(cgrp, cft, buf, true);
-}
-
 /**
  * cgroup_file_mode - deduce file mode of a control file
  * @cft: the control file in question
@@ -1649,9 +1641,6 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 	}
 
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
-	if (cft->flags & CFTYPE_SYMLINKED)
-		kernfs_remove_by_name(cgrp->kn,
-				      cgroup_link_name(cgrp, cft, name));
 }
 
 /**
@@ -3837,7 +3826,6 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 {
 	char name[CGROUP_FILE_NAME_MAX];
 	struct kernfs_node *kn;
-	struct kernfs_node *kn_link;
 	struct lock_class_key *key = NULL;
 	int ret;
 
@@ -3868,14 +3856,6 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 		spin_unlock_irq(&cgroup_file_kn_lock);
 	}
 
-	if (cft->flags & CFTYPE_SYMLINKED) {
-		kn_link = kernfs_create_link(cgrp->kn,
-					     cgroup_link_name(cgrp, cft, name),
-					     kn);
-		if (IS_ERR(kn_link))
-			return PTR_ERR(kn_link);
-	}
-
 	return 0;
 }
 
@@ -4433,15 +4413,18 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
 			it->task_pos = NULL;
 			return;
 		}
-	} while (!css_set_populated(cset));
+	} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
 
 	if (!list_empty(&cset->tasks))
 		it->task_pos = cset->tasks.next;
-	else
+	else if (!list_empty(&cset->mg_tasks))
 		it->task_pos = cset->mg_tasks.next;
+	else
+		it->task_pos = cset->dying_tasks.next;
 
 	it->tasks_head = &cset->tasks;
 	it->mg_tasks_head = &cset->mg_tasks;
+	it->dying_tasks_head = &cset->dying_tasks;
 
 	/*
 	 * We don't keep css_sets locked across iteration steps and thus
@@ -4467,9 +4450,20 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
 	list_add(&it->iters_node, &cset->task_iters);
 }
 
+static void css_task_iter_skip(struct css_task_iter *it,
+			       struct task_struct *task)
+{
+	lockdep_assert_held(&css_set_lock);
+
+	if (it->task_pos == &task->cg_list) {
+		it->task_pos = it->task_pos->next;
+		it->flags |= CSS_TASK_ITER_SKIPPED;
+	}
+}
+
 static void css_task_iter_advance(struct css_task_iter *it)
 {
-	struct list_head *next;
+	struct task_struct *task;
 
 	lockdep_assert_held(&css_set_lock);
 repeat:
@@ -4479,25 +4473,40 @@ repeat:
 		 * consumed first and then ->mg_tasks.  After ->mg_tasks,
 		 * we move onto the next cset.
 		 */
-		next = it->task_pos->next;
-
-		if (next == it->tasks_head)
-			next = it->mg_tasks_head->next;
+		if (it->flags & CSS_TASK_ITER_SKIPPED)
+			it->flags &= ~CSS_TASK_ITER_SKIPPED;
+		else
+			it->task_pos = it->task_pos->next;
 
-		if (next == it->mg_tasks_head)
+		if (it->task_pos == it->tasks_head)
+			it->task_pos = it->mg_tasks_head->next;
+		if (it->task_pos == it->mg_tasks_head)
+			it->task_pos = it->dying_tasks_head->next;
+		if (it->task_pos == it->dying_tasks_head)
 			css_task_iter_advance_css_set(it);
-		else
-			it->task_pos = next;
 	} else {
 		/* called from start, proceed to the first cset */
 		css_task_iter_advance_css_set(it);
 	}
 
-	/* if PROCS, skip over tasks which aren't group leaders */
-	if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
-	    !thread_group_leader(list_entry(it->task_pos, struct task_struct,
-					    cg_list)))
-		goto repeat;
+	if (!it->task_pos)
+		return;
+
+	task = list_entry(it->task_pos, struct task_struct, cg_list);
+
+	if (it->flags & CSS_TASK_ITER_PROCS) {
+		/* if PROCS, skip over tasks which aren't group leaders */
+		if (!thread_group_leader(task))
+			goto repeat;
+
+		/* and dying leaders w/o live member threads */
+		if (!atomic_read(&task->signal->live))
+			goto repeat;
+	} else {
+		/* skip all dying ones */
+		if (task->flags & PF_EXITING)
+			goto repeat;
+	}
 }
 
 /**
@@ -4553,6 +4562,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 
 	spin_lock_irq(&css_set_lock);
 
+	/* @it may be half-advanced by skips, finish advancing */
+	if (it->flags & CSS_TASK_ITER_SKIPPED)
+		css_task_iter_advance(it);
+
 	if (it->task_pos) {
 		it->cur_task = list_entry(it->task_pos, struct task_struct,
 					  cg_list);
@@ -6034,6 +6047,7 @@ void cgroup_exit(struct task_struct *tsk)
 	if (!list_empty(&tsk->cg_list)) {
 		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
+		list_add_tail(&tsk->cg_list, &cset->dying_tasks);
 		cset->nr_tasks--;
 
 		WARN_ON_ONCE(cgroup_task_frozen(tsk));
@@ -6059,6 +6073,13 @@ void cgroup_release(struct task_struct *task)
 	do_each_subsys_mask(ss, ssid, have_release_callback) {
 		ss->release(task);
 	} while_each_subsys_mask();
+
+	if (use_task_css_set_links) {
+		spin_lock_irq(&css_set_lock);
+		css_set_skip_task_iters(task_css_set(task), task);
+		list_del_init(&task->cg_list);
+		spin_unlock_irq(&css_set_lock);
+	}
 }
 
 void cgroup_free(struct task_struct *task)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 6a1942ed781c..515525ff1cfd 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3254,10 +3254,23 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	spin_unlock_irqrestore(&callback_lock, flags);
 }
 
+/**
+ * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
+ * @tsk: pointer to task_struct with which the scheduler is struggling
+ *
+ * Description: In the case that the scheduler cannot find an allowed cpu in
+ * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
+ * mode however, this value is the same as task_cs(tsk)->effective_cpus,
+ * which will not contain a sane cpumask during cases such as cpu hotplugging.
+ * This is the absolute last resort for the scheduler and it is only used if
+ * _every_ other avenue has been traveled.
+ **/
+
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
 {
 	rcu_read_lock();
-	do_set_cpus_allowed(tsk, task_cs(tsk)->effective_cpus);
+	do_set_cpus_allowed(tsk, is_in_v2_mode() ?
+		task_cs(tsk)->cpus_allowed : cpu_possible_mask);
 	rcu_read_unlock();
 
 	/*
diff --git a/kernel/cred.c b/kernel/cred.c
index e74ffdc98a92..c73a87a4df13 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -446,6 +446,15 @@ int commit_creds(struct cred *new)
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;
+		/*
+		 * If a task drops privileges and becomes nondumpable,
+		 * the dumpability change must become visible before
+		 * the credential change; otherwise, a __ptrace_may_access()
+		 * racing with this change may be able to attach to a task it
+		 * shouldn't be able to attach to (as if the task had dropped
+		 * privileges without becoming nondumpable).
+		 * Pairs with a read barrier in __ptrace_may_access().
+		 */
 		smp_wmb();
 	}
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 1803efb2922f..a75b6a7f458a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -195,6 +195,7 @@ repeat:
 	rcu_read_unlock();
 
 	proc_flush_task(p);
+	cgroup_release(p);
 
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
@@ -220,7 +221,6 @@ repeat:
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	cgroup_release(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 2398832947c6..c4ce08f43bd6 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -18,6 +18,7 @@
 #include <linux/elf.h>
 #include <linux/moduleloader.h>
 #include <linux/completion.h>
+#include <linux/memory.h>
 #include <asm/cacheflush.h>
 #include "core.h"
 #include "patch.h"
@@ -718,16 +719,21 @@ static int klp_init_object_loaded(struct klp_patch *patch,
 	struct klp_func *func;
 	int ret;
 
+	mutex_lock(&text_mutex);
+
 	module_disable_ro(patch->mod);
 	ret = klp_write_object_relocations(patch->mod, obj);
 	if (ret) {
 		module_enable_ro(patch->mod, true);
+		mutex_unlock(&text_mutex);
 		return ret;
 	}
 
 	arch_klp_init_object_loaded(patch, obj);
 	module_enable_ro(patch->mod, true);
 
+	mutex_unlock(&text_mutex);
+
 	klp_for_each_func(obj, func) {
 		ret = klp_find_object_symbol(obj->name, func->old_name,
 					     func->old_sympos,
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1490e63f69a9..6e1970719dc2 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -95,6 +95,7 @@ static void devm_memremap_pages_release(void *data)
 	pgmap->kill(pgmap->ref);
 	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
+	pgmap->cleanup(pgmap->ref);
 
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
@@ -133,8 +134,8 @@ static void devm_memremap_pages_release(void *data)
  * 2/ The altmap field may optionally be initialized, in which case altmap_valid
  *    must be set to true
  *
- * 3/ pgmap->ref must be 'live' on entry and will be killed at
- *    devm_memremap_pages_release() time, or if this routine fails.
+ * 3/ pgmap->ref must be 'live' on entry and will be killed and reaped
+ *    at devm_memremap_pages_release() time, or if this routine fails.
  *
  * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -156,8 +157,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	pgprot_t pgprot = PAGE_KERNEL;
 	int error, nid, is_ram;
 
-	if (!pgmap->ref || !pgmap->kill)
+	if (!pgmap->ref || !pgmap->kill || !pgmap->cleanup) {
+		WARN(1, "Missing reference count teardown definition\n");
 		return ERR_PTR(-EINVAL);
+	}
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -168,14 +171,16 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (conflict_pgmap) {
 		dev_WARN(dev, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
-		return ERR_PTR(-ENOMEM);
+		error = -ENOMEM;
+		goto err_array;
 	}
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(align_end), NULL);
 	if (conflict_pgmap) {
 		dev_WARN(dev, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
-		return ERR_PTR(-ENOMEM);
+		error = -ENOMEM;
+		goto err_array;
 	}
 
 	is_ram = region_intersects(align_start, align_size,
@@ -267,10 +272,18 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	pgmap_array_delete(res);
  err_array:
 	pgmap->kill(pgmap->ref);
+	pgmap->cleanup(pgmap->ref);
+
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
 
+void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+	devm_release_action(dev, devm_memremap_pages_release, pgmap);
+}
+EXPORT_SYMBOL_GPL(devm_memunmap_pages);
+
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
 {
 	/* number of pfns from base where pfn_to_page() is valid */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5710d07e67cf..8456b6e2205f 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -324,6 +324,16 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return -EPERM;
 ok:
 	rcu_read_unlock();
+	/*
+	 * If a task drops privileges and becomes nondumpable (through a syscall
+	 * like setresuid()) while we are trying to access it, we must ensure
+	 * that the dumpability is read after the credentials; otherwise,
+	 * we may be able to attach to a task that we shouldn't be able to
+	 * attach to (as if the task had dropped privileges without becoming
+	 * nondumpable).
+	 * Pairs with a write barrier in commit_creds().
+	 */
+	smp_rmb();
 	mm = task->mm;
 	if (mm &&
 	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
@@ -705,6 +715,10 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 	if (arg.nr < 0)
 		return -EINVAL;
 
+	/* Ensure arg.off fits in an unsigned long */
+	if (arg.off > ULONG_MAX)
+		return 0;
+
 	if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
 		pending = &child->signal->shared_pending;
 	else
@@ -712,18 +726,20 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 
 	for (i = 0; i < arg.nr; ) {
 		kernel_siginfo_t info;
-		s32 off = arg.off + i;
+		unsigned long off = arg.off + i;
+		bool found = false;
 
 		spin_lock_irq(&child->sighand->siglock);
 		list_for_each_entry(q, &pending->list, list) {
 			if (!off--) {
+				found = true;
 				copy_siginfo(&info, &q->info);
 				break;
 			}
 		}
 		spin_unlock_irq(&child->sighand->siglock);
 
-		if (off >= 0) /* beyond the end of the list */
+		if (!found) /* beyond the end of the list */
 			break;
 
 #ifdef CONFIG_COMPAT
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 85f5912d8f70..44b726bab4bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -808,17 +808,18 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
 	ktime_t base, *offset = offsets[offs];
+	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		base = ktime_add(tk->tkr_mono.base, *offset);
+		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	return base;
-
+	return base + nsecs;
 }
 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a12aff849c04..38277af44f5c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,6 +34,7 @@
 #include <linux/hash.h>
 #include <linux/rcupdate.h>
 #include <linux/kprobes.h>
+#include <linux/memory.h>
 
 #include <trace/events/sched.h>
 
@@ -2610,10 +2611,12 @@ static void ftrace_run_update_code(int command)
 {
 	int ret;
 
+	mutex_lock(&text_mutex);
+
 	ret = ftrace_arch_code_modify_prepare();
 	FTRACE_WARN_ON(ret);
 	if (ret)
-		return;
+		goto out_unlock;
 
 	/*
 	 * By default we use stop_machine() to modify the code.
@@ -2625,6 +2628,9 @@ static void ftrace_run_update_code(int command)
 
 	ret = ftrace_arch_code_modify_post_process();
 	FTRACE_WARN_ON(ret);
+
+out_unlock:
+	mutex_unlock(&text_mutex);
 }
 
 static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -2935,14 +2941,13 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 			p = &pg->records[i];
 			p->flags = rec_flags;
 
-#ifndef CC_USING_NOP_MCOUNT
 			/*
 			 * Do the initial record conversion from mcount jump
 			 * to the NOP instructions.
 			 */
-			if (!ftrace_code_disable(mod, p))
+			if (!__is_defined(CC_USING_NOP_MCOUNT) &&
+			    !ftrace_code_disable(mod, p))
 				break;
-#endif
 
 			update_cnt++;
 		}
@@ -4221,10 +4226,13 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
 	struct ftrace_func_entry *entry;
 	struct ftrace_func_map *map;
 	struct hlist_head *hhd;
-	int size = 1 << mapper->hash.size_bits;
-	int i;
+	int size, i;
+
+	if (!mapper)
+		return;
 
 	if (free_func && mapper->hash.count) {
+		size = 1 << mapper->hash.size_bits;
 		for (i = 0; i < size; i++) {
 			hhd = &mapper->hash.buckets[i];
 			hlist_for_each_entry(entry, hhd, hlist) {
@@ -5776,6 +5784,7 @@ void ftrace_module_enable(struct module *mod)
 	struct ftrace_page *pg;
 
 	mutex_lock(&ftrace_lock);
+	mutex_lock(&text_mutex);
 
 	if (ftrace_disabled)
 		goto out_unlock;
@@ -5837,6 +5846,7 @@ void ftrace_module_enable(struct module *mod)
 		ftrace_arch_code_modify_post_process();
 
  out_unlock:
+	mutex_unlock(&text_mutex);
 	mutex_unlock(&ftrace_lock);
 
 	process_cached_mods(mod->name);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1c80521fd436..83e08b78dbee 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6923,7 +6923,7 @@ struct tracing_log_err {
 
 static DEFINE_MUTEX(tracing_err_log_lock);
 
-struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
+static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
 {
 	struct tracing_log_err *err;
 
@@ -8192,7 +8192,7 @@ static const struct file_operations buffer_percent_fops = {
 	.llseek		= default_llseek,
 };
 
-struct dentry *trace_instance_dir;
+static struct dentry *trace_instance_dir;
 
 static void
 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 54373d93e251..ba751f993c3b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1057,7 +1057,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
 	trace_seq_puts(s, "<stack trace>\n");
 
-	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
+	for (p = field->caller; p && p < end && *p != ULONG_MAX; p++) {
 
 		if (trace_seq_has_overflowed(s))
 			break;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index eb7e06b54741..b55906c77ce0 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -426,8 +426,6 @@ end:
 /*
  * Argument syntax:
  *  - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS]
- *
- *  - Remove uprobe: -:[GRP/]EVENT
  */
 static int trace_uprobe_create(int argc, const char **argv)
 {
@@ -443,10 +441,17 @@ static int trace_uprobe_create(int argc, const char **argv)
 	ret = 0;
 	ref_ctr_offset = 0;
 
-	/* argc must be >= 1 */
-	if (argv[0][0] == 'r')
+	switch (argv[0][0]) {
+	case 'r':
 		is_return = true;
-	else if (argv[0][0] != 'p' || argc < 2)
+		break;
+	case 'p':
+		break;
+	default:
+		return -ECANCELED;
+	}
+
+	if (argc < 2)
 		return -ECANCELED;
 
 	if (argv[0][1] == ':')
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 7e85d1e37a6e..770c769d7cb7 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -168,20 +168,21 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
 EXPORT_SYMBOL(gen_pool_create);
 
 /**
- * gen_pool_add_virt - add a new chunk of special memory to the pool
+ * gen_pool_add_owner- add a new chunk of special memory to the pool
  * @pool: pool to add new memory chunk to
  * @virt: virtual starting address of memory chunk to add to pool
  * @phys: physical starting address of memory chunk to add to pool
  * @size: size in bytes of the memory chunk to add to pool
  * @nid: node id of the node the chunk structure and bitmap should be
  *       allocated on, or -1
+ * @owner: private data the publisher would like to recall at alloc time
  *
  * Add a new chunk of special memory to the specified pool.
  *
  * Returns 0 on success or a -ve errno on failure.
  */
-int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
-		 size_t size, int nid)
+int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t phys,
+		 size_t size, int nid, void *owner)
 {
 	struct gen_pool_chunk *chunk;
 	int nbits = size >> pool->min_alloc_order;
@@ -195,6 +196,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
 	chunk->phys_addr = phys;
 	chunk->start_addr = virt;
 	chunk->end_addr = virt + size - 1;
+	chunk->owner = owner;
 	atomic_long_set(&chunk->avail, size);
 
 	spin_lock(&pool->lock);
@@ -203,7 +205,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
 
 	return 0;
 }
-EXPORT_SYMBOL(gen_pool_add_virt);
+EXPORT_SYMBOL(gen_pool_add_owner);
 
 /**
  * gen_pool_virt_to_phys - return the physical address of memory
@@ -260,35 +262,20 @@ void gen_pool_destroy(struct gen_pool *pool)
 EXPORT_SYMBOL(gen_pool_destroy);
 
 /**
- * gen_pool_alloc - allocate special memory from the pool
- * @pool: pool to allocate from
- * @size: number of bytes to allocate from the pool
- *
- * Allocate the requested number of bytes from the specified pool.
- * Uses the pool allocation function (with first-fit algorithm by default).
- * Can not be used in NMI handler on architectures without
- * NMI-safe cmpxchg implementation.
- */
-unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
-{
-	return gen_pool_alloc_algo(pool, size, pool->algo, pool->data);
-}
-EXPORT_SYMBOL(gen_pool_alloc);
-
-/**
- * gen_pool_alloc_algo - allocate special memory from the pool
+ * gen_pool_alloc_algo_owner - allocate special memory from the pool
  * @pool: pool to allocate from
  * @size: number of bytes to allocate from the pool
  * @algo: algorithm passed from caller
  * @data: data passed to algorithm
+ * @owner: optionally retrieve the chunk owner
  *
  * Allocate the requested number of bytes from the specified pool.
  * Uses the pool allocation function (with first-fit algorithm by default).
  * Can not be used in NMI handler on architectures without
  * NMI-safe cmpxchg implementation.
  */
-unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
-		genpool_algo_t algo, void *data)
+unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
+		genpool_algo_t algo, void *data, void **owner)
 {
 	struct gen_pool_chunk *chunk;
 	unsigned long addr = 0;
@@ -299,6 +286,9 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
 	BUG_ON(in_nmi());
 #endif
 
+	if (owner)
+		*owner = NULL;
+
 	if (size == 0)
 		return 0;
 
@@ -326,12 +316,14 @@ retry:
 		addr = chunk->start_addr + ((unsigned long)start_bit << order);
 		size = nbits << order;
 		atomic_long_sub(size, &chunk->avail);
+		if (owner)
+			*owner = chunk->owner;
 		break;
 	}
 	rcu_read_unlock();
 	return addr;
 }
-EXPORT_SYMBOL(gen_pool_alloc_algo);
+EXPORT_SYMBOL(gen_pool_alloc_algo_owner);
 
 /**
  * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
@@ -367,12 +359,14 @@ EXPORT_SYMBOL(gen_pool_dma_alloc);
  * @pool: pool to free to
  * @addr: starting address of memory to free back to pool
  * @size: size in bytes of memory to free
+ * @owner: private data stashed at gen_pool_add() time
  *
  * Free previously allocated special memory back to the specified
  * pool.  Can not be used in NMI handler on architectures without
  * NMI-safe cmpxchg implementation.
  */
-void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
+void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size,
+		void **owner)
 {
 	struct gen_pool_chunk *chunk;
 	int order = pool->min_alloc_order;
@@ -382,6 +376,9 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
 	BUG_ON(in_nmi());
 #endif
 
+	if (owner)
+		*owner = NULL;
+
 	nbits = (size + (1UL << order) - 1) >> order;
 	rcu_read_lock();
 	list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
@@ -392,6 +389,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
 			BUG_ON(remain);
 			size = nbits << order;
 			atomic_long_add(size, &chunk->avail);
+			if (owner)
+				*owner = chunk->owner;
 			rcu_read_unlock();
 			return;
 		}
@@ -399,7 +398,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
 	rcu_read_unlock();
 	BUG();
 }
-EXPORT_SYMBOL(gen_pool_free);
+EXPORT_SYMBOL(gen_pool_free_owner);
 
 /**
  * gen_pool_for_each_chunk - call func for every chunk of generic memory pool
diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..5006d0e6a5c7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,4 +765,7 @@ config GUP_BENCHMARK
 config ARCH_HAS_PTE_SPECIAL
 	bool
 
+config AS_DIRTY_HELPERS
+        bool
+
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..f5d412bbc2f7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
 obj-$(CONFIG_HMM) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o
diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c
new file mode 100644
index 000000000000..f600e31534fb
--- /dev/null
+++ b/mm/as_dirty_helpers.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/hugetlb.h>
+#include <linux/bitops.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/**
+ * struct apply_as - Closure structure for apply_as_range
+ * @base: struct pfn_range_apply we derive from
+ * @start: Address of first modified pte
+ * @end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ * @vma: Pointer to the struct vm_area_struct we're currently operating on
+ */
+struct apply_as {
+	struct pfn_range_apply base;
+	unsigned long start;
+	unsigned long end;
+	unsigned long total;
+	struct vm_area_struct *vma;
+};
+
+/**
+ * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_wrprotect(pte_t *pte, pgtable_t token,
+			      unsigned long addr,
+			      struct pfn_range_apply *closure)
+{
+	struct apply_as *aas = container_of(closure, typeof(*aas), base);
+	pte_t ptent = *pte;
+
+	if (pte_write(ptent)) {
+		pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+		ptent = pte_wrprotect(old_pte);
+		ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+		aas->total++;
+		aas->start = min(aas->start, addr);
+		aas->end = max(aas->end, addr + PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+/**
+ * struct apply_as_clean - Closure structure for apply_as_clean
+ * @base: struct apply_as we derive from
+ * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap
+ * @bitmap: Bitmap with one bit for each page offset in the address_space range
+ * covered.
+ * @start: Address_space page offset of first modified pte relative
+ * to @bitmap_pgoff
+ * @end: Address_space page offset of last modified pte relative
+ * to @bitmap_pgoff
+ */
+struct apply_as_clean {
+	struct apply_as base;
+	pgoff_t bitmap_pgoff;
+	unsigned long *bitmap;
+	pgoff_t start;
+	pgoff_t end;
+};
+
+/**
+ * apply_pt_clean - Leaf pte callback to clean a pte
+ * @pte: Pointer to the pte
+ * @token: Page table token, see apply_to_pfn_range()
+ * @addr: The virtual page address
+ * @closure: Pointer to a struct pfn_range_apply embedded in a
+ * struct apply_as_clean
+ *
+ * The function cleans a pte and records the range in
+ * virtual address space of touched ptes for efficient TLB flushes.
+ * It also records dirty ptes in a bitmap representing page offsets
+ * in the address_space, as well as the first and last of the bits
+ * touched.
+ *
+ * Return: Always zero.
+ */
+static int apply_pt_clean(pte_t *pte, pgtable_t token,
+			  unsigned long addr,
+			  struct pfn_range_apply *closure)
+{
+	struct apply_as *aas = container_of(closure, typeof(*aas), base);
+	struct apply_as_clean *clean = container_of(aas, typeof(*clean), base);
+	pte_t ptent = *pte;
+
+	if (pte_dirty(ptent)) {
+		pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) +
+			aas->vma->vm_pgoff - clean->bitmap_pgoff;
+		pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
+
+		ptent = pte_mkclean(old_pte);
+		ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
+
+		aas->total++;
+		aas->start = min(aas->start, addr);
+		aas->end = max(aas->end, addr + PAGE_SIZE);
+
+		__set_bit(pgoff, clean->bitmap);
+		clean->start = min(clean->start, pgoff);
+		clean->end = max(clean->end, pgoff + 1);
+	}
+
+	return 0;
+}
+
+/**
+ * apply_as_range - Apply a pte callback to all PTEs pointing into a range
+ * of an address_space.
+ * @mapping: Pointer to the struct address_space
+ * @aas: Closure structure
+ * @first_index: First page offset in the address_space
+ * @nr: Number of incremental page offsets to cover
+ *
+ * Return: Number of ptes touched. Note that this number might be larger
+ * than @nr if there are overlapping vmas
+ */
+static unsigned long apply_as_range(struct address_space *mapping,
+				    struct apply_as *aas,
+				    pgoff_t first_index, pgoff_t nr)
+{
+	struct vm_area_struct *vma;
+	pgoff_t vba, vea, cba, cea;
+	unsigned long start_addr, end_addr;
+	struct mmu_notifier_range range;
+
+	i_mmap_lock_read(mapping);
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
+				  first_index + nr - 1) {
+		unsigned long vm_flags = READ_ONCE(vma->vm_flags);
+
+		/*
+		 * We can only do advisory flag tests below, since we can't
+		 * require the vm's mmap_sem to be held to protect the flags.
+		 * Therefore, callers that strictly depend on specific mmap
+		 * flags to remain constant throughout the operation must
+		 * either ensure those flags are immutable for all relevant
+		 * vmas or can't use this function. Fixing this properly would
+		 * require the vma::vm_flags to be protected by a separate
+		 * lock taken after the i_mmap_lock
+		 */
+
+		/* Skip non-applicable VMAs */
+		if ((vm_flags & (VM_SHARED | VM_WRITE)) !=
+		    (VM_SHARED | VM_WRITE))
+			continue;
+
+		/* Warn on and skip VMAs whose flags indicate illegal usage */
+		if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO))
+			continue;
+
+		/* Clip to the vma */
+		vba = vma->vm_pgoff;
+		vea = vba + vma_pages(vma);
+		cba = first_index;
+		cba = max(cba, vba);
+		cea = first_index + nr;
+		cea = min(cea, vea);
+
+		/* Translate to virtual address */
+		start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
+		end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
+		if (start_addr >= end_addr)
+			continue;
+
+		aas->base.mm = vma->vm_mm;
+		aas->vma = vma;
+		aas->start = end_addr;
+		aas->end = start_addr;
+
+		mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
+					vma, vma->vm_mm, start_addr, end_addr);
+		mmu_notifier_invalidate_range_start(&range);
+
+		/* Needed when we only change protection? */
+		flush_cache_range(vma, start_addr, end_addr);
+
+		/*
+		 * We're not using tlb_gather_mmu() since typically
+		 * only a small subrange of PTEs are affected.
+		 */
+		inc_tlb_flush_pending(vma->vm_mm);
+
+		/* Should not error since aas->base.alloc == 0 */
+		WARN_ON(apply_to_pfn_range(&aas->base, start_addr,
+					   end_addr - start_addr));
+		if (aas->end > aas->start)
+			flush_tlb_range(vma, aas->start, aas->end);
+
+		mmu_notifier_invalidate_range_end(&range);
+		dec_tlb_flush_pending(vma->vm_mm);
+	}
+	i_mmap_unlock_read(mapping);
+
+	return aas->total;
+}
+
+/**
+ * apply_as_wrprotect - Write-protect all ptes in an address_space range
+ * @mapping: The address_space we want to write protect
+ * @first_index: The first page offset in the range
+ * @nr: Number of incremental page offsets to cover
+ *
+ * WARNING: This function should only be used for address spaces whose
+ * vmas are marked VM_IO and that do not contain huge pages.
+ * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
+ * simply skipped.
+ *
+ * Return: The number of ptes actually write-protected. Note that
+ * already write-protected ptes are not counted.
+ */
+unsigned long apply_as_wrprotect(struct address_space *mapping,
+				 pgoff_t first_index, pgoff_t nr)
+{
+	struct apply_as aas = {
+		.base = {
+			.alloc = 0,
+			.ptefn = apply_pt_wrprotect,
+		},
+		.total = 0,
+	};
+
+	return apply_as_range(mapping, &aas, first_index, nr);
+}
+EXPORT_SYMBOL_GPL(apply_as_wrprotect);
+
+/**
+ * apply_as_clean - Clean all ptes in an address_space range
+ * @mapping: The address_space we want to clean
+ * @first_index: The first page offset in the range
+ * @nr: Number of incremental page offsets to cover
+ * @bitmap_pgoff: The page offset of the first bit in @bitmap
+ * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
+ * cover the whole range @first_index..@first_index + @nr.
+ * @start: Pointer to number of the first set bit in @bitmap.
+ * is modified as new bits are set by the function.
+ * @end: Pointer to the number of the last set bit in @bitmap.
+ * none set. The value is modified as new bits are set by the function.
+ *
+ * Note: When this function returns there is no guarantee that a CPU has
+ * not already dirtied new ptes. However it will not clean any ptes not
+ * reported in the bitmap.
+ *
+ * If a caller needs to make sure all dirty ptes are picked up and none
+ * additional are added, it first needs to write-protect the address-space
+ * range and make sure new writers are blocked in page_mkwrite() or
+ * pfn_mkwrite(). And then after a TLB flush following the write-protection
+ * pick up all dirty bits.
+ *
+ * WARNING: This function should only be used for address spaces whose
+ * vmas are marked VM_IO and that do not contain huge pages.
+ * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
+ * simply skipped.
+ *
+ * Return: The number of dirty ptes actually cleaned.
+ */
+unsigned long apply_as_clean(struct address_space *mapping,
+			     pgoff_t first_index, pgoff_t nr,
+			     pgoff_t bitmap_pgoff,
+			     unsigned long *bitmap,
+			     pgoff_t *start,
+			     pgoff_t *end)
+{
+	bool none_set = (*start >= *end);
+	struct apply_as_clean clean = {
+		.base = {
+			.base = {
+				.alloc = 0,
+				.ptefn = apply_pt_clean,
+			},
+			.total = 0,
+		},
+		.bitmap_pgoff = bitmap_pgoff,
+		.bitmap = bitmap,
+		.start = none_set ? nr : *start,
+		.end = none_set ? 0 : *end,
+	};
+	unsigned long ret = apply_as_range(mapping, &clean.base, first_index,
+					   nr);
+
+	*start = clean.start;
+	*end = clean.end;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(apply_as_clean);
diff --git a/mm/hmm.c b/mm/hmm.c
index c5d840e34b28..f702a3895d05 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1354,9 +1354,8 @@ static void hmm_devmem_ref_release(struct percpu_ref *ref)
 	complete(&devmem->completion);
 }
 
-static void hmm_devmem_ref_exit(void *data)
+static void hmm_devmem_ref_exit(struct percpu_ref *ref)
 {
-	struct percpu_ref *ref = data;
 	struct hmm_devmem *devmem;
 
 	devmem = container_of(ref, struct hmm_devmem, ref);
@@ -1433,10 +1432,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref);
-	if (ret)
-		return ERR_PTR(ret);
-
 	size = ALIGN(size, PA_SECTION_SIZE);
 	addr = min((unsigned long)iomem_resource.end,
 		   (1UL << MAX_PHYSMEM_BITS) - 1);
@@ -1475,6 +1470,7 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	devmem->pagemap.ref = &devmem->ref;
 	devmem->pagemap.data = devmem;
 	devmem->pagemap.kill = hmm_devmem_ref_kill;
+	devmem->pagemap.cleanup = hmm_devmem_ref_exit;
 
 	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
 	if (IS_ERR(result))
@@ -1512,11 +1508,6 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	if (ret)
 		return ERR_PTR(ret);
 
-	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
-			&devmem->ref);
-	if (ret)
-		return ERR_PTR(ret);
-
 	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
 	devmem->pfn_last = devmem->pfn_first +
 			   (resource_size(devmem->resource) >> PAGE_SHIFT);
@@ -1529,6 +1520,7 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	devmem->pagemap.ref = &devmem->ref;
 	devmem->pagemap.data = devmem;
 	devmem->pagemap.kill = hmm_devmem_ref_kill;
+	devmem->pagemap.cleanup = hmm_devmem_ref_exit;
 
 	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
 	if (IS_ERR(result))
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a335f7c1fac4..0f7419938008 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1004,6 +1004,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	down_write(&mm->mmap_sem);
+	result = SCAN_ANY_PROCESS;
+	if (!mmget_still_valid(mm))
+		goto out;
 	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result)
 		goto out;
diff --git a/mm/list_lru.c b/mm/list_lru.c
index e4709fdaa8e6..927d85be32f6 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -354,7 +354,7 @@ static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
 	}
 	return 0;
 fail:
-	__memcg_destroy_list_lru_node(memcg_lrus, begin, i - 1);
+	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
 	return -ENOMEM;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ca0bc6e6be13..ba9138a4a1de 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -691,11 +691,12 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
 	if (mem_cgroup_disabled())
 		return;
 
+	__this_cpu_add(memcg->vmstats_local->stat[idx], val);
+
 	x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		atomic_long_add(x, &memcg->vmstats_local[idx]);
 		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 			atomic_long_add(x, &mi->vmstats[idx]);
 		x = 0;
@@ -745,11 +746,12 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__mod_memcg_state(memcg, idx, val);
 
 	/* Update lruvec */
+	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
+
 	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup_per_node *pi;
 
-		atomic_long_add(x, &pn->lruvec_stat_local[idx]);
 		for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
 			atomic_long_add(x, &pi->lruvec_stat[idx]);
 		x = 0;
@@ -771,11 +773,12 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 	if (mem_cgroup_disabled())
 		return;
 
+	__this_cpu_add(memcg->vmstats_local->events[idx], count);
+
 	x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]);
 	if (unlikely(x > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup *mi;
 
-		atomic_long_add(x, &memcg->vmevents_local[idx]);
 		for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 			atomic_long_add(x, &mi->vmevents[idx]);
 		x = 0;
@@ -790,7 +793,12 @@ static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
-	return atomic_long_read(&memcg->vmevents_local[event]);
+	long x = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		x += per_cpu(memcg->vmstats_local->events[event], cpu);
+	return x;
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
@@ -2191,11 +2199,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->stat[i], 0);
-			if (x) {
-				atomic_long_add(x, &memcg->vmstats_local[i]);
+			if (x)
 				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 					atomic_long_add(x, &memcg->vmstats[i]);
-			}
 
 			if (i >= NR_VM_NODE_STAT_ITEMS)
 				continue;
@@ -2205,12 +2211,10 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 
 				pn = mem_cgroup_nodeinfo(memcg, nid);
 				x = this_cpu_xchg(pn->lruvec_stat_cpu->count[i], 0);
-				if (x) {
-					atomic_long_add(x, &pn->lruvec_stat_local[i]);
+				if (x)
 					do {
 						atomic_long_add(x, &pn->lruvec_stat[i]);
 					} while ((pn = parent_nodeinfo(pn, nid)));
-				}
 			}
 		}
 
@@ -2218,11 +2222,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			long x;
 
 			x = this_cpu_xchg(memcg->vmstats_percpu->events[i], 0);
-			if (x) {
-				atomic_long_add(x, &memcg->vmevents_local[i]);
+			if (x)
 				for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
 					atomic_long_add(x, &memcg->vmevents[i]);
-			}
 		}
 	}
 
@@ -4483,8 +4485,15 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
+	pn->lruvec_stat_local = alloc_percpu(struct lruvec_stat);
+	if (!pn->lruvec_stat_local) {
+		kfree(pn);
+		return 1;
+	}
+
 	pn->lruvec_stat_cpu = alloc_percpu(struct lruvec_stat);
 	if (!pn->lruvec_stat_cpu) {
+		free_percpu(pn->lruvec_stat_local);
 		kfree(pn);
 		return 1;
 	}
@@ -4506,6 +4515,7 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 		return;
 
 	free_percpu(pn->lruvec_stat_cpu);
+	free_percpu(pn->lruvec_stat_local);
 	kfree(pn);
 }
 
@@ -4516,6 +4526,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);
+	free_percpu(memcg->vmstats_local);
 	kfree(memcg);
 }
 
@@ -4544,6 +4555,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (memcg->id.id < 0)
 		goto fail;
 
+	memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
+	if (!memcg->vmstats_local)
+		goto fail;
+
 	memcg->vmstats_percpu = alloc_percpu(struct memcg_vmstats_percpu);
 	if (!memcg->vmstats_percpu)
 		goto fail;
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..462aa47f8878 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long
 }
 EXPORT_SYMBOL(vm_iomap_memory);
 
-static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
-				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
+			      unsigned long addr, unsigned long end)
 {
 	pte_t *pte;
 	int err;
 	pgtable_t token;
 	spinlock_t *uninitialized_var(ptl);
 
-	pte = (mm == &init_mm) ?
+	pte = (closure->mm == &init_mm) ?
 		pte_alloc_kernel(pmd, addr) :
-		pte_alloc_map_lock(mm, pmd, addr, &ptl);
+		pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
 
@@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	token = pmd_pgtable(*pmd);
 
 	do {
-		err = fn(pte++, token, addr, data);
+		err = closure->ptefn(pte++, token, addr, closure);
 		if (err)
 			break;
 	} while (addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
 
-	if (mm != &init_mm)
+	if (closure->mm != &init_mm)
 		pte_unmap_unlock(pte-1, ptl);
 	return err;
 }
 
-static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
-				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
+			      unsigned long addr, unsigned long end)
 {
 	pmd_t *pmd;
 	unsigned long next;
-	int err;
+	int err = 0;
 
 	BUG_ON(pud_huge(*pud));
 
-	pmd = pmd_alloc(mm, pud, addr);
+	pmd = pmd_alloc(closure->mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
+
 	do {
 		next = pmd_addr_end(addr, end);
-		err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+		if (!closure->alloc && pmd_none_or_clear_bad(pmd))
+			continue;
+		err = apply_to_pte_range(closure, pmd, addr, next);
 		if (err)
 			break;
 	} while (pmd++, addr = next, addr != end);
 	return err;
 }
 
-static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
-				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d,
+			      unsigned long addr, unsigned long end)
 {
 	pud_t *pud;
 	unsigned long next;
-	int err;
+	int err = 0;
 
-	pud = pud_alloc(mm, p4d, addr);
+	pud = pud_alloc(closure->mm, p4d, addr);
 	if (!pud)
 		return -ENOMEM;
+
 	do {
 		next = pud_addr_end(addr, end);
-		err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
+		if (!closure->alloc && pud_none_or_clear_bad(pud))
+			continue;
+		err = apply_to_pmd_range(closure, pud, addr, next);
 		if (err)
 			break;
 	} while (pud++, addr = next, addr != end);
 	return err;
 }
 
-static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
-				     unsigned long addr, unsigned long end,
-				     pte_fn_t fn, void *data)
+static int apply_to_p4d_range(struct pfn_range_apply *closure, pgd_t *pgd,
+			      unsigned long addr, unsigned long end)
 {
 	p4d_t *p4d;
 	unsigned long next;
-	int err;
+	int err = 0;
 
-	p4d = p4d_alloc(mm, pgd, addr);
+	p4d = p4d_alloc(closure->mm, pgd, addr);
 	if (!p4d)
 		return -ENOMEM;
+
 	do {
 		next = p4d_addr_end(addr, end);
-		err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
+		if (!closure->alloc && p4d_none_or_clear_bad(p4d))
+			continue;
+		err = apply_to_pud_range(closure, p4d, addr, next);
 		if (err)
 			break;
 	} while (p4d++, addr = next, addr != end);
 	return err;
 }
 
-/*
- * Scan a region of virtual memory, filling in page tables as necessary
- * and calling a provided function on each leaf page table.
+/**
+ * apply_to_pfn_range - Scan a region of virtual memory, calling a provided
+ * function on each leaf page table entry
+ * @closure: Details about how to scan and what function to apply
+ * @addr: Start virtual address
+ * @size: Size of the region
+ *
+ * If @closure->alloc is set to 1, the function will fill in the page table
+ * as necessary. Otherwise it will skip non-present parts.
+ * Note: The caller must ensure that the range does not contain huge pages.
+ * The caller must also assure that the proper mmu_notifier functions are
+ * called before and after the call to apply_to_pfn_range.
+ *
+ * WARNING: Do not use this function unless you know exactly what you are
+ * doing. It is lacking support for huge pages and transparent huge pages.
+ *
+ * Return: Zero on success. If the provided function returns a non-zero status,
+ * the page table walk will terminate and that status will be returned.
+ * If @closure->alloc is set to 1, then this function may also return memory
+ * allocation errors arising from allocating page table memory.
  */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
-			unsigned long size, pte_fn_t fn, void *data)
+int apply_to_pfn_range(struct pfn_range_apply *closure,
+		       unsigned long addr, unsigned long size)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -2143,16 +2165,65 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 	if (WARN_ON(addr >= end))
 		return -EINVAL;
 
-	pgd = pgd_offset(mm, addr);
+	pgd = pgd_offset(closure->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
+		if (!closure->alloc && pgd_none_or_clear_bad(pgd))
+			continue;
+		err = apply_to_p4d_range(closure, pgd, addr, next);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
 
 	return err;
 }
+
+/**
+ * struct page_range_apply - Closure structure for apply_to_page_range()
+ * @pter: The base closure structure we derive from
+ * @fn: The leaf pte function to call
+ * @data: The leaf pte function closure
+ */
+struct page_range_apply {
+	struct pfn_range_apply pter;
+	pte_fn_t fn;
+	void *data;
+};
+
+/*
+ * Callback wrapper to enable use of apply_to_pfn_range for
+ * the apply_to_page_range interface
+ */
+static int apply_to_page_range_wrapper(pte_t *pte, pgtable_t token,
+				       unsigned long addr,
+				       struct pfn_range_apply *pter)
+{
+	struct page_range_apply *pra =
+		container_of(pter, typeof(*pra), pter);
+
+	return pra->fn(pte, token, addr, pra->data);
+}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ *
+ * WARNING: Do not use this function unless you know exactly what you are
+ * doing. It is lacking support for huge pages and transparent huge pages.
+ */
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+			unsigned long size, pte_fn_t fn, void *data)
+{
+	struct page_range_apply pra = {
+		.pter = {.mm = mm,
+			 .alloc = 1,
+			 .ptefn = apply_to_page_range_wrapper },
+		.fn = fn,
+		.data = data
+	};
+
+	return apply_to_pfn_range(&pra.pter, addr, size);
+}
 EXPORT_SYMBOL_GPL(apply_to_page_range);
 
 /*
@@ -2238,7 +2309,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 	ret = vmf->vma->vm_ops->page_mkwrite(vmf);
 	/* Restore original flags so that caller is not surprised */
 	vmf->flags = old_flags;
-	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
+	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
 	if (unlikely(!(ret & VM_FAULT_LOCKED))) {
 		lock_page(page);
@@ -2515,7 +2586,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		vmf->flags |= FAULT_FLAG_MKWRITE;
 		ret = vma->vm_ops->pfn_mkwrite(vmf);
-		if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
+		if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))
 			return ret;
 		return finish_mkwrite_fault(vmf);
 	}
@@ -2536,7 +2607,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		tmp = do_page_mkwrite(vmf);
 		if (unlikely(!tmp || (tmp &
-				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+				      (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+				       VM_FAULT_RETRY)))) {
 			put_page(vmf->page);
 			return tmp;
 		}
@@ -3601,7 +3673,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
 		unlock_page(vmf->page);
 		tmp = do_page_mkwrite(vmf);
 		if (unlikely(!tmp ||
-				(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
+				(tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
+					VM_FAULT_RETRY)))) {
 			put_page(vmf->page);
 			return tmp;
 		}
diff --git a/mm/mlock.c b/mm/mlock.c
index 080f3b36415b..a90099da4fb4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -636,11 +636,11 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
  * is also counted.
  * Return value: previously mlocked page counts
  */
-static int count_mm_mlocked_page_nr(struct mm_struct *mm,
+static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
 		unsigned long start, size_t len)
 {
 	struct vm_area_struct *vma;
-	int count = 0;
+	unsigned long count = 0;
 
 	if (mm == NULL)
 		mm = current->mm;
@@ -797,7 +797,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	unsigned long lock_limit;
 	int ret;
 
-	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)))
+	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
+	    flags == MCL_ONFAULT)
 		return -EINVAL;
 
 	if (!can_do_mlock())
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 99740e1dd273..8c943a6e1696 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -245,14 +245,28 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
 {
 	/*
 	 * If there are parallel threads are doing PTE changes on same range
-	 * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
-	 * flush by batching, a thread has stable TLB entry can fail to flush
-	 * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
-	 * forcefully if we detect parallel PTE batching threads.
+	 * under non-exclusive lock (e.g., mmap_sem read-side) but defer TLB
+	 * flush by batching, one thread may end up seeing inconsistent PTEs
+	 * and result in having stale TLB entries.  So flush TLB forcefully
+	 * if we detect parallel PTE batching threads.
+	 *
+	 * However, some syscalls, e.g. munmap(), may free page tables, this
+	 * needs force flush everything in the given range. Otherwise this
+	 * may result in having stale TLB entries for some architectures,
+	 * e.g. aarch64, that could specify flush what level TLB.
 	 */
 	if (mm_tlb_flush_nested(tlb->mm)) {
+		/*
+		 * The aarch64 yields better performance with fullmm by
+		 * avoiding multiple CPUs spamming TLBI messages at the
+		 * same time.
+		 *
+		 * On x86 non-fullmm doesn't yield significant difference
+		 * against fullmm.
+		 */
+		tlb->fullmm = 1;
 		__tlb_reset_range(tlb);
-		__tlb_adjust_range(tlb, start, end - start);
+		tlb->freed_tables = 1;
 	}
 
 	tlb_flush_mmu(tlb);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7350a124524b..4c9e150e5ad3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2123,9 +2123,9 @@ static inline void set_area_direct_map(const struct vm_struct *area,
 /* Handle removing and resetting vm mappings related to the vm_struct. */
 static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 {
-	unsigned long addr = (unsigned long)area->addr;
 	unsigned long start = ULONG_MAX, end = 0;
 	int flush_reset = area->flags & VM_FLUSH_RESET_PERMS;
+	int flush_dmap = 0;
 	int i;
 
 	/*
@@ -2135,8 +2135,8 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 	 * execute permissions, without leaving a RW+X window.
 	 */
 	if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
-		set_memory_nx(addr, area->nr_pages);
-		set_memory_rw(addr, area->nr_pages);
+		set_memory_nx((unsigned long)area->addr, area->nr_pages);
+		set_memory_rw((unsigned long)area->addr, area->nr_pages);
 	}
 
 	remove_vm_area(area->addr);
@@ -2160,9 +2160,11 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 	 * the vm_unmap_aliases() flush includes the direct map.
 	 */
 	for (i = 0; i < area->nr_pages; i++) {
-		if (page_address(area->pages[i])) {
+		unsigned long addr = (unsigned long)page_address(area->pages[i]);
+		if (addr) {
 			start = min(addr, start);
-			end = max(addr, end);
+			end = max(addr + PAGE_SIZE, end);
+			flush_dmap = 1;
 		}
 	}
 
@@ -2172,7 +2174,7 @@ static void vm_remove_mappings(struct vm_struct *area, int deallocate_pages)
 	 * reset the direct map permissions to the default.
 	 */
 	set_area_direct_map(area, set_direct_map_invalid_noflush);
-	_vm_unmap_aliases(start, end, 1);
+	_vm_unmap_aliases(start, end, flush_dmap);
 	set_area_direct_map(area, set_direct_map_default_noflush);
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7acd0afdfc2a..7889f583ced9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1505,7 +1505,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 
 	list_for_each_entry_safe(page, next, page_list, lru) {
 		if (page_is_file_cache(page) && !PageDirty(page) &&
-		    !__PageMovable(page)) {
+		    !__PageMovable(page) && !PageUnevictable(page)) {
 			ClearPageActive(page);
 			list_move(&page->lru, &clean_pages);
 		}
@@ -1953,8 +1953,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (global_reclaim(sc))
 		__count_vm_events(item, nr_reclaimed);
 	__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
-	reclaim_stat->recent_rotated[0] = stat.nr_activate[0];
-	reclaim_stat->recent_rotated[1] = stat.nr_activate[1];
+	reclaim_stat->recent_rotated[0] += stat.nr_activate[0];
+	reclaim_stat->recent_rotated[1] += stat.nr_activate[1];
 
 	move_pages_to_lru(lruvec, &page_list);
 
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
index bcdd45df3f51..a7a36209a193 100755
--- a/scripts/decode_stacktrace.sh
+++ b/scripts/decode_stacktrace.sh
@@ -73,7 +73,7 @@ parse_symbol() {
 	if [[ "${cache[$module,$address]+isset}" == "isset" ]]; then
 		local code=${cache[$module,$address]}
 	else
-		local code=$(addr2line -i -e "$objfile" "$address")
+		local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address")
 		cache[$module,$address]=$code
 	fi
 
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 8346a4f7c5d7..a99be508f93d 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -739,14 +739,20 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a)
 	rc = security_sid_to_context_inval(sad->state, sad->ssid, &scontext,
 					   &scontext_len);
 	if (!rc && scontext) {
-		audit_log_format(ab, " srawcon=%s", scontext);
+		if (scontext_len && scontext[scontext_len - 1] == '\0')
+			scontext_len--;
+		audit_log_format(ab, " srawcon=");
+		audit_log_n_untrustedstring(ab, scontext, scontext_len);
 		kfree(scontext);
 	}
 
 	rc = security_sid_to_context_inval(sad->state, sad->tsid, &scontext,
 					   &scontext_len);
 	if (!rc && scontext) {
-		audit_log_format(ab, " trawcon=%s", scontext);
+		if (scontext_len && scontext[scontext_len - 1] == '\0')
+			scontext_len--;
+		audit_log_format(ab, " trawcon=");
+		audit_log_n_untrustedstring(ab, scontext, scontext_len);
 		kfree(scontext);
 	}
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 3ec702cf46ca..fea66f6b31bf 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1052,15 +1052,24 @@ static int selinux_add_mnt_opt(const char *option, const char *val, int len,
 	if (token == Opt_error)
 		return -EINVAL;
 
-	if (token != Opt_seclabel)
+	if (token != Opt_seclabel) {
 		val = kmemdup_nul(val, len, GFP_KERNEL);
+		if (!val) {
+			rc = -ENOMEM;
+			goto free_opt;
+		}
+	}
 	rc = selinux_add_opt(token, val, mnt_opts);
 	if (unlikely(rc)) {
 		kfree(val);
-		if (*mnt_opts) {
-			selinux_free_mnt_opts(*mnt_opts);
-			*mnt_opts = NULL;
-		}
+		goto free_opt;
+	}
+	return rc;
+
+free_opt:
+	if (*mnt_opts) {
+		selinux_free_mnt_opts(*mnt_opts);
+		*mnt_opts = NULL;
 	}
 	return rc;
 }
@@ -2616,10 +2625,11 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts)
 	char *from = options;
 	char *to = options;
 	bool first = true;
+	int rc;
 
 	while (1) {
 		int len = opt_len(from);
-		int token, rc;
+		int token;
 		char *arg = NULL;
 
 		token = match_opt_prefix(from, len, &arg);
@@ -2635,15 +2645,15 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts)
 						*q++ = c;
 				}
 				arg = kmemdup_nul(arg, q - arg, GFP_KERNEL);
+				if (!arg) {
+					rc = -ENOMEM;
+					goto free_opt;
+				}
 			}
 			rc = selinux_add_opt(token, arg, mnt_opts);
 			if (unlikely(rc)) {
 				kfree(arg);
-				if (*mnt_opts) {
-					selinux_free_mnt_opts(*mnt_opts);
-					*mnt_opts = NULL;
-				}
-				return rc;
+				goto free_opt;
 			}
 		} else {
 			if (!first) {	// copy with preceding comma
@@ -2661,6 +2671,13 @@ static int selinux_sb_eat_lsm_opts(char *options, void **mnt_opts)
 	}
 	*to = '\0';
 	return 0;
+
+free_opt:
+	if (*mnt_opts) {
+		selinux_free_mnt_opts(*mnt_opts);
+		*mnt_opts = NULL;
+	}
+	return rc;
 }
 
 static int selinux_sb_remount(struct super_block *sb, void *mnt_opts)
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0de725f88bed..d99450b4f511 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -68,6 +68,7 @@ static struct {
 	int len;
 	int opt;
 } smk_mount_opts[] = {
+	{"smackfsdef", sizeof("smackfsdef") - 1, Opt_fsdefault},
 	A(fsdefault), A(fsfloor), A(fshat), A(fsroot), A(fstransmute)
 };
 #undef A
@@ -682,11 +683,12 @@ static int smack_fs_context_dup(struct fs_context *fc,
 }
 
 static const struct fs_parameter_spec smack_param_specs[] = {
-	fsparam_string("fsdefault",	Opt_fsdefault),
-	fsparam_string("fsfloor",	Opt_fsfloor),
-	fsparam_string("fshat",		Opt_fshat),
-	fsparam_string("fsroot",	Opt_fsroot),
-	fsparam_string("fstransmute",	Opt_fstransmute),
+	fsparam_string("smackfsdef",		Opt_fsdefault),
+	fsparam_string("smackfsdefault",	Opt_fsdefault),
+	fsparam_string("smackfsfloor",		Opt_fsfloor),
+	fsparam_string("smackfshat",		Opt_fshat),
+	fsparam_string("smackfsroot",		Opt_fsroot),
+	fsparam_string("smackfstransmute",	Opt_fstransmute),
 	{}
 };
 
diff --git a/sound/firewire/motu/motu-stream.c b/sound/firewire/motu/motu-stream.c
index 37e47fa7b0e3..81f7edc560d0 100644
--- a/sound/firewire/motu/motu-stream.c
+++ b/sound/firewire/motu/motu-stream.c
@@ -344,7 +344,7 @@ static void destroy_stream(struct snd_motu *motu,
 	}
 
 	amdtp_stream_destroy(stream);
-	fw_iso_resources_free(resources);
+	fw_iso_resources_destroy(resources);
 }
 
 int snd_motu_stream_init_duplex(struct snd_motu *motu)
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 5e31c460a90f..9fd145cc4b07 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -148,9 +148,6 @@ static int detect_quirks(struct snd_oxfw *oxfw)
 		oxfw->midi_input_ports = 0;
 		oxfw->midi_output_ports = 0;
 
-		/* Output stream exists but no data channels are useful. */
-		oxfw->has_output = false;
-
 		return snd_oxfw_scs1x_add(oxfw);
 	}
 
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index ad1b55a1f045..a3a113ef5d56 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -162,7 +162,6 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_init);
 void snd_hdac_ext_bus_device_exit(struct hdac_device *hdev)
 {
 	snd_hdac_device_exit(hdev);
-	kfree(hdev);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_device_exit);
 
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index c5e46df9c548..6c51b8363f8b 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -826,7 +826,14 @@ static int snd_hda_codec_dev_free(struct snd_device *device)
 	if (codec->core.type == HDA_DEV_LEGACY)
 		snd_hdac_device_unregister(&codec->core);
 	codec_display_power(codec, false);
-	put_device(hda_codec_dev(codec));
+
+	/*
+	 * In the case of ASoC HD-audio bus, the device refcount is released in
+	 * snd_hdac_ext_bus_device_remove() explicitly.
+	 */
+	if (codec->core.type == HDA_DEV_LEGACY)
+		put_device(hda_codec_dev(codec));
+
 	return 0;
 }
 
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 974244978509..5b3c26991f26 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4120,18 +4120,19 @@ static struct coef_fw alc225_pre_hsmode[] = {
 static void alc_headset_mode_unplugged(struct hda_codec *codec)
 {
 	static struct coef_fw coef0255[] = {
+		WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */
 		WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */
 		UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/
 		WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */
 		WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */
 		{}
 	};
-	static struct coef_fw coef0255_1[] = {
-		WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */
-		{}
-	};
 	static struct coef_fw coef0256[] = {
 		WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */
+		WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */
+		WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */
+		WRITE_COEFEX(0x57, 0x03, 0x09a3), /* Direct Drive HP Amp control */
+		UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/
 		{}
 	};
 	static struct coef_fw coef0233[] = {
@@ -4194,13 +4195,11 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
 
 	switch (codec->core.vendor_id) {
 	case 0x10ec0255:
-		alc_process_coef_fw(codec, coef0255_1);
 		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0236:
 	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0256);
-		alc_process_coef_fw(codec, coef0255);
 		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
@@ -4253,6 +4252,12 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
 		WRITE_COEF(0x06, 0x6100), /* Set MIC2 Vref gate to normal */
 		{}
 	};
+	static struct coef_fw coef0256[] = {
+		UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14), /* Direct Drive HP Amp control(Set to verb control)*/
+		WRITE_COEFEX(0x57, 0x03, 0x09a3),
+		WRITE_COEF(0x06, 0x6100), /* Set MIC2 Vref gate to normal */
+		{}
+	};
 	static struct coef_fw coef0233[] = {
 		UPDATE_COEF(0x35, 0, 1<<14),
 		WRITE_COEF(0x06, 0x2100),
@@ -4300,14 +4305,19 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
 	};
 
 	switch (codec->core.vendor_id) {
-	case 0x10ec0236:
 	case 0x10ec0255:
-	case 0x10ec0256:
 		alc_write_coef_idx(codec, 0x45, 0xc489);
 		snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
 		alc_process_coef_fw(codec, coef0255);
 		snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
 		break;
+	case 0x10ec0236:
+	case 0x10ec0256:
+		alc_write_coef_idx(codec, 0x45, 0xc489);
+		snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
+		alc_process_coef_fw(codec, coef0256);
+		snd_hda_set_pin_ctl_cache(codec, mic_pin, PIN_VREF50);
+		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
 	case 0x10ec0294:
@@ -4389,6 +4399,14 @@ static void alc_headset_mode_default(struct hda_codec *codec)
 		WRITE_COEF(0x49, 0x0049),
 		{}
 	};
+	static struct coef_fw coef0256[] = {
+		WRITE_COEF(0x45, 0xc489),
+		WRITE_COEFEX(0x57, 0x03, 0x0da3),
+		WRITE_COEF(0x49, 0x0049),
+		UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/
+		WRITE_COEF(0x06, 0x6100),
+		{}
+	};
 	static struct coef_fw coef0233[] = {
 		WRITE_COEF(0x06, 0x2100),
 		WRITE_COEF(0x32, 0x4ea3),
@@ -4439,11 +4457,16 @@ static void alc_headset_mode_default(struct hda_codec *codec)
 		alc_process_coef_fw(codec, alc225_pre_hsmode);
 		alc_process_coef_fw(codec, coef0225);
 		break;
-	case 0x10ec0236:
 	case 0x10ec0255:
-	case 0x10ec0256:
 		alc_process_coef_fw(codec, coef0255);
 		break;
+	case 0x10ec0236:
+	case 0x10ec0256:
+		alc_write_coef_idx(codec, 0x1b, 0x0e4b);
+		alc_write_coef_idx(codec, 0x45, 0xc089);
+		msleep(50);
+		alc_process_coef_fw(codec, coef0256);
+		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
 	case 0x10ec0294:
@@ -4487,8 +4510,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
 	};
 	static struct coef_fw coef0256[] = {
 		WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */
-		WRITE_COEF(0x1b, 0x0c6b),
-		WRITE_COEFEX(0x57, 0x03, 0x8ea6),
+		WRITE_COEF(0x1b, 0x0e6b),
 		{}
 	};
 	static struct coef_fw coef0233[] = {
@@ -4606,8 +4628,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
 	};
 	static struct coef_fw coef0256[] = {
 		WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */
-		WRITE_COEF(0x1b, 0x0c6b),
-		WRITE_COEFEX(0x57, 0x03, 0x8ea6),
+		WRITE_COEF(0x1b, 0x0e6b),
 		{}
 	};
 	static struct coef_fw coef0233[] = {
@@ -4739,13 +4760,37 @@ static void alc_determine_headset_type(struct hda_codec *codec)
 	};
 
 	switch (codec->core.vendor_id) {
-	case 0x10ec0236:
 	case 0x10ec0255:
+		alc_process_coef_fw(codec, coef0255);
+		msleep(300);
+		val = alc_read_coef_idx(codec, 0x46);
+		is_ctia = (val & 0x0070) == 0x0070;
+		break;
+	case 0x10ec0236:
 	case 0x10ec0256:
+		alc_write_coef_idx(codec, 0x1b, 0x0e4b);
+		alc_write_coef_idx(codec, 0x06, 0x6104);
+		alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3);
+
+		snd_hda_codec_write(codec, 0x21, 0,
+			    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+		msleep(80);
+		snd_hda_codec_write(codec, 0x21, 0,
+			    AC_VERB_SET_PIN_WIDGET_CONTROL, 0x0);
+
 		alc_process_coef_fw(codec, coef0255);
 		msleep(300);
 		val = alc_read_coef_idx(codec, 0x46);
 		is_ctia = (val & 0x0070) == 0x0070;
+
+		alc_write_coefex_idx(codec, 0x57, 0x3, 0x0da3);
+		alc_update_coefex_idx(codec, 0x57, 0x5, 1<<14, 0);
+
+		snd_hda_codec_write(codec, 0x21, 0,
+			    AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT);
+		msleep(80);
+		snd_hda_codec_write(codec, 0x21, 0,
+			    AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
 		break;
 	case 0x10ec0234:
 	case 0x10ec0274:
@@ -6210,15 +6255,13 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chain_id = ALC269_FIXUP_THINKPAD_ACPI,
 	},
 	[ALC255_FIXUP_ACER_MIC_NO_PRESENCE] = {
-		.type = HDA_FIXUP_VERBS,
-		.v.verbs = (const struct hda_verb[]) {
-			/* Enable the Mic */
-			{ 0x20, AC_VERB_SET_COEF_INDEX, 0x45 },
-			{ 0x20, AC_VERB_SET_PROC_COEF, 0x5089 },
-			{}
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+			{ }
 		},
 		.chained = true,
-		.chain_id = ALC269_FIXUP_LIFEBOOK_EXTMIC
+		.chain_id = ALC255_FIXUP_HEADSET_MODE
 	},
 	[ALC255_FIXUP_ASUS_MIC_NO_PRESENCE] = {
 		.type = HDA_FIXUP_PINS,
@@ -7263,10 +7306,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x19, 0x0181303F},
 		{0x21, 0x0221102f}),
 	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1025, "Acer", ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
-		{0x12, 0x90a60140},
-		{0x14, 0x90170120},
-		{0x21, 0x02211030}),
-	SND_HDA_PIN_QUIRK(0x10ec0255, 0x1025, "Acer", ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
 		{0x12, 0x90a601c0},
 		{0x14, 0x90171120},
 		{0x21, 0x02211030}),
diff --git a/sound/pci/ice1712/ews.c b/sound/pci/ice1712/ews.c
index 3729b132ad85..fe08dd9f1564 100644
--- a/sound/pci/ice1712/ews.c
+++ b/sound/pci/ice1712/ews.c
@@ -812,7 +812,7 @@ static int snd_ice1712_6fire_read_pca(struct snd_ice1712 *ice, unsigned char reg
 
 	snd_i2c_lock(ice->i2c);
 	byte = reg;
-	if (snd_i2c_sendbytes(spec->i2cdevs[EWS_I2C_6FIRE], &byte, 1)) {
+	if (snd_i2c_sendbytes(spec->i2cdevs[EWS_I2C_6FIRE], &byte, 1) != 1) {
 		snd_i2c_unlock(ice->i2c);
 		dev_err(ice->card->dev, "cannot send pca\n");
 		return -EIO;
diff --git a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c
index eab7c76cfcd9..71562154c0b1 100644
--- a/sound/soc/codecs/ak4458.c
+++ b/sound/soc/codecs/ak4458.c
@@ -304,7 +304,10 @@ static int ak4458_rstn_control(struct snd_soc_component *component, int bit)
 					  AK4458_00_CONTROL1,
 					  AK4458_RSTN_MASK,
 					  0x0);
-	return ret;
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int ak4458_hw_params(struct snd_pcm_substream *substream,
@@ -536,9 +539,10 @@ static void ak4458_power_on(struct ak4458_priv *ak4458)
 	}
 }
 
-static void ak4458_init(struct snd_soc_component *component)
+static int ak4458_init(struct snd_soc_component *component)
 {
 	struct ak4458_priv *ak4458 = snd_soc_component_get_drvdata(component);
+	int ret;
 
 	/* External Mute ON */
 	if (ak4458->mute_gpiod)
@@ -546,21 +550,21 @@ static void ak4458_init(struct snd_soc_component *component)
 
 	ak4458_power_on(ak4458);
 
-	snd_soc_component_update_bits(component, AK4458_00_CONTROL1,
+	ret = snd_soc_component_update_bits(component, AK4458_00_CONTROL1,
 			    0x80, 0x80);   /* ACKS bit = 1; 10000000 */
+	if (ret < 0)
+		return ret;
 
-	ak4458_rstn_control(component, 1);
+	return ak4458_rstn_control(component, 1);
 }
 
 static int ak4458_probe(struct snd_soc_component *component)
 {
 	struct ak4458_priv *ak4458 = snd_soc_component_get_drvdata(component);
 
-	ak4458_init(component);
-
 	ak4458->fs = 48000;
 
-	return 0;
+	return ak4458_init(component);
 }
 
 static void ak4458_remove(struct snd_soc_component *component)
diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c
index ab27d2b94d02..c0190ec59e74 100644
--- a/sound/soc/codecs/cs4265.c
+++ b/sound/soc/codecs/cs4265.c
@@ -60,7 +60,7 @@ static const struct reg_default cs4265_reg_defaults[] = {
 static bool cs4265_readable_register(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
-	case CS4265_CHIP_ID ... CS4265_SPDIF_CTL2:
+	case CS4265_CHIP_ID ... CS4265_MAX_REGISTER:
 		return true;
 	default:
 		return false;
diff --git a/sound/soc/codecs/cs42xx8.c b/sound/soc/codecs/cs42xx8.c
index ebb9e0cf8364..28a4ac36c4f8 100644
--- a/sound/soc/codecs/cs42xx8.c
+++ b/sound/soc/codecs/cs42xx8.c
@@ -558,6 +558,7 @@ static int cs42xx8_runtime_resume(struct device *dev)
 	msleep(5);
 
 	regcache_cache_only(cs42xx8->regmap, false);
+	regcache_mark_dirty(cs42xx8->regmap);
 
 	ret = regcache_sync(cs42xx8->regmap);
 	if (ret) {
diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index 7619ea31ab50..ada8c25e643d 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -1909,6 +1909,21 @@ static int max98090_configure_dmic(struct max98090_priv *max98090,
 	return 0;
 }
 
+static int max98090_dai_startup(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct max98090_priv *max98090 = snd_soc_component_get_drvdata(component);
+	unsigned int fmt = max98090->dai_fmt;
+
+	/* Remove 24-bit format support if it is not in right justified mode. */
+	if ((fmt & SND_SOC_DAIFMT_FORMAT_MASK) != SND_SOC_DAIFMT_RIGHT_J) {
+		substream->runtime->hw.formats = SNDRV_PCM_FMTBIT_S16_LE;
+		snd_pcm_hw_constraint_msbits(substream->runtime, 0, 16, 16);
+	}
+	return 0;
+}
+
 static int max98090_dai_hw_params(struct snd_pcm_substream *substream,
 				   struct snd_pcm_hw_params *params,
 				   struct snd_soc_dai *dai)
@@ -2316,6 +2331,7 @@ EXPORT_SYMBOL_GPL(max98090_mic_detect);
 #define MAX98090_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
 
 static const struct snd_soc_dai_ops max98090_dai_ops = {
+	.startup = max98090_dai_startup,
 	.set_sysclk = max98090_dai_set_sysclk,
 	.set_fmt = max98090_dai_set_fmt,
 	.set_tdm_slot = max98090_set_tdm_slot,
diff --git a/sound/soc/codecs/rt274.c b/sound/soc/codecs/rt274.c
index adf59039a3b6..cdd312db3e78 100644
--- a/sound/soc/codecs/rt274.c
+++ b/sound/soc/codecs/rt274.c
@@ -405,6 +405,8 @@ static int rt274_mic_detect(struct snd_soc_component *component,
 {
 	struct rt274_priv *rt274 = snd_soc_component_get_drvdata(component);
 
+	rt274->jack = jack;
+
 	if (jack == NULL) {
 		/* Disable jack detection */
 		regmap_update_bits(rt274->regmap, RT274_EAPD_GPIO_IRQ_CTRL,
@@ -412,7 +414,6 @@ static int rt274_mic_detect(struct snd_soc_component *component,
 
 		return 0;
 	}
-	rt274->jack = jack;
 
 	regmap_update_bits(rt274->regmap, RT274_EAPD_GPIO_IRQ_CTRL,
 				RT274_IRQ_EN, RT274_IRQ_EN);
diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c
index 9a037108b1ae..a746e11ccfe3 100644
--- a/sound/soc/codecs/rt5670.c
+++ b/sound/soc/codecs/rt5670.c
@@ -2882,6 +2882,18 @@ static const struct dmi_system_id dmi_platform_intel_quirks[] = {
 						 RT5670_DEV_GPIO |
 						 RT5670_JD_MODE3),
 	},
+	{
+		.callback = rt5670_quirk_cb,
+		.ident = "Aegex 10 tablet (RU2)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "AEGEX"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "RU2"),
+		},
+		.driver_data = (unsigned long *)(RT5670_DMIC_EN |
+						 RT5670_DMIC2_INR |
+						 RT5670_DEV_GPIO |
+						 RT5670_JD_MODE3),
+	},
 	{}
 };
 
diff --git a/sound/soc/codecs/rt5677-spi.c b/sound/soc/codecs/rt5677-spi.c
index 84b6bd8b50e1..a4dfa0345c6e 100644
--- a/sound/soc/codecs/rt5677-spi.c
+++ b/sound/soc/codecs/rt5677-spi.c
@@ -101,7 +101,7 @@ static void rt5677_spi_reverse(u8 *dst, u32 dstlen, const u8 *src, u32 srclen)
 	u32 word_size = min_t(u32, dstlen, 8);
 
 	for (w = 0; w < dstlen; w += word_size) {
-		for (i = 0; i < word_size; i++) {
+		for (i = 0; i < word_size && i + w < dstlen; i++) {
 			si = w + word_size - i - 1;
 			dst[w + i] = si < srclen ? src[si] : 0;
 		}
@@ -152,8 +152,9 @@ int rt5677_spi_read(u32 addr, void *rxbuf, size_t len)
 		status |= spi_sync(g_spi, &m);
 		mutex_unlock(&spi_mutex);
 
+
 		/* Copy data back to caller buffer */
-		rt5677_spi_reverse(cb + offset, t[1].len, body, t[1].len);
+		rt5677_spi_reverse(cb + offset, len - offset, body, t[1].len);
 	}
 	return status;
 }
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 0b937924d2e4..ea035c12a325 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -282,8 +282,8 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair)
 		return -EINVAL;
 	}
 
-	if ((outrate > 8000 && outrate < 30000) &&
-	    (outrate/inrate > 24 || inrate/outrate > 8)) {
+	if ((outrate >= 8000 && outrate <= 30000) &&
+	    (outrate > 24 * inrate || inrate > 8 * outrate)) {
 		pair_err("exceed supported ratio range [1/24, 8] for \
 				inrate/outrate: %d/%d\n", inrate, outrate);
 		return -EINVAL;
diff --git a/sound/soc/intel/atom/sst/sst_pvt.c b/sound/soc/intel/atom/sst/sst_pvt.c
index 1ec298dd0e4f..13db2854db3e 100644
--- a/sound/soc/intel/atom/sst/sst_pvt.c
+++ b/sound/soc/intel/atom/sst/sst_pvt.c
@@ -158,11 +158,11 @@ int sst_create_ipc_msg(struct ipc_post **arg, bool large)
 {
 	struct ipc_post *msg;
 
-	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+	msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
 	if (!msg)
 		return -ENOMEM;
 	if (large) {
-		msg->mailbox_data = kzalloc(SST_MAILBOX_SIZE, GFP_KERNEL);
+		msg->mailbox_data = kzalloc(SST_MAILBOX_SIZE, GFP_ATOMIC);
 		if (!msg->mailbox_data) {
 			kfree(msg);
 			return -ENOMEM;
diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c
index 1f00d473793f..2fe1ce879123 100644
--- a/sound/soc/intel/boards/bytcht_es8316.c
+++ b/sound/soc/intel/boards/bytcht_es8316.c
@@ -487,6 +487,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
 	}
 
 	/* override plaform name, if required */
+	byt_cht_es8316_card.dev = dev;
 	platform_name = mach->mach_params.platform;
 
 	ret = snd_soc_fixup_dai_links_platform_name(&byt_cht_es8316_card,
@@ -567,7 +568,6 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev)
 		 (quirk & BYT_CHT_ES8316_MONO_SPEAKER) ? "mono" : "stereo",
 		 mic_name[BYT_CHT_ES8316_MAP(quirk)]);
 	byt_cht_es8316_card.long_name = long_name;
-	byt_cht_es8316_card.dev = dev;
 	snd_soc_card_set_drvdata(&byt_cht_es8316_card, priv);
 
 	ret = devm_snd_soc_register_card(dev, &byt_cht_es8316_card);
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index d72623f508b7..613b37172441 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -446,6 +446,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 	}
 
 	/* override plaform name, if required */
+	snd_soc_card_cht.dev = &pdev->dev;
 	mach = (&pdev->dev)->platform_data;
 	platform_name = mach->mach_params.platform;
 
@@ -455,7 +456,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 		return ret_val;
 
 	/* register the soc card */
-	snd_soc_card_cht.dev = &pdev->dev;
 	snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
 
 	if (drv->quirks & QUIRK_PMC_PLT_CLK_0)
diff --git a/sound/soc/intel/boards/cht_bsw_nau8824.c b/sound/soc/intel/boards/cht_bsw_nau8824.c
index 1c17f82fc922..b0d658e3d3f7 100644
--- a/sound/soc/intel/boards/cht_bsw_nau8824.c
+++ b/sound/soc/intel/boards/cht_bsw_nau8824.c
@@ -249,6 +249,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 	snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
 
 	/* override plaform name, if required */
+	snd_soc_card_cht.dev = &pdev->dev;
 	mach = (&pdev->dev)->platform_data;
 	platform_name = mach->mach_params.platform;
 
@@ -258,7 +259,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 		return ret_val;
 
 	/* register the soc card */
-	snd_soc_card_cht.dev = &pdev->dev;
 	ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_cht);
 	if (ret_val) {
 		dev_err(&pdev->dev,
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
index 1731cc0fac25..028e571f6a77 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -418,6 +418,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 	}
 
 	/* override plaform name, if required */
+	snd_soc_card_cht.dev = &pdev->dev;
 	platform_name = mach->mach_params.platform;
 
 	ret_val = snd_soc_fixup_dai_links_platform_name(&snd_soc_card_cht,
@@ -435,7 +436,6 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 	snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
 
 	/* register the soc card */
-	snd_soc_card_cht.dev = &pdev->dev;
 	ret_val = devm_snd_soc_register_card(&pdev->dev, &snd_soc_card_cht);
 	if (ret_val) {
 		dev_err(&pdev->dev,
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index f28fb98cc306..3343dbcd506f 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -29,9 +29,10 @@
 #define SOF_RT5682_MCLK_EN			BIT(3)
 #define SOF_RT5682_MCLK_24MHZ			BIT(4)
 #define SOF_SPEAKER_AMP_PRESENT		BIT(5)
-#define SOF_RT5682_SSP_AMP(quirk)		((quirk) & GENMASK(8, 6))
-#define SOF_RT5682_SSP_AMP_MASK			(GENMASK(8, 6))
 #define SOF_RT5682_SSP_AMP_SHIFT		6
+#define SOF_RT5682_SSP_AMP_MASK                 (GENMASK(8, 6))
+#define SOF_RT5682_SSP_AMP(quirk)	\
+	(((quirk) << SOF_RT5682_SSP_AMP_SHIFT) & SOF_RT5682_SSP_AMP_MASK)
 
 /* Default: MCLK on, MCLK 19.2M, SSP0  */
 static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN |
@@ -144,9 +145,9 @@ static int sof_rt5682_codec_init(struct snd_soc_pcm_runtime *rtd)
 	jack = &ctx->sof_headset;
 
 	snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE);
-	snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOLUMEUP);
-	snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEDOWN);
-	snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOICECOMMAND);
+	snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND);
+	snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
+	snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
 	ret = snd_soc_component_set_jack(component, jack, NULL);
 
 	if (ret) {
diff --git a/sound/soc/intel/common/soc-acpi-intel-byt-match.c b/sound/soc/intel/common/soc-acpi-intel-byt-match.c
index abd34fa27749..55e80c3d2af0 100644
--- a/sound/soc/intel/common/soc-acpi-intel-byt-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-byt-match.c
@@ -13,6 +13,7 @@ static unsigned long byt_machine_id;
 
 #define BYT_THINKPAD_10  1
 #define BYT_POV_P1006W   2
+#define BYT_AEGEX_10     3
 
 static int byt_thinkpad10_quirk_cb(const struct dmi_system_id *id)
 {
@@ -26,6 +27,12 @@ static int byt_pov_p1006w_quirk_cb(const struct dmi_system_id *id)
 	return 1;
 }
 
+static int byt_aegex10_quirk_cb(const struct dmi_system_id *id)
+{
+	byt_machine_id = BYT_AEGEX_10;
+	return 1;
+}
+
 static const struct dmi_system_id byt_table[] = {
 	{
 		.callback = byt_thinkpad10_quirk_cb,
@@ -66,9 +73,18 @@ static const struct dmi_system_id byt_table[] = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"),
 		},
 	},
+	{
+		/* Aegex 10 tablet (RU2) */
+		.callback = byt_aegex10_quirk_cb,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "AEGEX"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "RU2"),
+		},
+	},
 	{ }
 };
 
+/* The Thinkapd 10 and Aegex 10 tablets have the same ID problem */
 static struct snd_soc_acpi_mach byt_thinkpad_10 = {
 	.id = "10EC5640",
 	.drv_name = "cht-bsw-rt5672",
@@ -95,6 +111,7 @@ static struct snd_soc_acpi_mach *byt_quirk(void *arg)
 
 	switch (byt_machine_id) {
 	case BYT_THINKPAD_10:
+	case BYT_AEGEX_10:
 		return &byt_thinkpad_10;
 	case BYT_POV_P1006W:
 		return &byt_pov_p1006w;
diff --git a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
index df7c52cad5c3..c36c0aa4f683 100644
--- a/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-cnl-match.c
@@ -29,17 +29,17 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_machines[] = {
 		.sof_tplg_filename = "sof-cnl-rt274.tplg",
 	},
 	{
-		.id = "10EC5682",
+		.id = "MX98357A",
 		.drv_name = "sof_rt5682",
+		.quirk_data = &cml_codecs,
 		.sof_fw_filename = "sof-cnl.ri",
-		.sof_tplg_filename = "sof-cml-rt5682.tplg",
+		.sof_tplg_filename = "sof-cml-rt5682-max98357a.tplg",
 	},
 	{
-		.id = "MX98357A",
+		.id = "10EC5682",
 		.drv_name = "sof_rt5682",
-		.quirk_data = &cml_codecs,
 		.sof_fw_filename = "sof-cnl.ri",
-		.sof_tplg_filename = "sof-cml-rt5682-max98357a.tplg",
+		.sof_tplg_filename = "sof-cml-rt5682.tplg",
 	},
 
 	{},
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 933ab51af05b..111e44b64b38 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -133,7 +133,7 @@ config SND_SOC_MT8183_MT6358_TS3A227E_MAX98357A
 
 config SND_SOC_MT8183_DA7219_MAX98357A
 	tristate "ASoC Audio driver for MT8183 with DA7219 MAX98357A codec"
-	depends on SND_SOC_MT8183
+	depends on SND_SOC_MT8183 && I2C
 	select SND_SOC_MT6358
 	select SND_SOC_MAX98357A
 	select SND_SOC_DA7219
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 2403bec2fccf..41c0cfaf2db5 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -228,7 +228,10 @@ static void soc_init_card_debugfs(struct snd_soc_card *card)
 
 static void soc_cleanup_card_debugfs(struct snd_soc_card *card)
 {
+	if (!card->debugfs_card_root)
+		return;
 	debugfs_remove_recursive(card->debugfs_card_root);
+	card->debugfs_card_root = NULL;
 }
 
 static void snd_soc_debugfs_init(void)
@@ -2037,8 +2040,10 @@ match:
 static int soc_cleanup_card_resources(struct snd_soc_card *card)
 {
 	/* free the ALSA card at first; this syncs with pending operations */
-	if (card->snd_card)
+	if (card->snd_card) {
 		snd_card_free(card->snd_card);
+		card->snd_card = NULL;
+	}
 
 	/* remove and free each DAI */
 	soc_remove_dai_links(card);
@@ -2065,6 +2070,16 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	int ret, i, order;
 
 	mutex_lock(&client_mutex);
+	for_each_card_prelinks(card, i, dai_link) {
+		ret = soc_init_dai_link(card, dai_link);
+		if (ret) {
+			soc_cleanup_platform(card);
+			dev_err(card->dev, "ASoC: failed to init link %s: %d\n",
+				dai_link->name, ret);
+			mutex_unlock(&client_mutex);
+			return ret;
+		}
+	}
 	mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT);
 
 	card->dapm.bias_level = SND_SOC_BIAS_OFF;
@@ -2789,26 +2804,9 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
  */
 int snd_soc_register_card(struct snd_soc_card *card)
 {
-	int i, ret;
-	struct snd_soc_dai_link *link;
-
 	if (!card->name || !card->dev)
 		return -EINVAL;
 
-	mutex_lock(&client_mutex);
-	for_each_card_prelinks(card, i, link) {
-
-		ret = soc_init_dai_link(card, link);
-		if (ret) {
-			soc_cleanup_platform(card);
-			dev_err(card->dev, "ASoC: failed to init link %s\n",
-				link->name);
-			mutex_unlock(&client_mutex);
-			return ret;
-		}
-	}
-	mutex_unlock(&client_mutex);
-
 	dev_set_drvdata(card->dev, card);
 
 	snd_soc_initialize_card_lists(card);
@@ -2839,12 +2837,14 @@ static void snd_soc_unbind_card(struct snd_soc_card *card, bool unregister)
 		snd_soc_dapm_shutdown(card);
 		snd_soc_flush_all_delayed_work(card);
 
+		mutex_lock(&client_mutex);
 		/* remove all components used by DAI links on this card */
 		for_each_comp_order(order) {
 			for_each_card_rtds(card, rtd) {
 				soc_remove_link_components(card, rtd, order);
 			}
 		}
+		mutex_unlock(&client_mutex);
 
 		soc_cleanup_card_resources(card);
 		if (!unregister)
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 81a7a12196ff..55f8278077f4 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2193,7 +2193,10 @@ static void dapm_debugfs_add_widget(struct snd_soc_dapm_widget *w)
 
 static void dapm_debugfs_cleanup(struct snd_soc_dapm_context *dapm)
 {
+	if (!dapm->debugfs_dapm)
+		return;
 	debugfs_remove_recursive(dapm->debugfs_dapm);
+	dapm->debugfs_dapm = NULL;
 }
 
 #else
@@ -3831,8 +3834,8 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 						ret);
 					goto out;
 				}
-				source->active++;
 			}
+			source->active++;
 			ret = soc_dai_hw_params(&substream, params, source);
 			if (ret < 0)
 				goto out;
@@ -3853,8 +3856,8 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 						ret);
 					goto out;
 				}
-				sink->active++;
 			}
+			sink->active++;
 			ret = soc_dai_hw_params(&substream, params, sink);
 			if (ret < 0)
 				goto out;
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 0a4f60c7a188..c46ad0f66292 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2479,7 +2479,8 @@ int dpcm_be_dai_prepare(struct snd_soc_pcm_runtime *fe, int stream)
 
 		if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_PARAMS) &&
 		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_STOP) &&
-		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND))
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_SUSPEND) &&
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_PAUSED))
 			continue;
 
 		dev_dbg(be->dev, "ASoC: prepare BE %s\n",
diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig
index 048c7b034d70..71d87a86f060 100644
--- a/sound/soc/sof/Kconfig
+++ b/sound/soc/sof/Kconfig
@@ -45,7 +45,10 @@ config SND_SOC_SOF_OPTIONS
 if SND_SOC_SOF_OPTIONS
 
 config SND_SOC_SOF_NOCODEC
-	tristate "SOF nocodec mode Support"
+	tristate
+
+config SND_SOC_SOF_NOCODEC_SUPPORT
+	bool "SOF nocodec mode support"
 	help
 	  This adds support for a dummy/nocodec machine driver fallback
 	  option if no known codec is detected. This is typically only
@@ -81,7 +84,7 @@ if SND_SOC_SOF_DEBUG
 
 config SND_SOC_SOF_FORCE_NOCODEC_MODE
 	bool "SOF force nocodec Mode"
-	depends on SND_SOC_SOF_NOCODEC
+	depends on SND_SOC_SOF_NOCODEC_SUPPORT
 	help
 	  This forces SOF to use dummy/nocodec as machine driver, even
 	  though there is a codec detected on the real platform. This is
@@ -136,6 +139,7 @@ endif ## SND_SOC_SOF_OPTIONS
 config SND_SOC_SOF
 	tristate
 	select SND_SOC_TOPOLOGY
+	select SND_SOC_SOF_NOCODEC if SND_SOC_SOF_NOCODEC_SUPPORT
 	help
 	  This option is not user-selectable but automagically handled by
 	  'select' statements at a higher level
diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c
index 11762c4580f1..84e2cbfbbcbb 100644
--- a/sound/soc/sof/control.c
+++ b/sound/soc/sof/control.c
@@ -349,6 +349,7 @@ int snd_sof_bytes_put(struct snd_kcontrol *kcontrol,
 	struct snd_sof_dev *sdev = scontrol->sdev;
 	struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
 	struct sof_abi_hdr *data = cdata->data;
+	size_t size = data->size + sizeof(*data);
 	int ret, err;
 
 	if (be->max > sizeof(ucontrol->value.bytes.data)) {
@@ -358,10 +359,10 @@ int snd_sof_bytes_put(struct snd_kcontrol *kcontrol,
 		return -EINVAL;
 	}
 
-	if (data->size > be->max) {
+	if (size > be->max) {
 		dev_err_ratelimited(sdev->dev,
-				    "error: size too big %d bytes max is %d\n",
-				    data->size, be->max);
+				    "error: size too big %zu bytes max is %d\n",
+				    size, be->max);
 		return -EINVAL;
 	}
 
@@ -375,7 +376,7 @@ int snd_sof_bytes_put(struct snd_kcontrol *kcontrol,
 	}
 
 	/* copy from kcontrol */
-	memcpy(data, ucontrol->value.bytes.data, data->size);
+	memcpy(data, ucontrol->value.bytes.data, size);
 
 	/* notify DSP of byte control updates */
 	snd_sof_ipc_set_get_comp_data(sdev->ipc, scontrol,
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 32105e0fabe8..5beda47cdf9f 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -382,7 +382,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
 
 	if (IS_ERR(plat_data->pdev_mach)) {
 		ret = PTR_ERR(plat_data->pdev_mach);
-		goto comp_err;
+		goto fw_run_err;
 	}
 
 	dev_dbg(sdev->dev, "created machine %s\n",
@@ -393,8 +393,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
 
 	return 0;
 
-comp_err:
-	snd_soc_unregister_component(sdev->dev);
+#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)
 fw_run_err:
 	snd_sof_fw_unload(sdev);
 fw_load_err:
@@ -403,6 +402,21 @@ ipc_err:
 	snd_sof_free_debug(sdev);
 dbg_err:
 	snd_sof_remove(sdev);
+#else
+
+	/*
+	 * when the probe_continue is handled in a work queue, the
+	 * probe does not fail so we don't release resources here.
+	 * They will be released with an explicit call to
+	 * snd_sof_device_remove() when the PCI/ACPI device is removed
+	 */
+
+fw_run_err:
+fw_load_err:
+ipc_err:
+dbg_err:
+
+#endif
 
 	return ret;
 }
@@ -484,7 +498,6 @@ int snd_sof_device_remove(struct device *dev)
 	snd_sof_ipc_free(sdev);
 	snd_sof_free_debug(sdev);
 	snd_sof_free_trace(sdev);
-	snd_sof_remove(sdev);
 
 	/*
 	 * Unregister machine driver. This will unbind the snd_card which
@@ -494,6 +507,14 @@ int snd_sof_device_remove(struct device *dev)
 	if (!IS_ERR_OR_NULL(pdata->pdev_mach))
 		platform_device_unregister(pdata->pdev_mach);
 
+	/*
+	 * Unregistering the machine driver results in unloading the topology.
+	 * Some widgets, ex: scheduler, attempt to power down the core they are
+	 * scheduled on, when they are unloaded. Therefore, the DSP must be
+	 * removed only after the topology has been unloaded.
+	 */
+	snd_sof_remove(sdev);
+
 	/* release firmware */
 	release_firmware(pdata->fw);
 	pdata->fw = NULL;
diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c
index 065cb868bdfa..70d524ef9bc0 100644
--- a/sound/soc/sof/intel/bdw.c
+++ b/sound/soc/sof/intel/bdw.c
@@ -220,17 +220,20 @@ static void bdw_get_registers(struct snd_sof_dev *sdev,
 			      struct sof_ipc_panic_info *panic_info,
 			      u32 *stack, size_t stack_words)
 {
-	/* first read regsisters */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset, xoops, sizeof(*xoops));
+	u32 offset = sdev->dsp_oops_offset;
+
+	/* first read registers */
+	sof_mailbox_read(sdev, offset, xoops, sizeof(*xoops));
+
+	/* note: variable AR register array is not read */
 
 	/* then get panic info */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset + sizeof(*xoops),
-			 panic_info, sizeof(*panic_info));
+	offset += xoops->arch_hdr.totalsize;
+	sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
 
 	/* then get the stack */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset + sizeof(*xoops) +
-			   sizeof(*panic_info), stack,
-			   stack_words * sizeof(u32));
+	offset += sizeof(*panic_info);
+	sof_mailbox_read(sdev, offset, stack, stack_words * sizeof(u32));
 }
 
 static void bdw_dump(struct snd_sof_dev *sdev, u32 flags)
@@ -283,6 +286,8 @@ static irqreturn_t bdw_irq_thread(int irq, void *context)
 						 SHIM_IMRX, SHIM_IMRX_DONE,
 						 SHIM_IMRX_DONE);
 
+		spin_lock_irq(&sdev->ipc_lock);
+
 		/*
 		 * handle immediate reply from DSP core. If the msg is
 		 * found, set done bit in cmd_done which is called at the
@@ -294,6 +299,8 @@ static irqreturn_t bdw_irq_thread(int irq, void *context)
 		snd_sof_ipc_reply(sdev, ipcx);
 
 		bdw_dsp_done(sdev);
+
+		spin_unlock_irq(&sdev->ipc_lock);
 	}
 
 	ipcd = snd_sof_dsp_read(sdev, BDW_DSP_BAR, SHIM_IPCD);
@@ -485,7 +492,6 @@ static void bdw_get_reply(struct snd_sof_dev *sdev)
 {
 	struct snd_sof_ipc_msg *msg = sdev->msg;
 	struct sof_ipc_reply reply;
-	unsigned long flags;
 	int ret = 0;
 
 	/*
@@ -501,8 +507,6 @@ static void bdw_get_reply(struct snd_sof_dev *sdev)
 	/* get reply */
 	sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply));
 
-	spin_lock_irqsave(&sdev->ipc_lock, flags);
-
 	if (reply.error < 0) {
 		memcpy(msg->reply_data, &reply, sizeof(reply));
 		ret = reply.error;
@@ -521,8 +525,6 @@ static void bdw_get_reply(struct snd_sof_dev *sdev)
 	}
 
 	msg->reply_error = ret;
-
-	spin_unlock_irqrestore(&sdev->ipc_lock, flags);
 }
 
 static void bdw_host_done(struct snd_sof_dev *sdev)
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index 7bf9143d3106..39d1ae01c45d 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -265,17 +265,20 @@ static void byt_get_registers(struct snd_sof_dev *sdev,
 			      struct sof_ipc_panic_info *panic_info,
 			      u32 *stack, size_t stack_words)
 {
+	u32 offset = sdev->dsp_oops_offset;
+
 	/* first read regsisters */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset, xoops, sizeof(*xoops));
+	sof_mailbox_read(sdev, offset, xoops, sizeof(*xoops));
+
+	/* note: variable AR register array is not read */
 
 	/* then get panic info */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset + sizeof(*xoops),
-			 panic_info, sizeof(*panic_info));
+	offset += xoops->arch_hdr.totalsize;
+	sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
 
 	/* then get the stack */
-	sof_mailbox_read(sdev, sdev->dsp_oops_offset + sizeof(*xoops) +
-			   sizeof(*panic_info), stack,
-			   stack_words * sizeof(u32));
+	offset += sizeof(*panic_info);
+	sof_mailbox_read(sdev, offset, stack, stack_words * sizeof(u32));
 }
 
 static void byt_dump(struct snd_sof_dev *sdev, u32 flags)
@@ -329,6 +332,9 @@ static irqreturn_t byt_irq_thread(int irq, void *context)
 						   SHIM_IMRX,
 						   SHIM_IMRX_DONE,
 						   SHIM_IMRX_DONE);
+
+		spin_lock_irq(&sdev->ipc_lock);
+
 		/*
 		 * handle immediate reply from DSP core. If the msg is
 		 * found, set done bit in cmd_done which is called at the
@@ -340,6 +346,8 @@ static irqreturn_t byt_irq_thread(int irq, void *context)
 		snd_sof_ipc_reply(sdev, ipcx);
 
 		byt_dsp_done(sdev);
+
+		spin_unlock_irq(&sdev->ipc_lock);
 	}
 
 	/* new message from DSP */
@@ -383,7 +391,6 @@ static void byt_get_reply(struct snd_sof_dev *sdev)
 {
 	struct snd_sof_ipc_msg *msg = sdev->msg;
 	struct sof_ipc_reply reply;
-	unsigned long flags;
 	int ret = 0;
 
 	/*
@@ -399,8 +406,6 @@ static void byt_get_reply(struct snd_sof_dev *sdev)
 	/* get reply */
 	sof_mailbox_read(sdev, sdev->host_box.offset, &reply, sizeof(reply));
 
-	spin_lock_irqsave(&sdev->ipc_lock, flags);
-
 	if (reply.error < 0) {
 		memcpy(msg->reply_data, &reply, sizeof(reply));
 		ret = reply.error;
@@ -419,8 +424,6 @@ static void byt_get_reply(struct snd_sof_dev *sdev)
 	}
 
 	msg->reply_error = ret;
-
-	spin_unlock_irqrestore(&sdev->ipc_lock, flags);
 }
 
 static void byt_host_done(struct snd_sof_dev *sdev)
diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c
index 08a1a3d3c08d..b2eba7adcad8 100644
--- a/sound/soc/sof/intel/cnl.c
+++ b/sound/soc/sof/intel/cnl.c
@@ -64,6 +64,8 @@ static irqreturn_t cnl_ipc_irq_thread(int irq, void *context)
 					CNL_DSP_REG_HIPCCTL,
 					CNL_DSP_REG_HIPCCTL_DONE, 0);
 
+		spin_lock_irq(&sdev->ipc_lock);
+
 		/* handle immediate reply from DSP core */
 		hda_dsp_ipc_get_reply(sdev);
 		snd_sof_ipc_reply(sdev, msg);
@@ -75,6 +77,8 @@ static irqreturn_t cnl_ipc_irq_thread(int irq, void *context)
 
 		cnl_ipc_dsp_done(sdev);
 
+		spin_unlock_irq(&sdev->ipc_lock);
+
 		ret = IRQ_HANDLED;
 	}
 
diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c
index 2c3645736e1f..07bc123112c9 100644
--- a/sound/soc/sof/intel/hda-ctrl.c
+++ b/sound/soc/sof/intel/hda-ctrl.c
@@ -161,21 +161,105 @@ int hda_dsp_ctrl_clock_power_gating(struct snd_sof_dev *sdev, bool enable)
 	return 0;
 }
 
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-/*
- * While performing reset, controller may not come back properly and causing
- * issues, so recommendation is to set CGCTL.MISCBDCGE to 0 then do reset
- * (init chip) and then again set CGCTL.MISCBDCGE to 1
- */
 int hda_dsp_ctrl_init_chip(struct snd_sof_dev *sdev, bool full_reset)
 {
 	struct hdac_bus *bus = sof_to_bus(sdev);
-	int ret;
+	struct hdac_stream *stream;
+	int sd_offset, ret = 0;
+
+	if (bus->chip_init)
+		return 0;
 
 	hda_dsp_ctrl_misc_clock_gating(sdev, false);
-	ret = snd_hdac_bus_init_chip(bus, full_reset);
+
+	if (full_reset) {
+		/* clear WAKESTS */
+		snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_WAKESTS,
+					SOF_HDA_WAKESTS_INT_MASK,
+					SOF_HDA_WAKESTS_INT_MASK);
+
+		/* reset HDA controller */
+		ret = hda_dsp_ctrl_link_reset(sdev, true);
+		if (ret < 0) {
+			dev_err(sdev->dev, "error: failed to reset HDA controller\n");
+			return ret;
+		}
+
+		usleep_range(500, 1000);
+
+		/* exit HDA controller reset */
+		ret = hda_dsp_ctrl_link_reset(sdev, false);
+		if (ret < 0) {
+			dev_err(sdev->dev, "error: failed to exit HDA controller reset\n");
+			return ret;
+		}
+
+		usleep_range(1000, 1200);
+	}
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+	/* check to see if controller is ready */
+	if (!snd_hdac_chip_readb(bus, GCTL)) {
+		dev_dbg(bus->dev, "controller not ready!\n");
+		return -EBUSY;
+	}
+
+	/* Accept unsolicited responses */
+	snd_hdac_chip_updatel(bus, GCTL, AZX_GCTL_UNSOL, AZX_GCTL_UNSOL);
+
+	/* detect codecs */
+	if (!bus->codec_mask) {
+		bus->codec_mask = snd_hdac_chip_readw(bus, STATESTS);
+		dev_dbg(bus->dev, "codec_mask = 0x%lx\n", bus->codec_mask);
+	}
+#endif
+
+	/* clear stream status */
+	list_for_each_entry(stream, &bus->stream_list, list) {
+		sd_offset = SOF_STREAM_SD_OFFSET(stream);
+		snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+					sd_offset +
+					SOF_HDA_ADSP_REG_CL_SD_STS,
+					SOF_HDA_CL_DMA_SD_INT_MASK,
+					SOF_HDA_CL_DMA_SD_INT_MASK);
+	}
+
+	/* clear WAKESTS */
+	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_WAKESTS,
+				SOF_HDA_WAKESTS_INT_MASK,
+				SOF_HDA_WAKESTS_INT_MASK);
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+	/* clear rirb status */
+	snd_hdac_chip_writeb(bus, RIRBSTS, RIRB_INT_MASK);
+#endif
+
+	/* clear interrupt status register */
+	snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTSTS,
+			  SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_ALL_STREAM);
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+	/* initialize the codec command I/O */
+	snd_hdac_bus_init_cmd_io(bus);
+#endif
+
+	/* enable CIE and GIE interrupts */
+	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTCTL,
+				SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_GLOBAL_EN,
+				SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_GLOBAL_EN);
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+	/* program the position buffer */
+	if (bus->use_posbuf && bus->posbuf.addr) {
+		snd_hdac_chip_writel(bus, DPLBASE, (u32)bus->posbuf.addr);
+		snd_hdac_chip_writel(bus, DPUBASE,
+				     upper_32_bits(bus->posbuf.addr));
+	}
+#endif
+
+	bus->chip_init = true;
+
 	hda_dsp_ctrl_misc_clock_gating(sdev, true);
 
 	return ret;
 }
-#endif
diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c
index 73ead7070cde..51b285103394 100644
--- a/sound/soc/sof/intel/hda-ipc.c
+++ b/sound/soc/sof/intel/hda-ipc.c
@@ -72,7 +72,6 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev)
 	struct snd_sof_ipc_msg *msg = sdev->msg;
 	struct sof_ipc_reply reply;
 	struct sof_ipc_cmd_hdr *hdr;
-	unsigned long flags;
 	int ret = 0;
 
 	/*
@@ -84,7 +83,6 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev)
 		dev_warn(sdev->dev, "unexpected ipc interrupt raised!\n");
 		return;
 	}
-	spin_lock_irqsave(&sdev->ipc_lock, flags);
 
 	hdr = msg->msg_data;
 	if (hdr->cmd == (SOF_IPC_GLB_PM_MSG | SOF_IPC_PM_CTX_SAVE)) {
@@ -123,7 +121,6 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev)
 out:
 	msg->reply_error = ret;
 
-	spin_unlock_irqrestore(&sdev->ipc_lock, flags);
 }
 
 static bool hda_dsp_ipc_is_sof(uint32_t msg)
@@ -172,6 +169,18 @@ irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context)
 					HDA_DSP_REG_HIPCCTL,
 					HDA_DSP_REG_HIPCCTL_DONE, 0);
 
+		/*
+		 * Make sure the interrupt thread cannot be preempted between
+		 * waking up the sender and re-enabling the interrupt. Also
+		 * protect against a theoretical race with sof_ipc_tx_message():
+		 * if the DSP is fast enough to receive an IPC message, reply to
+		 * it, and the host interrupt processing calls this function on
+		 * a different core from the one, where the sending is taking
+		 * place, the message might not yet be marked as expecting a
+		 * reply.
+		 */
+		spin_lock_irq(&sdev->ipc_lock);
+
 		/* handle immediate reply from DSP core - ignore ROM messages */
 		if (hda_dsp_ipc_is_sof(msg)) {
 			hda_dsp_ipc_get_reply(sdev);
@@ -187,6 +196,8 @@ irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context)
 		/* set the done bit */
 		hda_dsp_ipc_dsp_done(sdev);
 
+		spin_unlock_irq(&sdev->ipc_lock);
+
 		ret = IRQ_HANDLED;
 	}
 
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 7e3980a2f7ba..faf1a8ada091 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -108,17 +108,21 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev,
 				  struct sof_ipc_panic_info *panic_info,
 				  u32 *stack, size_t stack_words)
 {
+	u32 offset = sdev->dsp_oops_offset;
+
 	/* first read registers */
-	sof_block_read(sdev, sdev->mmio_bar, sdev->dsp_oops_offset, xoops,
-		       sizeof(*xoops));
+	sof_mailbox_read(sdev, offset, xoops, sizeof(*xoops));
+
+	/* note: variable AR register array is not read */
 
 	/* then get panic info */
-	sof_block_read(sdev, sdev->mmio_bar, sdev->dsp_oops_offset +
-		       sizeof(*xoops), panic_info, sizeof(*panic_info));
+	offset += xoops->arch_hdr.totalsize;
+	sof_block_read(sdev, sdev->mmio_bar, offset,
+		       panic_info, sizeof(*panic_info));
 
 	/* then get the stack */
-	sof_block_read(sdev, sdev->mmio_bar, sdev->dsp_oops_offset +
-		       sizeof(*xoops) + sizeof(*panic_info), stack,
+	offset += sizeof(*panic_info);
+	sof_block_read(sdev, sdev->mmio_bar, offset, stack,
 		       stack_words * sizeof(u32));
 }
 
@@ -223,7 +227,9 @@ static int hda_init(struct snd_sof_dev *sdev)
 
 	/* initialise hdac bus */
 	bus->addr = pci_resource_start(pci, 0);
+#if IS_ENABLED(CONFIG_PCI)
 	bus->remap_addr = pci_ioremap_bar(pci, 0);
+#endif
 	if (!bus->remap_addr) {
 		dev_err(bus->dev, "error: ioremap error\n");
 		return -ENXIO;
@@ -264,9 +270,12 @@ static const char *fixup_tplg_name(struct snd_sof_dev *sdev,
 	return tplg_filename;
 }
 
+#endif
+
 static int hda_init_caps(struct snd_sof_dev *sdev)
 {
 	struct hdac_bus *bus = sof_to_bus(sdev);
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
 	struct hdac_ext_link *hlink;
 	struct snd_soc_acpi_mach_params *mach_params;
 	struct snd_soc_acpi_mach *hda_mach;
@@ -274,8 +283,9 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
 	struct snd_soc_acpi_mach *mach;
 	const char *tplg_filename;
 	int codec_num = 0;
-	int ret = 0;
 	int i;
+#endif
+	int ret = 0;
 
 	device_disable_async_suspend(bus->dev);
 
@@ -283,6 +293,14 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
 	if (bus->ppcap)
 		dev_dbg(sdev->dev, "PP capability, will probe DSP later.\n");
 
+	ret = hda_dsp_ctrl_init_chip(sdev, true);
+	if (ret < 0) {
+		dev_err(bus->dev, "error: init chip failed with ret: %d\n",
+			ret);
+		return ret;
+	}
+
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
 	if (bus->mlcap)
 		snd_hdac_ext_bus_get_ml_capabilities(bus);
 
@@ -293,12 +311,6 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
 		return ret;
 	}
 
-	ret = hda_dsp_ctrl_init_chip(sdev, true);
-	if (ret < 0) {
-		dev_err(bus->dev, "error: init chip failed with ret: %d\n", ret);
-		goto out;
-	}
-
 	/* codec detection */
 	if (!bus->codec_mask) {
 		dev_info(bus->dev, "no hda codecs found!\n");
@@ -339,8 +351,10 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
 				/* use local variable for readability */
 				tplg_filename = pdata->tplg_filename;
 				tplg_filename = fixup_tplg_name(sdev, tplg_filename);
-				if (!tplg_filename)
-					goto out;
+				if (!tplg_filename) {
+					hda_codec_i915_exit(sdev);
+					return ret;
+				}
 				pdata->tplg_filename = tplg_filename;
 			}
 		}
@@ -364,35 +378,10 @@ static int hda_init_caps(struct snd_sof_dev *sdev)
 	 */
 	list_for_each_entry(hlink, &bus->hlink_list, list)
 		snd_hdac_ext_bus_link_put(bus, hlink);
-
-	return 0;
-
-out:
-	hda_codec_i915_exit(sdev);
-	return ret;
-}
-
-#else
-
-static int hda_init_caps(struct snd_sof_dev *sdev)
-{
-	/*
-	 * set CGCTL.MISCBDCGE to 0 during reset and set back to 1
-	 * when reset finished.
-	 * TODO: maybe no need for init_caps?
-	 */
-	hda_dsp_ctrl_misc_clock_gating(sdev, 0);
-
-	/* clear WAKESTS */
-	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_WAKESTS,
-				SOF_HDA_WAKESTS_INT_MASK,
-				SOF_HDA_WAKESTS_INT_MASK);
-
+#endif
 	return 0;
 }
 
-#endif
-
 static const struct sof_intel_dsp_desc
 	*get_chip_info(struct snd_sof_pdata *pdata)
 {
@@ -409,9 +398,8 @@ int hda_dsp_probe(struct snd_sof_dev *sdev)
 	struct pci_dev *pci = to_pci_dev(sdev->dev);
 	struct sof_intel_hda_dev *hdev;
 	struct hdac_bus *bus;
-	struct hdac_stream *stream;
 	const struct sof_intel_dsp_desc *chip;
-	int sd_offset, ret = 0;
+	int ret = 0;
 
 	/*
 	 * detect DSP by checking class/subclass/prog-id information
@@ -468,7 +456,9 @@ int hda_dsp_probe(struct snd_sof_dev *sdev)
 		goto hdac_bus_unmap;
 
 	/* DSP base */
+#if IS_ENABLED(CONFIG_PCI)
 	sdev->bar[HDA_DSP_BAR] = pci_ioremap_bar(pci, HDA_DSP_BAR);
+#endif
 	if (!sdev->bar[HDA_DSP_BAR]) {
 		dev_err(sdev->dev, "error: ioremap error\n");
 		ret = -ENXIO;
@@ -558,56 +548,9 @@ int hda_dsp_probe(struct snd_sof_dev *sdev)
 	if (ret < 0)
 		goto free_ipc_irq;
 
-	/* reset HDA controller */
-	ret = hda_dsp_ctrl_link_reset(sdev, true);
-	if (ret < 0) {
-		dev_err(sdev->dev, "error: failed to reset HDA controller\n");
-		goto free_ipc_irq;
-	}
-
-	/* exit HDA controller reset */
-	ret = hda_dsp_ctrl_link_reset(sdev, false);
-	if (ret < 0) {
-		dev_err(sdev->dev, "error: failed to exit HDA controller reset\n");
-		goto free_ipc_irq;
-	}
-
-	/* clear stream status */
-	list_for_each_entry(stream, &bus->stream_list, list) {
-		sd_offset = SOF_STREAM_SD_OFFSET(stream);
-		snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
-					sd_offset +
-					SOF_HDA_ADSP_REG_CL_SD_STS,
-					SOF_HDA_CL_DMA_SD_INT_MASK,
-					SOF_HDA_CL_DMA_SD_INT_MASK);
-	}
-
-	/* clear WAKESTS */
-	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_WAKESTS,
-				SOF_HDA_WAKESTS_INT_MASK,
-				SOF_HDA_WAKESTS_INT_MASK);
-
-	/* clear interrupt status register */
-	snd_sof_dsp_write(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTSTS,
-			  SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_ALL_STREAM);
-
-	/* enable CIE and GIE interrupts */
-	snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTCTL,
-				SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_GLOBAL_EN,
-				SOF_HDA_INT_CTRL_EN | SOF_HDA_INT_GLOBAL_EN);
-
-	/* re-enable CGCTL.MISCBDCGE after reset */
-	hda_dsp_ctrl_misc_clock_gating(sdev, true);
-
-	device_disable_async_suspend(&pci->dev);
-
-	/* enable DSP features */
-	snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL,
-				SOF_HDA_PPCTL_GPROCEN, SOF_HDA_PPCTL_GPROCEN);
-
-	/* enable DSP IRQ */
-	snd_sof_dsp_update_bits(sdev, HDA_DSP_PP_BAR, SOF_HDA_REG_PP_PPCTL,
-				SOF_HDA_PPCTL_PIE, SOF_HDA_PPCTL_PIE);
+	/* enable ppcap interrupt */
+	hda_dsp_ctrl_ppcap_enable(sdev, true);
+	hda_dsp_ctrl_ppcap_int_enable(sdev, true);
 
 	/* initialize waitq for code loading */
 	init_waitqueue_head(&sdev->waitq);
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index f0b9d3c53f6f..2414640a32d1 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -115,7 +115,7 @@ static void ipc_log_header(struct device *dev, u8 *text, u32 cmd)
 		}
 		break;
 	case SOF_IPC_GLB_COMP_MSG:
-		str = "GLB_COMP_MSG: SET_VALUE";
+		str = "GLB_COMP_MSG";
 		switch (type) {
 		case SOF_IPC_COMP_SET_VALUE:
 			str2 = "SET_VALUE"; break;
@@ -308,19 +308,8 @@ EXPORT_SYMBOL(sof_ipc_tx_message);
 int snd_sof_ipc_reply(struct snd_sof_dev *sdev, u32 msg_id)
 {
 	struct snd_sof_ipc_msg *msg = &sdev->ipc->msg;
-	unsigned long flags;
-
-	/*
-	 * Protect against a theoretical race with sof_ipc_tx_message(): if the
-	 * DSP is fast enough to receive an IPC message, reply to it, and the
-	 * host interrupt processing calls this function on a different core
-	 * from the one, where the sending is taking place, the message might
-	 * not yet be marked as expecting a reply.
-	 */
-	spin_lock_irqsave(&sdev->ipc_lock, flags);
 
 	if (msg->ipc_complete) {
-		spin_unlock_irqrestore(&sdev->ipc_lock, flags);
 		dev_err(sdev->dev, "error: no reply expected, received 0x%x",
 			msg_id);
 		return -EINVAL;
@@ -330,8 +319,6 @@ int snd_sof_ipc_reply(struct snd_sof_dev *sdev, u32 msg_id)
 	msg->ipc_complete = true;
 	wake_up(&msg->waitq);
 
-	spin_unlock_irqrestore(&sdev->ipc_lock, flags);
-
 	return 0;
 }
 EXPORT_SYMBOL(snd_sof_ipc_reply);
@@ -776,16 +763,19 @@ int snd_sof_ipc_valid(struct snd_sof_dev *sdev)
 		}
 	}
 
-	if (ready->debug.bits.build) {
+	if (ready->flags & SOF_IPC_INFO_BUILD) {
 		dev_info(sdev->dev,
 			 "Firmware debug build %d on %s-%s - options:\n"
 			 " GDB: %s\n"
 			 " lock debug: %s\n"
 			 " lock vdebug: %s\n",
 			 v->build, v->date, v->time,
-			 ready->debug.bits.gdb ? "enabled" : "disabled",
-			 ready->debug.bits.locks ? "enabled" : "disabled",
-			 ready->debug.bits.locks_verbose ? "enabled" : "disabled");
+			 ready->flags & SOF_IPC_INFO_GDB ?
+				"enabled" : "disabled",
+			 ready->flags & SOF_IPC_INFO_LOCKS ?
+				"enabled" : "disabled",
+			 ready->flags & SOF_IPC_INFO_LOCKSV ?
+				"enabled" : "disabled");
 	}
 
 	/* copy the fw_version into debugfs at first boot */
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index 81c7452aae17..628fae552442 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -372,6 +372,8 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
 				 msecs_to_jiffies(sdev->boot_timeout));
 	if (ret == 0) {
 		dev_err(sdev->dev, "error: firmware boot failure\n");
+		/* after this point FW_READY msg should be ignored */
+		sdev->boot_complete = true;
 		snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX |
 			SOF_DBG_TEXT | SOF_DBG_PCI);
 		return -EIO;
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index 649968841dad..dace6c4cd91e 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -211,8 +211,8 @@ static int sof_pcm_hw_params(struct snd_pcm_substream *substream,
 	/* save pcm hw_params */
 	memcpy(&spcm->params[substream->stream], params, sizeof(*params));
 
-	INIT_WORK(&spcm->stream[substream->stream].period_elapsed_work,
-		  sof_pcm_period_elapsed_work);
+	/* clear hw_params_upon_resume flag */
+	spcm->hw_params_upon_resume[substream->stream] = 0;
 
 	return ret;
 }
@@ -429,8 +429,8 @@ static int sof_pcm_open(struct snd_pcm_substream *substream)
 	dev_dbg(sdev->dev, "pcm: open stream %d dir %d\n", spcm->pcm.pcm_id,
 		substream->stream);
 
-	/* clear hw_params_upon_resume flag */
-	spcm->hw_params_upon_resume[substream->stream] = 0;
+	INIT_WORK(&spcm->stream[substream->stream].period_elapsed_work,
+		  sof_pcm_period_elapsed_work);
 
 	caps = &spcm->pcm.caps[substream->stream];
 
diff --git a/sound/soc/sof/xtensa/core.c b/sound/soc/sof/xtensa/core.c
index c3ad23a85b99..46a4905a9dce 100644
--- a/sound/soc/sof/xtensa/core.c
+++ b/sound/soc/sof/xtensa/core.c
@@ -110,7 +110,7 @@ static void xtensa_stack(struct snd_sof_dev *sdev, void *oops, u32 *stack,
 			 u32 stack_words)
 {
 	struct sof_ipc_dsp_oops_xtensa *xoops = oops;
-	u32 stack_ptr = xoops->stack;
+	u32 stack_ptr = xoops->plat_hdr.stackptr;
 	/* 4 * 8chars + 3 ws + 1 terminating NUL */
 	unsigned char buf[4 * 8 + 3 + 1];
 	int i;
diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c
index f2deffe026c5..9e1f00e8c32b 100644
--- a/sound/soc/sunxi/sun4i-codec.c
+++ b/sound/soc/sunxi/sun4i-codec.c
@@ -1320,6 +1320,15 @@ static int sun4i_codec_spk_event(struct snd_soc_dapm_widget *w,
 	gpiod_set_value_cansleep(scodec->gpio_pa,
 				 !!SND_SOC_DAPM_EVENT_ON(event));
 
+	if (SND_SOC_DAPM_EVENT_ON(event)) {
+		/*
+		 * Need a delay to wait for DAC to push the data. 700ms seems
+		 * to be the best compromise not to feel this delay while
+		 * playing a sound.
+		 */
+		msleep(700);
+	}
+
 	return 0;
 }
 
diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c
index c53bfed8d4c2..fd7c37596f21 100644
--- a/sound/soc/sunxi/sun4i-i2s.c
+++ b/sound/soc/sunxi/sun4i-i2s.c
@@ -106,7 +106,7 @@
 
 #define SUN8I_I2S_TX_CHAN_MAP_REG	0x44
 #define SUN8I_I2S_TX_CHAN_SEL_REG	0x34
-#define SUN8I_I2S_TX_CHAN_OFFSET_MASK		GENMASK(13, 11)
+#define SUN8I_I2S_TX_CHAN_OFFSET_MASK		GENMASK(13, 12)
 #define SUN8I_I2S_TX_CHAN_OFFSET(offset)	(offset << 12)
 #define SUN8I_I2S_TX_CHAN_EN_MASK		GENMASK(11, 4)
 #define SUN8I_I2S_TX_CHAN_EN(num_chan)		(((1 << num_chan) - 1) << 4)
@@ -456,6 +456,10 @@ static int sun4i_i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
 		regmap_update_bits(i2s->regmap, SUN8I_I2S_TX_CHAN_SEL_REG,
 				   SUN8I_I2S_TX_CHAN_OFFSET_MASK,
 				   SUN8I_I2S_TX_CHAN_OFFSET(offset));
+
+		regmap_update_bits(i2s->regmap, SUN8I_I2S_RX_CHAN_SEL_REG,
+				   SUN8I_I2S_TX_CHAN_OFFSET_MASK,
+				   SUN8I_I2S_TX_CHAN_OFFSET(offset));
 	}
 
 	regmap_field_write(i2s->field_fmt_mode, val);
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 280015c22598..076df22e4bda 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -100,7 +100,9 @@ static void nfit_test_kill(void *_pgmap)
 {
 	struct dev_pagemap *pgmap = _pgmap;
 
+	WARN_ON(!pgmap || !pgmap->ref || !pgmap->kill || !pgmap->cleanup);
 	pgmap->kill(pgmap->ref);
+	pgmap->cleanup(pgmap->ref);
 }
 
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)