677 files changed, 27485 insertions, 6017 deletions
diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl index 03f01e76add7..6c6e81a9eaf4 100644 --- a/Documentation/DocBook/gpu.tmpl +++ b/Documentation/DocBook/gpu.tmpl @@ -124,6 +124,43 @@ <para> [Insert diagram of typical DRM stack here] </para> + <sect1> + <title>Style Guidelines</title> + <para> + For consistency this documentation uses American English. Abbreviations + are written as all-uppercase, for example: DRM, KMS, IOCTL, CRTC, and so + on. To aid in reading, the documentation makes full use of the markup + characters kerneldoc provides: @parameter for function parameters, @member + for structure members, &structure to reference structures and + function() for functions. These all get automatically hyperlinked if + kerneldoc for the referenced objects exists. When referencing entries in + function vtables please use ->vfunc(). Note that kerneldoc does + not support referencing struct members directly, so please add a reference + to the vtable struct somewhere in the same paragraph or at least in the same section. + </para> + <para> + Except in special situations (to separate locked from unlocked variants) + locking requirements for functions aren't documented in the kerneldoc. + Instead locking should be checked at runtime using e.g. + <code>WARN_ON(!mutex_is_locked(...));</code>. Since it's much easier to + ignore documentation than runtime noise, this provides more value. On + top of that, runtime checks need to be updated when the locking rules + change, increasing the chances that they're correct. Within the + documentation the locking rules should be explained in the relevant + structures: either in the comment for the lock explaining what it + protects, or in a note on the data fields stating which lock protects them, or + both. + </para> + <para> + Functions which have a non-<code>void</code> return value should have a + section called "Returns" explaining the expected return values in + different cases and their meanings. Currently there's no consensus whether + that section name should be all upper-case or not, and whether it should + end in a colon or not. Go with the file-local style. Other common section + names are "Notes" with information for dangerous or tricky corner cases, + and "FIXME" where the interface could be cleaned up. + </para> + </sect1> </chapter> <!-- Internals --> @@ -946,12 +983,10 @@ int max_width, max_height;</synopsis> <sect2> <title>Atomic Mode Setting Function Reference</title> !Edrivers/gpu/drm/drm_atomic.c +!Idrivers/gpu/drm/drm_atomic.c </sect2> <sect2> - <title>Frame Buffer Creation</title> - <synopsis>struct drm_framebuffer *(*fb_create)(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_mode_fb_cmd2 *mode_cmd);</synopsis> + <title>Frame Buffer Abstraction</title> <para> Frame buffers are abstract memory objects that provide a source of pixels to scanout to a CRTC. Applications explicitly request the @@ -970,73 +1005,6 @@ int max_width, max_height;</synopsis> and so expects TTM handles in the create ioctl and not GEM handles. </para> <para> - Drivers must first validate the requested frame buffer parameters passed - through the mode_cmd argument. In particular this is where invalid - sizes, pixel formats or pitches can be caught. - </para> - <para> - If the parameters are deemed valid, drivers then create, initialize and - return an instance of struct <structname>drm_framebuffer</structname>. - If desired the instance can be embedded in a larger driver-specific - structure.
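The creation sequence just described (validate the request, allocate the possibly embedded instance, fill it in, then publish it) is sketched below; the helpers it uses, drm_helper_mode_fill_fb_struct() and drm_framebuffer_init(), are detailed in the following paragraphs. This is a minimal illustration only, and the mydrv_* names are hypothetical:

    #include <linux/slab.h>
    #include <drm/drmP.h>
    #include <drm/drm_crtc_helper.h>

    /* Hypothetical driver framebuffer, embedding struct drm_framebuffer. */
    struct mydrv_framebuffer {
    	struct drm_framebuffer base;
    };

    static void mydrv_fb_destroy(struct drm_framebuffer *fb)
    {
    	/* Release core resources, then the driver object itself. */
    	drm_framebuffer_cleanup(fb);
    	kfree(container_of(fb, struct mydrv_framebuffer, base));
    }

    static const struct drm_framebuffer_funcs mydrv_fb_funcs = {
    	.destroy = mydrv_fb_destroy,
    };

    static struct drm_framebuffer *
    mydrv_fb_create(struct drm_device *dev, struct drm_file *file_priv,
    		struct drm_mode_fb_cmd2 *mode_cmd)
    {
    	struct mydrv_framebuffer *fb;
    	int ret;

    	/* Step 1: validate sizes, pixel format and pitches. */
    	if (!mode_cmd->pitches[0])
    		return ERR_PTR(-EINVAL);

    	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
    	if (!fb)
    		return ERR_PTR(-ENOMEM);

    	/* Step 2: fill width/height/pitches/offsets/... from the request. */
    	drm_helper_mode_fill_fb_struct(&fb->base, mode_cmd);

    	/* Step 3: publish; this must be the last step, since afterwards the
    	 * framebuffer is visible to other threads. */
    	ret = drm_framebuffer_init(dev, &fb->base, &mydrv_fb_funcs);
    	if (ret) {
    		kfree(fb);
    		return ERR_PTR(ret);
    	}

    	return &fb->base;
    }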
Drivers must fill its <structfield>width</structfield>, - <structfield>height</structfield>, <structfield>pitches</structfield>, - <structfield>offsets</structfield>, <structfield>depth</structfield>, - <structfield>bits_per_pixel</structfield> and - <structfield>pixel_format</structfield> fields from the values passed - through the <parameter>drm_mode_fb_cmd2</parameter> argument. They - should call the <function>drm_helper_mode_fill_fb_struct</function> - helper function to do so. - </para> - - <para> - The initialization of the new framebuffer instance is finalized with a - call to <function>drm_framebuffer_init</function> which takes a pointer - to DRM frame buffer operations (struct - <structname>drm_framebuffer_funcs</structname>). Note that this function - publishes the framebuffer and so from this point on it can be accessed - concurrently from other threads. Hence it must be the last step in the - driver's framebuffer initialization sequence. Frame buffer operations - are - <itemizedlist> - <listitem> - <synopsis>int (*create_handle)(struct drm_framebuffer *fb, - struct drm_file *file_priv, unsigned int *handle);</synopsis> - <para> - Create a handle to the frame buffer underlying memory object. If - the frame buffer uses a multi-plane format, the handle will - reference the memory object associated with the first plane. - </para> - <para> - Drivers call <function>drm_gem_handle_create</function> to create - the handle. - </para> - </listitem> - <listitem> - <synopsis>void (*destroy)(struct drm_framebuffer *framebuffer);</synopsis> - <para> - Destroy the frame buffer object and frees all associated - resources. Drivers must call - <function>drm_framebuffer_cleanup</function> to free resources - allocated by the DRM core for the frame buffer object, and must - make sure to unreference all memory objects associated with the - frame buffer. Handles created by the - <methodname>create_handle</methodname> operation are released by - the DRM core. - </para> - </listitem> - <listitem> - <synopsis>int (*dirty)(struct drm_framebuffer *framebuffer, - struct drm_file *file_priv, unsigned flags, unsigned color, - struct drm_clip_rect *clips, unsigned num_clips);</synopsis> - <para> - This optional operation notifies the driver that a region of the - frame buffer has changed in response to a DRM_IOCTL_MODE_DIRTYFB - ioctl call. - </para> - </listitem> - </itemizedlist> - </para> - <para> The lifetime of a drm framebuffer is controlled with a reference count, drivers can grab additional references with <function>drm_framebuffer_reference</function>and drop them @@ -1173,137 +1141,6 @@ int max_width, max_height;</synopsis> pointer to CRTC functions. </para> </sect3> - <sect3 id="drm-kms-crtcops"> - <title>CRTC Operations</title> - <sect4> - <title>Set Configuration</title> - <synopsis>int (*set_config)(struct drm_mode_set *set);</synopsis> - <para> - Apply a new CRTC configuration to the device. The configuration - specifies a CRTC, a frame buffer to scan out from, a (x,y) position in - the frame buffer, a display mode and an array of connectors to drive - with the CRTC if possible. - </para> - <para> - If the frame buffer specified in the configuration is NULL, the driver - must detach all encoders connected to the CRTC and all connectors - attached to those encoders and disable them. - </para> - <para> - This operation is called with the mode config lock held. 
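Because this operation runs with the mode config lock held, it is also a natural place for the runtime locking checks recommended in the style guidelines at the top of this document. A minimal, hypothetical sketch (mydrv_apply_config() and struct mydrv_output are invented for illustration) that also demonstrates the kerneldoc markup conventions, including a "Returns" section:

    #include <drm/drmP.h>

    struct mydrv_output {
    	int pipe;	/* hypothetical driver-private state */
    };

    /**
     * mydrv_apply_config - apply a new configuration to an output
     * @dev: DRM device
     * @output: the &struct mydrv_output to update; @dev and @output show the
     *	@parameter markup, and a vtable entry would be written as ->set_config()
     *
     * Locking requirements are deliberately not documented; they are checked
     * at runtime instead.
     *
     * Returns:
     * 0 on success or a negative error code on failure.
     */
    static int mydrv_apply_config(struct drm_device *dev,
    			      struct mydrv_output *output)
    {
    	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));

    	return 0;
    }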
- </para> - <note><para> - Note that the drm core has no notion of restoring the mode setting - state after resume, since all resume handling is in the full - responsibility of the driver. The common mode setting helper library - though provides a helper which can be used for this: - <function>drm_helper_resume_force_mode</function>. - </para></note> - </sect4> - <sect4> - <title>Page Flipping</title> - <synopsis>int (*page_flip)(struct drm_crtc *crtc, struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event);</synopsis> - <para> - Schedule a page flip to the given frame buffer for the CRTC. This - operation is called with the mode config mutex held. - </para> - <para> - Page flipping is a synchronization mechanism that replaces the frame - buffer being scanned out by the CRTC with a new frame buffer during - vertical blanking, avoiding tearing. When an application requests a page - flip the DRM core verifies that the new frame buffer is large enough to - be scanned out by the CRTC in the currently configured mode and then - calls the CRTC <methodname>page_flip</methodname> operation with a - pointer to the new frame buffer. - </para> - <para> - The <methodname>page_flip</methodname> operation schedules a page flip. - Once any pending rendering targeting the new frame buffer has - completed, the CRTC will be reprogrammed to display that frame buffer - after the next vertical refresh. The operation must return immediately - without waiting for rendering or page flip to complete and must block - any new rendering to the frame buffer until the page flip completes. - </para> - <para> - If a page flip can be successfully scheduled the driver must set the - <code>drm_crtc->fb</code> field to the new framebuffer pointed to - by <code>fb</code>. This is important so that the reference counting - on framebuffers stays balanced. - </para> - <para> - If a page flip is already pending, the - <methodname>page_flip</methodname> operation must return - -<errorname>EBUSY</errorname>. - </para> - <para> - To synchronize page flip to vertical blanking the driver will likely - need to enable vertical blanking interrupts. It should call - <function>drm_vblank_get</function> for that purpose, and call - <function>drm_vblank_put</function> after the page flip completes. - </para> - <para> - If the application has requested to be notified when page flip completes - the <methodname>page_flip</methodname> operation will be called with a - non-NULL <parameter>event</parameter> argument pointing to a - <structname>drm_pending_vblank_event</structname> instance. Upon page - flip completion the driver must call <methodname>drm_send_vblank_event</methodname> - to fill in the event and send to wake up any waiting processes. - This can be performed with - <programlisting><![CDATA[ - spin_lock_irqsave(&dev->event_lock, flags); - ... - drm_send_vblank_event(dev, pipe, event); - spin_unlock_irqrestore(&dev->event_lock, flags); - ]]></programlisting> - </para> - <note><para> - FIXME: Could drivers that don't need to wait for rendering to complete - just add the event to <literal>dev->vblank_event_list</literal> and - let the DRM core handle everything, as for "normal" vertical blanking - events? - </para></note> - <para> - While waiting for the page flip to complete, the - <literal>event->base.link</literal> list head can be used freely by - the driver to store the pending event in a driver-specific list. 
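The scheduling side of these page-flip rules can be sketched as below. This skeleton mirrors the synopsis above and is purely illustrative; struct mydrv_crtc, to_mydrv_crtc() and mydrv_hw_queue_flip() are hypothetical:

    #include <drm/drmP.h>

    /* Hypothetical per-CRTC state; pending_event is sent from the vblank
     * interrupt handler with drm_send_vblank_event() once the flip is done. */
    struct mydrv_crtc {
    	struct drm_crtc base;
    	unsigned int pipe;
    	struct drm_pending_vblank_event *pending_event;
    };

    #define to_mydrv_crtc(c) container_of(c, struct mydrv_crtc, base)

    static void mydrv_hw_queue_flip(struct mydrv_crtc *mcrtc,
    				struct drm_framebuffer *fb)
    {
    	/* Program the new scanout address; hardware latches it at vblank. */
    }

    static int mydrv_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
    			   struct drm_pending_vblank_event *event)
    {
    	struct mydrv_crtc *mcrtc = to_mydrv_crtc(crtc);
    	struct drm_device *dev = crtc->dev;
    	unsigned long flags;
    	int ret;

    	/* Dropped again by drm_vblank_put() when the flip has completed. */
    	ret = drm_vblank_get(dev, mcrtc->pipe);
    	if (ret)
    		return ret;

    	spin_lock_irqsave(&dev->event_lock, flags);
    	if (mcrtc->pending_event) {
    		/* Only one flip may be pending at any time. */
    		spin_unlock_irqrestore(&dev->event_lock, flags);
    		drm_vblank_put(dev, mcrtc->pipe);
    		return -EBUSY;
    	}
    	mcrtc->pending_event = event;	/* may be NULL */
    	spin_unlock_irqrestore(&dev->event_lock, flags);

    	mydrv_hw_queue_flip(mcrtc, fb);

    	/* Keep framebuffer refcounting balanced (drm_crtc->fb in the text
    	 * above; current kernels track it on the primary plane). */
    	crtc->primary->fb = fb;

    	return 0;
    }

On completion the driver's vblank interrupt handler would send the stored event with drm_send_vblank_event() under dev->event_lock, as in the snippet above, and drop the vblank reference with drm_vblank_put().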
- </para> - <para> - If the file handle is closed before the event is signaled, drivers must - take care to destroy the event in their - <methodname>preclose</methodname> operation (and, if needed, call - <function>drm_vblank_put</function>). - </para> - </sect4> - <sect4> - <title>Miscellaneous</title> - <itemizedlist> - <listitem> - <synopsis>void (*set_property)(struct drm_crtc *crtc, - struct drm_property *property, uint64_t value);</synopsis> - <para> - Set the value of the given CRTC property to - <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/> - for more information about properties. - </para> - </listitem> - <listitem> - <synopsis>void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - uint32_t start, uint32_t size);</synopsis> - <para> - Apply a gamma table to the device. The operation is optional. - </para> - </listitem> - <listitem> - <synopsis>void (*destroy)(struct drm_crtc *crtc);</synopsis> - <para> - Destroy the CRTC when not needed anymore. See - <xref linkend="drm-kms-init"/>. - </para> - </listitem> - </itemizedlist> - </sect4> - </sect3> </sect2> <sect2> <title>Planes (struct <structname>drm_plane</structname>)</title> @@ -1320,7 +1157,7 @@ int max_width, max_height;</synopsis> <listitem> DRM_PLANE_TYPE_PRIMARY represents a "main" plane for a CRTC. Primary planes are the planes operated upon by CRTC modesetting and flipping - operations described in <xref linkend="drm-kms-crtcops"/>. + operations described in the page_flip hook in <structname>drm_crtc_funcs</structname>. </listitem> <listitem> DRM_PLANE_TYPE_CURSOR represents a "cursor" plane for a CRTC. Cursor @@ -1357,52 +1194,6 @@ int max_width, max_height;</synopsis> primary plane with standard capabilities. </para> </sect3> - <sect3> - <title>Plane Operations</title> - <itemizedlist> - <listitem> - <synopsis>int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc, - struct drm_framebuffer *fb, int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h);</synopsis> - <para> - Enable and configure the plane to use the given CRTC and frame buffer. - </para> - <para> - The source rectangle in frame buffer memory coordinates is given by - the <parameter>src_x</parameter>, <parameter>src_y</parameter>, - <parameter>src_w</parameter> and <parameter>src_h</parameter> - parameters (as 16.16 fixed point values). Devices that don't support - subpixel plane coordinates can ignore the fractional part. - </para> - <para> - The destination rectangle in CRTC coordinates is given by the - <parameter>crtc_x</parameter>, <parameter>crtc_y</parameter>, - <parameter>crtc_w</parameter> and <parameter>crtc_h</parameter> - parameters (as integer values). Devices scale the source rectangle to - the destination rectangle. If scaling is not supported, and the source - rectangle size doesn't match the destination rectangle size, the - driver must return a -<errorname>EINVAL</errorname> error. - </para> - </listitem> - <listitem> - <synopsis>int (*disable_plane)(struct drm_plane *plane);</synopsis> - <para> - Disable the plane. The DRM core calls this method in response to a - DRM_IOCTL_MODE_SETPLANE ioctl call with the frame buffer ID set to 0. - Disabled planes must not be processed by the CRTC. - </para> - </listitem> - <listitem> - <synopsis>void (*destroy)(struct drm_plane *plane);</synopsis> - <para> - Destroy the plane when not needed anymore. See - <xref linkend="drm-kms-init"/>. 
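To illustrate the 16.16 fixed-point source rectangle handling and the -EINVAL rule for unscalable hardware described above, a minimal hypothetical sketch (mydrv_hw_program_plane() stands in for the actual register programming):

    #include <drm/drmP.h>

    static void mydrv_hw_program_plane(struct drm_plane *plane,
    				   struct drm_framebuffer *fb,
    				   int crtc_x, int crtc_y,
    				   unsigned int w, unsigned int h,
    				   unsigned int src_x, unsigned int src_y)
    {
    	/* Hypothetical register programming. */
    }

    static int mydrv_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
    			      struct drm_framebuffer *fb,
    			      int crtc_x, int crtc_y,
    			      unsigned int crtc_w, unsigned int crtc_h,
    			      uint32_t src_x, uint32_t src_y,
    			      uint32_t src_w, uint32_t src_h)
    {
    	/* Source coordinates are 16.16 fixed point; hardware without
    	 * sub-pixel precision simply drops the fractional part. */
    	unsigned int sw = src_w >> 16;
    	unsigned int sh = src_h >> 16;

    	/* No scaling engine: source and destination sizes must match. */
    	if (sw != crtc_w || sh != crtc_h)
    		return -EINVAL;

    	mydrv_hw_program_plane(plane, fb, crtc_x, crtc_y, crtc_w, crtc_h,
    			       src_x >> 16, src_y >> 16);
    	return 0;
    }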
- </para> - </listitem> - </itemizedlist> - </sect3> </sect2> <sect2> <title>Encoders (struct <structname>drm_encoder</structname>)</title> @@ -1459,27 +1250,6 @@ int max_width, max_height;</synopsis> encoders they want to use to a CRTC. </para> </sect3> - <sect3> - <title>Encoder Operations</title> - <itemizedlist> - <listitem> - <synopsis>void (*destroy)(struct drm_encoder *encoder);</synopsis> - <para> - Called to destroy the encoder when not needed anymore. See - <xref linkend="drm-kms-init"/>. - </para> - </listitem> - <listitem> - <synopsis>void (*set_property)(struct drm_plane *plane, - struct drm_property *property, uint64_t value);</synopsis> - <para> - Set the value of the given plane property to - <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/> - for more information about properties. - </para> - </listitem> - </itemizedlist> - </sect3> </sect2> <sect2> <title>Connectors (struct <structname>drm_connector</structname>)</title> @@ -1683,27 +1453,6 @@ int max_width, max_height;</synopsis> connector_status_unknown. </para> </sect4> - <sect4> - <title>Miscellaneous</title> - <itemizedlist> - <listitem> - <synopsis>void (*set_property)(struct drm_connector *connector, - struct drm_property *property, uint64_t value);</synopsis> - <para> - Set the value of the given connector property to - <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/> - for more information about properties. - </para> - </listitem> - <listitem> - <synopsis>void (*destroy)(struct drm_connector *connector);</synopsis> - <para> - Destroy the connector when not needed anymore. See - <xref linkend="drm-kms-init"/>. - </para> - </listitem> - </itemizedlist> - </sect4> </sect3> </sect2> <sect2> @@ -1830,83 +1579,7 @@ void intel_crt_init(struct drm_device *dev) entities. </para> <sect2> - <title>Helper Functions</title> - <itemizedlist> - <listitem> - <synopsis>int drm_crtc_helper_set_config(struct drm_mode_set *set);</synopsis> - <para> - The <function>drm_crtc_helper_set_config</function> helper function - is a CRTC <methodname>set_config</methodname> implementation. It - first tries to locate the best encoder for each connector by calling - the connector <methodname>best_encoder</methodname> helper - operation. - </para> - <para> - After locating the appropriate encoders, the helper function will - call the <methodname>mode_fixup</methodname> encoder and CRTC helper - operations to adjust the requested mode, or reject it completely in - which case an error will be returned to the application. If the new - configuration after mode adjustment is identical to the current - configuration the helper function will return without performing any - other operation. - </para> - <para> - If the adjusted mode is identical to the current mode but changes to - the frame buffer need to be applied, the - <function>drm_crtc_helper_set_config</function> function will call - the CRTC <methodname>mode_set_base</methodname> helper operation. If - the adjusted mode differs from the current mode, or if the - <methodname>mode_set_base</methodname> helper operation is not - provided, the helper function performs a full mode set sequence by - calling the <methodname>prepare</methodname>, - <methodname>mode_set</methodname> and - <methodname>commit</methodname> CRTC and encoder helper operations, - in that order. 
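A hedged sketch of the CRTC helper vtable that drm_crtc_helper_set_config() drives through this sequence; all mydrv_* stubs are hypothetical:

    #include <drm/drmP.h>
    #include <drm/drm_crtc_helper.h>

    static bool mydrv_crtc_mode_fixup(struct drm_crtc *crtc,
    				  const struct drm_display_mode *mode,
    				  struct drm_display_mode *adjusted_mode)
    {
    	return true;	/* accept the mode unchanged, or adjust/reject it */
    }

    static void mydrv_crtc_prepare(struct drm_crtc *crtc)
    {
    	/* Disable scanout before reprogramming. */
    }

    static int mydrv_crtc_mode_set(struct drm_crtc *crtc,
    			       struct drm_display_mode *mode,
    			       struct drm_display_mode *adjusted_mode,
    			       int x, int y, struct drm_framebuffer *old_fb)
    {
    	return 0;	/* program the adjusted timings */
    }

    static void mydrv_crtc_commit(struct drm_crtc *crtc)
    {
    	/* Re-enable scanout with the new configuration. */
    }

    static const struct drm_crtc_helper_funcs mydrv_crtc_helper_funcs = {
    	.mode_fixup = mydrv_crtc_mode_fixup,
    	.prepare = mydrv_crtc_prepare,
    	.mode_set = mydrv_crtc_mode_set,
    	.commit = mydrv_crtc_commit,
    };

Such a vtable is registered with drm_crtc_helper_add() at init time. Since no mode_set_base hook is provided here, the helper falls back to the full prepare/mode_set/commit sequence even for framebuffer-only changes.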
- </para> - </listitem> - <listitem> - <synopsis>void drm_helper_connector_dpms(struct drm_connector *connector, int mode);</synopsis> - <para> - The <function>drm_helper_connector_dpms</function> helper function - is a connector <methodname>dpms</methodname> implementation that - tracks power state of connectors. To use the function, drivers must - provide <methodname>dpms</methodname> helper operations for CRTCs - and encoders to apply the DPMS state to the device. - </para> - <para> - The mid-layer doesn't track the power state of CRTCs and encoders. - The <methodname>dpms</methodname> helper operations can thus be - called with a mode identical to the currently active mode. - </para> - </listitem> - <listitem> - <synopsis>int drm_helper_probe_single_connector_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY);</synopsis> - <para> - The <function>drm_helper_probe_single_connector_modes</function> helper - function is a connector <methodname>fill_modes</methodname> - implementation that updates the connection status for the connector - and then retrieves a list of modes by calling the connector - <methodname>get_modes</methodname> helper operation. - </para> - <para> - If the helper operation returns no mode, and if the connector status - is connector_status_connected, standard VESA DMT modes up to - 1024x768 are automatically added to the modes list by a call to - <function>drm_add_modes_noedid</function>. - </para> - <para> - The function then filters out modes larger than - <parameter>max_width</parameter> and <parameter>max_height</parameter> - if specified. It finally calls the optional connector - <methodname>mode_valid</methodname> helper operation for each mode in - the probed list to check whether the mode is valid for the connector. - </para> - </listitem> - </itemizedlist> - </sect2> - <sect2> - <title>CRTC Helper Operations</title> + <title>Legacy CRTC Helper Operations</title> <itemizedlist> <listitem id="drm-helper-crtc-mode-fixup"> <synopsis>bool (*mode_fixup)(struct drm_crtc *crtc, @@ -2052,198 +1725,6 @@ void intel_crt_init(struct drm_device *dev) <function>drm_add_edid_modes</function> manually in that case. </para> <para> - When adding modes manually the driver creates each mode with a call to - <function>drm_mode_create</function> and must fill the following fields. - <itemizedlist> - <listitem> - <synopsis>__u32 type;</synopsis> - <para> - Mode type bitmask, a combination of - <variablelist> - <varlistentry> - <term>DRM_MODE_TYPE_BUILTIN</term> - <listitem><para>not used?</para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_TYPE_CLOCK_C</term> - <listitem><para>not used?</para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_TYPE_CRTC_C</term> - <listitem><para>not used?</para></listitem> - </varlistentry> - <varlistentry> - <term> - DRM_MODE_TYPE_PREFERRED - The preferred mode for the connector - </term> - <listitem> - <para>not used?</para> - </listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_TYPE_DEFAULT</term> - <listitem><para>not used?</para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_TYPE_USERDEF</term> - <listitem><para>not used?</para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_TYPE_DRIVER</term> - <listitem> - <para> - The mode has been created by the driver (as opposed to - to user-created modes). 
</para> - </listitem> - </varlistentry> - </variablelist> - Drivers must set the DRM_MODE_TYPE_DRIVER bit for all modes they - create, and set the DRM_MODE_TYPE_PREFERRED bit for the preferred - mode. - </para> - </listitem> - <listitem> - <synopsis>__u32 clock;</synopsis> - <para>Pixel clock frequency in kHz unit</para> - </listitem> - <listitem> - <synopsis>__u16 hdisplay, hsync_start, hsync_end, htotal; - __u16 vdisplay, vsync_start, vsync_end, vtotal;</synopsis> - <para>Horizontal and vertical timing information</para> - <screen><![CDATA[
-    Active                 Front           Sync           Back
-    Region                 Porch                          Porch
-    <-----------------------><----------------><-------------><-------------->
-
-      //////////////////////|
-     ////////////////////// |
-    //////////////////////  |..................               ................
-                                               _______________
-
-    <----- [hv]display ----->
-    <------------- [hv]sync_start ------------>
-    <--------------------- [hv]sync_end --------------------->
-    <-------------------------------- [hv]total ----------------------------->
-]]></screen> - </listitem> - <listitem> - <synopsis>__u16 hskew; - __u16 vscan;</synopsis> - <para>Unknown</para> - </listitem> - <listitem> - <synopsis>__u32 flags;</synopsis> - <para> - Mode flags, a combination of - <variablelist> - <varlistentry> - <term>DRM_MODE_FLAG_PHSYNC</term> - <listitem><para> - Horizontal sync is active high - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_NHSYNC</term> - <listitem><para> - Horizontal sync is active low - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_PVSYNC</term> - <listitem><para> - Vertical sync is active high - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_NVSYNC</term> - <listitem><para> - Vertical sync is active low - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_INTERLACE</term> - <listitem><para> - Mode is interlaced - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_DBLSCAN</term> - <listitem><para> - Mode uses doublescan - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_CSYNC</term> - <listitem><para> - Mode uses composite sync - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_PCSYNC</term> - <listitem><para> - Composite sync is active high - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_NCSYNC</term> - <listitem><para> - Composite sync is active low - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_HSKEW</term> - <listitem><para> - hskew provided (not used?) - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_BCAST</term> - <listitem><para> - not used? - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_PIXMUX</term> - <listitem><para> - not used? - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_DBLCLK</term> - <listitem><para> - not used? - </para></listitem> - </varlistentry> - <varlistentry> - <term>DRM_MODE_FLAG_CLKDIV2</term> - <listitem><para> - ? - </para></listitem> - </varlistentry> - </variablelist> - </para> - <para> - Note that modes marked with the INTERLACE or DBLSCAN flags will be - filtered out by - <function>drm_helper_probe_single_connector_modes</function> if - the connector's <structfield>interlace_allowed</structfield> or - <structfield>doublescan_allowed</structfield> field is set to 0.
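For illustration, a minimal sketch of adding a mode manually from a connector's get_modes implementation, using the standard VESA DMT 1024x768@60 timings (mydrv_get_modes() is hypothetical; the field values follow the [hv] timing layout shown above):

    #include <drm/drmP.h>
    #include <drm/drm_crtc.h>

    static int mydrv_get_modes(struct drm_connector *connector)
    {
    	struct drm_display_mode *mode;

    	mode = drm_mode_create(connector->dev);
    	if (!mode)
    		return 0;

    	mode->clock = 65000;	/* pixel clock in kHz */
    	mode->hdisplay = 1024;
    	mode->hsync_start = 1048;
    	mode->hsync_end = 1184;
    	mode->htotal = 1344;
    	mode->vdisplay = 768;
    	mode->vsync_start = 771;
    	mode->vsync_end = 777;
    	mode->vtotal = 806;
    	mode->flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC;
    	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;

    	/* Derives the "1024x768" name from hdisplay/vdisplay. */
    	drm_mode_set_name(mode);
    	drm_mode_probed_add(connector, mode);

    	return 1;	/* number of modes added */
    }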
- </para> - </listitem> - <listitem> - <synopsis>char name[DRM_DISPLAY_MODE_LEN];</synopsis> - <para> - Mode name. The driver must call - <function>drm_mode_set_name</function> to fill the mode name from - <structfield>hdisplay</structfield>, - <structfield>vdisplay</structfield> and interlace flag after - filling the corresponding fields. - </para> - </listitem> - </itemizedlist> - </para> - <para> The <structfield>vrefresh</structfield> value is computed by <function>drm_helper_probe_single_connector_modes</function>. </para> @@ -2303,8 +1784,12 @@ void intel_crt_init(struct drm_device *dev) !Edrivers/gpu/drm/drm_atomic_helper.c </sect2> <sect2> - <title>Modeset Helper Functions Reference</title> -!Iinclude/drm/drm_crtc_helper.h + <title>Modeset Helper Reference for Common Vtables</title> +!Iinclude/drm/drm_modeset_helper_vtables.h +!Pinclude/drm/drm_modeset_helper_vtables.h overview + </sect2> + <sect2> + <title>Legacy CRTC/Modeset Helper Functions Reference</title> !Edrivers/gpu/drm/drm_crtc_helper.c !Pdrivers/gpu/drm/drm_crtc_helper.c overview </sect2> @@ -4015,92 +3500,6 @@ int num_ioctls;</synopsis> <sect2> <title>DPIO</title> !Pdrivers/gpu/drm/i915/i915_reg.h DPIO - <table id="dpiox2"> - <title>Dual channel PHY (VLV/CHV/BXT)</title> - <tgroup cols="8"> - <colspec colname="c0" /> - <colspec colname="c1" /> - <colspec colname="c2" /> - <colspec colname="c3" /> - <colspec colname="c4" /> - <colspec colname="c5" /> - <colspec colname="c6" /> - <colspec colname="c7" /> - <spanspec spanname="ch0" namest="c0" nameend="c3" /> - <spanspec spanname="ch1" namest="c4" nameend="c7" /> - <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" /> - <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" /> - <spanspec spanname="ch1pcs01" namest="c4" nameend="c5" /> - <spanspec spanname="ch1pcs23" namest="c6" nameend="c7" /> - <thead> - <row> - <entry spanname="ch0">CH0</entry> - <entry spanname="ch1">CH1</entry> - </row> - </thead> - <tbody valign="top" align="center"> - <row> - <entry spanname="ch0">CMN/PLL/REF</entry> - <entry spanname="ch1">CMN/PLL/REF</entry> - </row> - <row> - <entry spanname="ch0pcs01">PCS01</entry> - <entry spanname="ch0pcs23">PCS23</entry> - <entry spanname="ch1pcs01">PCS01</entry> - <entry spanname="ch1pcs23">PCS23</entry> - </row> - <row> - <entry>TX0</entry> - <entry>TX1</entry> - <entry>TX2</entry> - <entry>TX3</entry> - <entry>TX0</entry> - <entry>TX1</entry> - <entry>TX2</entry> - <entry>TX3</entry> - </row> - <row> - <entry spanname="ch0">DDI0</entry> - <entry spanname="ch1">DDI1</entry> - </row> - </tbody> - </tgroup> - </table> - <table id="dpiox1"> - <title>Single channel PHY (CHV/BXT)</title> - <tgroup cols="4"> - <colspec colname="c0" /> - <colspec colname="c1" /> - <colspec colname="c2" /> - <colspec colname="c3" /> - <spanspec spanname="ch0" namest="c0" nameend="c3" /> - <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" /> - <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" /> - <thead> - <row> - <entry spanname="ch0">CH0</entry> - </row> - </thead> - <tbody valign="top" align="center"> - <row> - <entry spanname="ch0">CMN/PLL/REF</entry> - </row> - <row> - <entry spanname="ch0pcs01">PCS01</entry> - <entry spanname="ch0pcs23">PCS23</entry> - </row> - <row> - <entry>TX0</entry> - <entry>TX1</entry> - <entry>TX2</entry> - <entry>TX3</entry> - </row> - <row> - <entry spanname="ch0">DDI2</entry> - </row> - </tbody> - </tgroup> - </table> </sect2> <sect2> diff --git a/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt 
b/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt new file mode 100644 index 000000000000..ed5e0a7894ad --- /dev/null +++ b/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt @@ -0,0 +1,54 @@ +Etnaviv DRM master device +========================= + +The Etnaviv DRM master device is a virtual device needed to list all +Vivante GPU cores that comprise the GPU subsystem. + +Required properties: +- compatible: Should be one of + "fsl,imx-gpu-subsystem" + "marvell,dove-gpu-subsystem" +- cores: Should contain a list of phandles pointing to Vivante GPU devices + +example: + +gpu-subsystem { + compatible = "fsl,imx-gpu-subsystem"; + cores = <&gpu_2d>, <&gpu_3d>; +}; + + +Vivante GPU core devices +======================== + +Required properties: +- compatible: Should be "vivante,gc" + A more specific compatible is not needed, as the cores contain chip + identification registers at fixed locations, which provide all the + necessary information to the driver. +- reg: should be register base and length as documented in the + datasheet +- interrupts: Should contain the core's interrupt line +- clocks: should contain one clock for each entry in clock-names + see Documentation/devicetree/bindings/clock/clock-bindings.txt +- clock-names: + - "bus": AXI/register clock + - "core": GPU core clock + - "shader": Shader clock (only required if GPU has feature PIPE_3D) + +Optional properties: +- power-domains: a power domain consumer specifier according to + Documentation/devicetree/bindings/power/power_domain.txt + +example: + +gpu_3d: gpu@00130000 { + compatible = "vivante,gc"; + reg = <0x00130000 0x4000>; + interrupts = <0 9 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6QDL_CLK_GPU3D_AXI>, + <&clks IMX6QDL_CLK_GPU3D_CORE>, + <&clks IMX6QDL_CLK_GPU3D_SHADER>; + clock-names = "bus", "core", "shader"; + power-domains = <&gpc 1>; }; diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt index 64693f2ebc51..fe4a7a2dea9c 100644 --- a/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt +++ b/Documentation/devicetree/bindings/display/exynos/exynos_dp.txt @@ -1,3 +1,20 @@ +Device-Tree bindings for Samsung Exynos Embedded DisplayPort Transmitter (eDP) + +DisplayPort is an industry standard designed to accommodate the broad adoption +of digital display technology within the PC and CE industries. +It consolidates the internal and external connection methods to reduce device +complexity and cost. It also supports necessary features for important cross +industry applications and provides performance scalability to enable the next +generation of displays that feature higher color depths, refresh rates, and +display resolutions. + +The eDP (embedded DisplayPort) device is compliant with the Embedded +DisplayPort standard, as follows: +- DisplayPort standard 1.1a for Exynos5250 and Exynos5260. +- DisplayPort standard 1.3 for Exynos5422s and Exynos5800. + +eDP resides between FIMD and a panel, or between FIMD and a bridge such as LVDS. + The Exynos display port interface should be configured based on the type of panel connected to it. @@ -66,8 +83,15 @@ Optional properties for dp-controller: Hotplug detect GPIO. Indicates which GPIO should be used for hotplug detection -video interfaces: Device node can contain video interface port - nodes according to [1]. +Video interfaces: + Device node can contain video interface port nodes according to [1].
The following are properties specific to those nodes: + + endpoint node connected to bridge or panel node: + - remote-endpoint: specifies the endpoint in the panel or bridge node. + This node is required in all Exynos DP variants + to represent the connection between the DP + controller and the bridge or panel. [1]: Documentation/devicetree/bindings/media/video-interfaces.txt @@ -111,9 +135,18 @@ Board Specific portion: }; ports { - port@0 { + port { dp_out: endpoint { - remote-endpoint = <&bridge_in>; + remote-endpoint = <&dp_in>; + }; + }; + }; + + panel { + ... + port { + dp_in: endpoint { + remote-endpoint = <&dp_out>; }; }; }; diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt index f344b9e49198..e7423bea1424 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi.txt +++ b/Documentation/devicetree/bindings/display/msm/dsi.txt @@ -14,17 +14,20 @@ Required properties: - clocks: device clocks See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details. - clock-names: the following clocks are required: + * "mdp_core_clk" + * "iface_clk" * "bus_clk" - * "byte_clk" - * "core_clk" * "core_mmss_clk" - * "iface_clk" - * "mdp_core_clk" + * "byte_clk" * "pixel_clk" + * "core_clk" + For DSIv2, we need an additional clock: + * "src_clk" - vdd-supply: phandle to vdd regulator device node - vddio-supply: phandle to vdd-io regulator device node - vdda-supply: phandle to vdda regulator device node - qcom,dsi-phy: phandle to DSI PHY device node +- syscon-sfpb: A phandle to mmss_sfpb syscon node (only for DSIv2) Optional properties: - panel@0: Node of panel connected to this DSI controller. @@ -51,6 +54,7 @@ Required properties: * "qcom,dsi-phy-28nm-hpm" * "qcom,dsi-phy-28nm-lp" * "qcom,dsi-phy-20nm" + * "qcom,dsi-phy-28nm-8960" - reg: Physical base address and length of the registers of PLL, PHY and PHY regulator - reg-names: The names of register regions. The following regions are required: diff --git a/Documentation/devicetree/bindings/display/msm/mdp.txt b/Documentation/devicetree/bindings/display/msm/mdp.txt index 0833edaba4c3..a214f6cd0363 100644 --- a/Documentation/devicetree/bindings/display/msm/mdp.txt +++ b/Documentation/devicetree/bindings/display/msm/mdp.txt @@ -2,18 +2,28 @@ Qualcomm adreno/snapdragon display controller Required properties: - compatible: - * "qcom,mdp" - mdp4 + * "qcom,mdp4" - mdp4 + * "qcom,mdp5" - mdp5 - reg: Physical base address and length of the controller's registers. - interrupts: The interrupt signal from the display controller. - connectors: array of phandles for output device(s) - clocks: device clocks See ../clocks/clock-bindings.txt for details. -- clock-names: the following clocks are required: - * "core_clk" - * "iface_clk" - * "src_clk" - * "hdmi_clk" - * "mpd_clk" +- clock-names: the following clocks are required. + For MDP4: + * "core_clk" + * "iface_clk" + * "lut_clk" + * "src_clk" + * "hdmi_clk" + * "mdp_clk" + For MDP5: + * "bus_clk" + * "iface_clk" + * "core_clk_src" + * "core_clk" + * "lut_clk" (some MDP5 versions may not need this) + * "vsync_clk" Optional properties: - gpus: phandle for gpu device @@ -26,7 +36,7 @@ Example: ...
mdp: qcom,mdp@5100000 { - compatible = "qcom,mdp"; + compatible = "qcom,mdp4"; reg = <0x05100000 0xf0000>; interrupts = <GIC_SPI 75 0>; connectors = <&hdmi>; diff --git a/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt b/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt new file mode 100644 index 000000000000..50be5e2438b2 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt @@ -0,0 +1,7 @@ +Boe Corporation 8.0" WUXGA TFT LCD panel + +Required properties: +- compatible: should be "boe,tv080wum-nl0" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt b/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt new file mode 100644 index 000000000000..649744620ae1 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt @@ -0,0 +1,7 @@ +Innolux Corporation 12.1" G121X1-L03 XGA (1024x768) TFT LCD panel + +Required properties: +- compatible: should be "innolux,g121x1-l03" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt b/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt new file mode 100644 index 000000000000..a8e940fe731e --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt @@ -0,0 +1,7 @@ +Kyocera Corporation 12.1" XGA (1024x768) TFT LCD panel + +Required properties: +- compatible: should be "kyo,tcg121xglp" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt b/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt new file mode 100644 index 000000000000..37dedf6a6702 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt @@ -0,0 +1,20 @@ +Panasonic 10" WUXGA TFT LCD panel + +Required properties: +- compatible: should be "panasonic,vvx10f034n00" +- reg: DSI virtual channel of the peripheral +- power-supply: phandle of the regulator that provides the supply voltage + +Optional properties: +- backlight: phandle of the backlight device attached to the panel + +Example: + + mdss_dsi@fd922800 { + panel@0 { + compatible = "panasonic,vvx10f034n00"; + reg = <0>; + power-supply = <&vreg_vsp>; + backlight = <&lp8566_wled>; + }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt b/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt new file mode 100644 index 000000000000..0fbdab89ac3d --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt @@ -0,0 +1,7 @@ +QiaoDian XianShi Corporation 4.3" TFT LCD panel + +Required properties: +- compatible: should be "qiaodian,qd43003c0-40" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt b/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt new file mode 100644 index 000000000000..3770a111968b --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/sharp,ls043t1le01.txt @@ -0,0 +1,22 @@ +Sharp Microelectronics 4.3" qHD TFT LCD panel + +Required properties: +- compatible: should be "sharp,ls043t1le01-qhd" +- reg: DSI virtual channel of the peripheral +- avdd-supply: phandle of the regulator that provides the supply voltage + +Optional properties: +- backlight: phandle of the backlight device attached to the panel +- reset-gpios: a GPIO spec for the reset pin + +Example: + + mdss_dsi@fd922800 { + panel@0 { + compatible = "sharp,ls043t1le01-qhd"; + reg = <0>; + avdd-supply = <&pm8941_l22>; + backlight = <&pm8941_wled>; + reset-gpios = <&pm8941_gpios 19 GPIO_ACTIVE_HIGH>; + }; + }; diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt index 0604d42f38d1..5fe9372abb37 100644 --- a/Documentation/devicetree/bindings/media/exynos5-gsc.txt +++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt @@ -7,6 +7,10 @@ Required properties: - reg: should contain G-Scaler physical address location and length. - interrupts: should contain G-Scaler interrupt number +Optional properties: +- samsung,sysreg: handle to syscon used to control the system registers to + set writeback input and destination + Example: gsc_0: gsc@0x13e00000 { diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt index f5a8ca29aff0..aeea50c84e92 100644 --- a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt +++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt @@ -8,6 +8,11 @@ Required properties: - phy-mode: See ethernet.txt file in the same directory - clocks: a pointer to the reference clock for this device. +Optional properties: +- tx-csum-limit: maximum MTU for which the port supports TX checksumming. + The value is given in bytes. If not specified, it defaults to 1600B for + "marvell,armada-370-neta" and 9800B for others. + Example: ethernet@d0070000 { @@ -15,6 +20,7 @@ ethernet@d0070000 { reg = <0xd0070000 0x2500>; interrupts = <8>; clocks = <&gate_clk 4>; + tx-csum-limit = <9800>; status = "okay"; phy = <&phy0>; phy-mode = "rgmii-id"; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 55df1d444e9f..b123731b2dca 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -33,6 +33,7 @@ auo AU Optronics Corporation avago Avago Technologies avic Shanghai AVIC Optoelectronics Co., Ltd. axis Axis Communications AB +boe BOE Technology Group Co., Ltd. bosch Bosch Sensortec GmbH boundary Boundary Devices Inc. brcm Broadcom Corporation @@ -123,6 +124,7 @@ jedec JEDEC Solid State Technology Association karo Ka-Ro electronics GmbH keymile Keymile GmbH kinetic Kinetic Technologies +kyo Kyocera Corporation lacie LaCie lantiq Lantiq Semiconductor lenovo Lenovo Group Ltd. @@ -180,6 +182,7 @@ qca Qualcomm Atheros, Inc. qcom Qualcomm Technologies, Inc qemu QEMU, a generic and open source machine emulator and virtualizer qi Qi Hardware +qiaodian QiaoDian XianShi Corporation qnap QNAP Systems, Inc.
radxa Radxa raidsonic RaidSonic Technology GmbH @@ -238,6 +241,7 @@ v3 V3 Semiconductor variscite Variscite Ltd. via VIA Technologies, Inc. virtio Virtual I/O Device Specification, developed by the OASIS consortium +vivante Vivante Corporation voipac Voipac Technologies s.r.o. wexler Wexler winbond Winbond Electronics corp. diff --git a/MAINTAINERS b/MAINTAINERS index cba790b42f23..9a68ea992436 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -318,7 +318,7 @@ M: Zhang Rui <rui.zhang@intel.com> L: linux-acpi@vger.kernel.org W: https://01.org/linux-acpi S: Supported -F: drivers/acpi/video.c +F: drivers/acpi/acpi_video.c ACPI WMI DRIVER L: platform-driver-x86@vger.kernel.org @@ -1847,7 +1847,7 @@ S: Supported F: drivers/net/wireless/ath/ath6kl/ WILOCITY WIL6210 WIRELESS DRIVER -M: Vladimir Kondratiev <qca_vkondrat@qca.qualcomm.com> +M: Maya Erez <qca_merez@qca.qualcomm.com> L: linux-wireless@vger.kernel.org L: wil6210@qca.qualcomm.com S: Supported @@ -3743,6 +3743,15 @@ S: Maintained F: drivers/gpu/drm/sti F: Documentation/devicetree/bindings/display/st,stih4xx.txt +DRM DRIVERS FOR VIVANTE GPU IP +M: Lucas Stach <l.stach@pengutronix.de> +R: Russell King <linux+etnaviv@arm.linux.org.uk> +R: Christian Gmeiner <christian.gmeiner@gmail.com> +L: dri-devel@lists.freedesktop.org +S: Maintained +F: drivers/gpu/drm/etnaviv +F: Documentation/devicetree/bindings/display/etnaviv + DSBR100 USB FM RADIO DRIVER M: Alexey Klimov <klimov.linux@gmail.com> L: linux-media@vger.kernel.org @@ -9427,8 +9436,10 @@ F: include/scsi/sg.h SCSI SUBSYSTEM M: "James E.J. Bottomley" <JBottomley@odin.com> -L: linux-scsi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git +M: "Martin K. Petersen" <martin.petersen@oracle.com> +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git +L: linux-scsi@vger.kernel.org S: Maintained F: drivers/scsi/ F: include/scsi/ @@ -10903,9 +10914,9 @@ S: Maintained F: drivers/media/tuners/tua9001* TULIP NETWORK DRIVERS -M: Grant Grundler <grundler@parisc-linux.org> L: netdev@vger.kernel.org -S: Maintained +L: linux-parisc@vger.kernel.org +S: Orphan F: drivers/net/ethernet/dec/tulip/ TUN/TAP driver @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Blurry Fish Butt # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index c6a0e9d7f1a9..e8b7f6726772 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -498,6 +498,7 @@ reg = <0x70000 0x4000>; interrupts-extended = <&mpic 8>; clocks = <&gateclk 4>; + tx-csum-limit = <9800>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 49a4f43e5ac2..1cc2e95ffc66 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -122,6 +122,12 @@ compatible = "auo,b133htn01"; power-supply = <&tps65090_fet6>; backlight = <&backlight>; + + port { + panel_in: endpoint { + remote-endpoint = <&dp_out>; + }; + }; }; mmc1_pwrseq: mmc1_pwrseq { @@ -148,7 +154,14 @@ samsung,link-rate = <0x0a>; samsung,lane-count = <2>; samsung,hpd-gpio = <&gpx2 6 GPIO_ACTIVE_HIGH>; - panel = <&panel>; + + ports { + port { + dp_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; }; &fimd { diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index a9c80a2ea1a7..3095df091ff8 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ 
b/arch/arm/include/asm/kvm_emulate.h @@ -28,6 +28,18 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu); +static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, + u8 reg_num) +{ + return *vcpu_reg(vcpu, reg_num); +} + +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, + unsigned long val) +{ + *vcpu_reg(vcpu, reg_num) = val; +} + bool kvm_condition_valid(struct kvm_vcpu *vcpu); void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 974b1c606d04..3a10c9f1d0a4 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, data); data = vcpu_data_host_to_guest(vcpu, data, len); - *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data; + vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data); } return 0; @@ -186,7 +186,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, rt = vcpu->arch.mmio_decode.rt; if (is_write) { - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len); + data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), + len); trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); mmio_write_buf(data_buf, len, data); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7dace909d5cf..61d96a645ff3 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -218,7 +218,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, kvm_tlb_flush_vmid_ipa(kvm, addr); /* No need to invalidate the cache for device mappings */ - if (!kvm_is_device_pfn(__phys_to_pfn(addr))) + if (!kvm_is_device_pfn(pte_pfn(old_pte))) kvm_flush_dcache_pte(old_pte); put_page(virt_to_page(pte)); @@ -310,7 +310,7 @@ static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, pte = pte_offset_kernel(pmd, addr); do { - if (!pte_none(*pte) && !kvm_is_device_pfn(__phys_to_pfn(addr))) + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) kvm_flush_dcache_pte(*pte); } while (pte++, addr += PAGE_SIZE, addr != end); } diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 0b556968a6da..a9b3b905e661 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) unsigned long context_id; phys_addr_t target_pc; - cpu_id = *vcpu_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; + cpu_id = vcpu_get_reg(source_vcpu, 1) & MPIDR_HWID_BITMASK; if (vcpu_mode_is_32bit(source_vcpu)) cpu_id &= ~((u32) 0); @@ -94,8 +94,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) return PSCI_RET_INVALID_PARAMS; } - target_pc = *vcpu_reg(source_vcpu, 2); - context_id = *vcpu_reg(source_vcpu, 3); + target_pc = vcpu_get_reg(source_vcpu, 2); + context_id = vcpu_get_reg(source_vcpu, 3); kvm_reset_vcpu(vcpu); @@ -114,7 +114,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) * NOTE: We always update r0 (or x0) because for PSCI v0.1 * the general puspose registers are undefined upon CPU_ON. 
*/ - *vcpu_reg(vcpu, 0) = context_id; + vcpu_set_reg(vcpu, 0, context_id); vcpu->arch.power_off = false; smp_mb(); /* Make sure the above is visible */ @@ -134,8 +134,8 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tmp; - target_affinity = *vcpu_reg(vcpu, 1); - lowest_affinity_level = *vcpu_reg(vcpu, 2); + target_affinity = vcpu_get_reg(vcpu, 1); + lowest_affinity_level = vcpu_get_reg(vcpu, 2); /* Determine target affinity mask */ target_affinity_mask = psci_affinity_mask(lowest_affinity_level); @@ -209,7 +209,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { int ret = 1; - unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; switch (psci_fn) { @@ -273,13 +273,13 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; } - *vcpu_reg(vcpu, 0) = val; + vcpu_set_reg(vcpu, 0, val); return ret; } static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { - unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0); + unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; switch (psci_fn) { @@ -295,7 +295,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) break; } - *vcpu_reg(vcpu, 0) = val; + vcpu_set_reg(vcpu, 0, val); return 1; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 3ca894ecf699..25a40213bd9b 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -100,13 +100,21 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) } /* - * vcpu_reg should always be passed a register number coming from a - * read of ESR_EL2. Otherwise, it may give the wrong result on AArch32 - * with banked registers. + * vcpu_get_reg and vcpu_set_reg should always be passed a register number + * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on + * AArch32 with banked registers. */ -static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, + u8 reg_num) { - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} + +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, + unsigned long val) +{ + if (reg_num != 31) + vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } /* Get vcpu SPSR for current mode */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 68a0759b1375..15f0477b0d2a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -37,7 +37,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; - trace_kvm_hvc_arm64(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), + trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); ret = kvm_psci_call(vcpu); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 87a64e8db04c..d2650e84faf2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -78,7 +78,7 @@ static u32 get_ccsidr(u32 csselr) * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). 
*/ static bool access_dcsw(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (!p->is_write) @@ -94,21 +94,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, * sys_regs and leave it in complete control of the caches. */ static bool access_vm_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { - unsigned long val; bool was_enabled = vcpu_has_cache_enabled(vcpu); BUG_ON(!p->is_write); - val = *vcpu_reg(vcpu, p->Rt); if (!p->is_aarch32) { - vcpu_sys_reg(vcpu, r->reg) = val; + vcpu_sys_reg(vcpu, r->reg) = p->regval; } else { if (!p->is_32bit) - vcpu_cp15_64_high(vcpu, r->reg) = val >> 32; - vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL; + vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval); + vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval); } kvm_toggle_cache(vcpu, was_enabled); @@ -122,22 +120,19 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, * for both AArch64 and AArch32 accesses. */ static bool access_gic_sgi(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 val; - if (!p->is_write) return read_from_write_only(vcpu, p); - val = *vcpu_reg(vcpu, p->Rt); - vgic_v3_dispatch_sgi(vcpu, val); + vgic_v3_dispatch_sgi(vcpu, p->regval); return true; } static bool trap_raz_wi(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) @@ -147,19 +142,19 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu, } static bool trap_oslsr_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { return ignore_write(vcpu, p); } else { - *vcpu_reg(vcpu, p->Rt) = (1 << 3); + p->regval = (1 << 3); return true; } } static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { @@ -167,7 +162,7 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, } else { u32 val; asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val)); - *vcpu_reg(vcpu, p->Rt) = val; + p->regval = val; return true; } } @@ -200,17 +195,17 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu, * now use the debug registers. */ static bool trap_debug_regs(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu_sys_reg(vcpu, r->reg) = p->regval; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg); + p->regval = vcpu_sys_reg(vcpu, r->reg); } - trace_trap_reg(__func__, r->reg, p->is_write, *vcpu_reg(vcpu, p->Rt)); + trace_trap_reg(__func__, r->reg, p->is_write, p->regval); return true; } @@ -225,10 +220,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * hyp.S code switches between host and guest values in future. 
*/ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, u64 *dbg_reg) { - u64 val = *vcpu_reg(vcpu, p->Rt); + u64 val = p->regval; if (p->is_32bit) { val &= 0xffffffffUL; @@ -240,19 +235,16 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, } static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, u64 *dbg_reg) { - u64 val = *dbg_reg; - + p->regval = *dbg_reg; if (p->is_32bit) - val &= 0xffffffffUL; - - *vcpu_reg(vcpu, p->Rt) = val; + p->regval &= 0xffffffffUL; } static inline bool trap_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -294,7 +286,7 @@ static inline void reset_bvr(struct kvm_vcpu *vcpu, } static inline bool trap_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -337,7 +329,7 @@ static inline void reset_bcr(struct kvm_vcpu *vcpu, } static inline bool trap_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -380,7 +372,7 @@ static inline void reset_wvr(struct kvm_vcpu *vcpu, } static inline bool trap_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -687,7 +679,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { @@ -697,23 +689,23 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); - *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | - (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | - (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) | - (6 << 16) | (el3 << 14) | (el3 << 12)); + p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) + | (6 << 16) | (el3 << 14) | (el3 << 12)); return true; } } static bool trap_debug32(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt); + vcpu_cp14(vcpu, r->reg) = p->regval; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg); + p->regval = vcpu_cp14(vcpu, r->reg); } return true; @@ -731,7 +723,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, */ static inline bool trap_xvr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -740,12 +732,12 @@ static inline bool trap_xvr(struct kvm_vcpu *vcpu, u64 val = *dbg_reg; val &= 0xffffffffUL; - val |= *vcpu_reg(vcpu, p->Rt) << 32; + val |= p->regval << 32; *dbg_reg = val; vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } else { - *vcpu_reg(vcpu, p->Rt) = *dbg_reg >> 32; + p->regval = *dbg_reg >> 32; } 
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -991,7 +983,7 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) * Return 0 if the access has been handled, and -1 if not. */ static int emulate_cp(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params, + struct sys_reg_params *params, const struct sys_reg_desc *table, size_t num) { @@ -1062,12 +1054,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt = (hsr >> 5) & 0xf; int Rt2 = (hsr >> 10) & 0xf; params.is_aarch32 = true; params.is_32bit = false; params.CRm = (hsr >> 1) & 0xf; - params.Rt = (hsr >> 5) & 0xf; params.is_write = ((hsr & 1) == 0); params.Op0 = 0; @@ -1076,15 +1068,12 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, params.CRn = 0; /* - * Massive hack here. Store Rt2 in the top 32bits so we only - * have one register to deal with. As we use the same trap + * Make a 64-bit value out of Rt and Rt2. As we use the same trap * backends between AArch32 and AArch64, we get away with it. */ if (params.is_write) { - u64 val = *vcpu_reg(vcpu, params.Rt); - val &= 0xffffffff; - val |= *vcpu_reg(vcpu, Rt2) << 32; - *vcpu_reg(vcpu, params.Rt) = val; + params.regval = vcpu_get_reg(vcpu, Rt) & 0xffffffff; + params.regval |= vcpu_get_reg(vcpu, Rt2) << 32; } if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) @@ -1095,11 +1084,10 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, unhandled_cp_access(vcpu, ¶ms); out: - /* Do the opposite hack for the read side */ + /* Split up the value between registers for the read side */ if (!params.is_write) { - u64 val = *vcpu_reg(vcpu, params.Rt); - val >>= 32; - *vcpu_reg(vcpu, Rt2) = val; + vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); + vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); } return 1; @@ -1118,21 +1106,24 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, { struct sys_reg_params params; u32 hsr = kvm_vcpu_get_hsr(vcpu); + int Rt = (hsr >> 5) & 0xf; params.is_aarch32 = true; params.is_32bit = true; params.CRm = (hsr >> 1) & 0xf; - params.Rt = (hsr >> 5) & 0xf; + params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((hsr & 1) == 0); params.CRn = (hsr >> 10) & 0xf; params.Op0 = 0; params.Op1 = (hsr >> 14) & 0x7; params.Op2 = (hsr >> 17) & 0x7; - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) - return 1; - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || + !emulate_cp(vcpu, ¶ms, global, nr_global)) { + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); return 1; + } unhandled_cp_access(vcpu, ¶ms); return 1; @@ -1175,7 +1166,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) } static int emulate_sys_reg(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) + struct sys_reg_params *params) { size_t num; const struct sys_reg_desc *table, *r; @@ -1230,6 +1221,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct sys_reg_params params; unsigned long esr = kvm_vcpu_get_hsr(vcpu); + int Rt = (esr >> 5) & 0x1f; + int ret; trace_kvm_handle_sys_reg(esr); @@ -1240,10 +1233,14 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run) params.CRn = (esr >> 10) & 0xf; params.CRm = (esr >> 1) & 0xf; params.Op2 = (esr >> 17) & 0x7; - params.Rt = (esr >> 5) & 0x1f; + params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = !(esr & 1); - return emulate_sys_reg(vcpu, ¶ms); + ret = emulate_sys_reg(vcpu, 
¶ms); + + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); + return ret; } /****************************************************************************** diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index eaa324e4db4d..dbbb01cfbee9 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -28,7 +28,7 @@ struct sys_reg_params { u8 CRn; u8 CRm; u8 Op2; - u8 Rt; + u64 regval; bool is_write; bool is_aarch32; bool is_32bit; /* Only valid if is_aarch32 is true */ @@ -44,7 +44,7 @@ struct sys_reg_desc { /* Trapped access from guest, if non-NULL. */ bool (*access)(struct kvm_vcpu *, - const struct sys_reg_params *, + struct sys_reg_params *, const struct sys_reg_desc *); /* Initialization for vcpu. */ @@ -77,9 +77,9 @@ static inline bool ignore_write(struct kvm_vcpu *vcpu, } static inline bool read_zero(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p) + struct sys_reg_params *p) { - *vcpu_reg(vcpu, p->Rt) = 0; + p->regval = 0; return true; } diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 1e4576824165..ed90578fa120 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -31,13 +31,13 @@ #include "sys_regs.h" static bool access_actlr(struct kvm_vcpu *vcpu, - const struct sys_reg_params *p, + struct sys_reg_params *p, const struct sys_reg_desc *r) { if (p->is_write) return ignore_write(vcpu, p); - *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1); + p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1); return true; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index d6a53ef2350b..b162ad70effc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -139,6 +139,12 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) +#define _STACK_SIZE \ + (MAX_BPF_STACK \ + + 4 /* extra for skb_copy_bits buffer */) + +#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) + static void build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; @@ -150,10 +156,6 @@ static void build_prologue(struct jit_ctx *ctx) const u8 rx = bpf2a64[BPF_REG_X]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); /* * BPF prog stack layout @@ -165,12 +167,13 @@ static void build_prologue(struct jit_ctx *ctx) * | ... | callee saved registers * +-----+ * | | x25/x26 - * BPF fp register => -80:+-----+ + * BPF fp register => -80:+-----+ <= (BPF_FP) * | | * | ... | BPF prog stack * | | - * | | - * current A64_SP => +-----+ + * +-----+ <= (BPF_FP - MAX_BPF_STACK) + * |RSVD | JIT scratchpad + * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) * | | * | ... 
| Function call stack * | | @@ -196,7 +199,7 @@ static void build_prologue(struct jit_ctx *ctx) emit(A64_MOV(1, fp, A64_SP), ctx); /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); /* Clear registers A and X */ emit_a64_mov_i64(ra, 0, ctx); @@ -213,13 +216,9 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tmp1 = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; - int stack_size = MAX_BPF_STACK; - - stack_size += 4; /* extra for skb_copy_bits buffer */ - stack_size = STACK_ALIGN(stack_size); /* We're done with BPF stack */ - emit(A64_ADD_I(1, A64_SP, A64_SP, stack_size), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx); @@ -591,7 +590,25 @@ emit_cond_jmp: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: - goto notyet; + /* Load imm to a register then store it */ + ctx->tmp_used = 1; + emit_a64_mov_i(1, tmp2, off, ctx); + emit_a64_mov_i(1, tmp, imm, ctx); + switch (BPF_SIZE(code)) { + case BPF_W: + emit(A64_STR32(tmp, dst, tmp2), ctx); + break; + case BPF_H: + emit(A64_STRH(tmp, dst, tmp2), ctx); + break; + case BPF_B: + emit(A64_STRB(tmp, dst, tmp2), ctx); + break; + case BPF_DW: + emit(A64_STR64(tmp, dst, tmp2), ctx); + break; + } + break; /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_W: @@ -658,7 +675,7 @@ emit_cond_jmp: return -EINVAL; } emit_a64_mov_i64(r3, size, ctx); - emit(A64_ADD_I(1, r4, fp, MAX_BPF_STACK), ctx); + emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 4434b54e1d87..78ae5552fdb8 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -1,6 +1,7 @@ config MN10300 def_bool y select HAVE_OPROFILE + select HAVE_UID16 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select HAVE_ARCH_TRACEHOOK @@ -37,9 +38,6 @@ config HIGHMEM config NUMA def_bool n -config UID16 - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 0033e96c3f09..9011a88353de 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -23,7 +23,6 @@ #include <stdarg.h> #include <linux/types.h> #include <linux/edd.h> -#include <asm/boot.h> #include <asm/setup.h> #include "bitops.h" #include "ctype.h" diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c index aa8a96b052e3..95c7a818c0ed 100644 --- a/arch/x86/boot/video-mode.c +++ b/arch/x86/boot/video-mode.c @@ -19,6 +19,8 @@ #include "video.h" #include "vesa.h" +#include <uapi/asm/boot.h> + /* * Common variables */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 05111bb8d018..77780e386e9b 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -13,6 +13,8 @@ * Select video mode */ +#include <uapi/asm/boot.h> + #include "boot.h" #include "video.h" #include "vesa.h" diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 53616ca03244..a55697d19824 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -509,6 +509,17 @@ END(irq_entries_start) * tracking that we're in kernel mode. */ SWAPGS + + /* + * We need to tell lockdep that IRQs are off. 
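On the arm64 BPF JIT change above: hoisting the stack-size computation into the STACK_SIZE macro makes the prologue, the epilogue, and the scratch-area addressing agree on a single constant. STACK_ALIGN rounds up to the 16-byte multiple AArch64 mandates for SP; with MAX_BPF_STACK = 512, _STACK_SIZE is 516 and STACK_SIZE is 528. A compile-time check of that arithmetic (plain C sketch):

#define STACK_ALIGN(sz) (((sz) + 15) & ~15)

_Static_assert(STACK_ALIGN(516) == 528, "516 rounds up to 528");
_Static_assert(STACK_ALIGN(512) == 512, "aligned sizes are unchanged");

The new BPF_ST case is the usual RISC lowering for a store-immediate: AArch64 stores have no immediate source operand, so the JIT materializes both the immediate and the offset in the two scratch registers and emits a register-offset store.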
We can't do this until + * we fix gsbase, and we should do it before enter_from_user_mode + * (which can take locks). Since TRACE_IRQS_OFF is idempotent, + * the simplest way to handle it is to just call it twice if + * we enter from user mode. There's no reason to optimize this since + * TRACE_IRQS_OFF is a no-op if lockdep is off. + */ + TRACE_IRQS_OFF + #ifdef CONFIG_CONTEXT_TRACKING call enter_from_user_mode #endif @@ -1049,12 +1060,18 @@ ENTRY(error_entry) SWAPGS .Lerror_entry_from_usermode_after_swapgs: + /* + * We need to tell lockdep that IRQs are off. We can't do this until + * we fix gsbase, and we should do it before enter_from_user_mode + * (which can take locks). + */ + TRACE_IRQS_OFF #ifdef CONFIG_CONTEXT_TRACKING call enter_from_user_mode #endif + ret .Lerror_entry_done: - TRACE_IRQS_OFF ret diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index c5b7fb2774d0..cc071c6f7d4d 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -9,19 +9,21 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) + +#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) + #define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) -/* Cast PAGE_MASK to a signed type so that it is sign-extended if +/* Cast *PAGE_MASK to a signed type so that it is sign-extended if virtual addresses are 32-bits but physical addresses are larger (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) - -#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) - -#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) +#define PHYSICAL_PMD_PAGE_MASK (((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK) +#define PHYSICAL_PUD_PAGE_MASK (((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK) #define HPAGE_SHIFT PMD_SHIFT #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index dd5b0aa9dd2f..a471cadb9630 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -279,17 +279,14 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) static inline pudval_t pud_pfn_mask(pud_t pud) { if (native_pud_val(pud) & _PAGE_PSE) - return PUD_PAGE_MASK & PHYSICAL_PAGE_MASK; + return PHYSICAL_PUD_PAGE_MASK; else return PTE_PFN_MASK; } static inline pudval_t pud_flags_mask(pud_t pud) { - if (native_pud_val(pud) & _PAGE_PSE) - return ~(PUD_PAGE_MASK & (pudval_t)PHYSICAL_PAGE_MASK); - else - return ~PTE_PFN_MASK; + return ~pud_pfn_mask(pud); } static inline pudval_t pud_flags(pud_t pud) @@ -300,17 +297,14 @@ static inline pudval_t pud_flags(pud_t pud) static inline pmdval_t pmd_pfn_mask(pmd_t pmd) { if (native_pmd_val(pmd) & _PAGE_PSE) - return PMD_PAGE_MASK & PHYSICAL_PAGE_MASK; + return PHYSICAL_PMD_PAGE_MASK; else return PTE_PFN_MASK; } static inline pmdval_t pmd_flags_mask(pmd_t pmd) { - if (native_pmd_val(pmd) & _PAGE_PSE) - return ~(PMD_PAGE_MASK & (pmdval_t)PHYSICAL_PAGE_MASK); - else - return ~PTE_PFN_MASK; + return ~pmd_pfn_mask(pmd); } static inline pmdval_t pmd_flags(pmd_t pmd) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 48d34d28f5a6..cd0fc0cc78bc 100644 ---
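The point of the *PAGE_MASK reshuffle in page_types.h is that PHYSICAL_PMD_PAGE_MASK and PHYSICAL_PUD_PAGE_MASK need the same signed-cast trick as PHYSICAL_PAGE_MASK: on 32-bit PAE, unsigned long is 32 bits but phys_addr_t is 64, and only a sign-extending conversion keeps the high physical-address bits set in the mask. A standalone illustration, with uint32_t/int32_t standing in for the 32-bit unsigned/signed long:

#include <stdint.h>

typedef uint64_t phys_addr_t;

#define PMD_PAGE_MASK_32 ((uint32_t)~((1UL << 21) - 1))	/* 0xffe00000, 2 MiB pages */

/* Zero-extended: high physical bits lost -> 0x00000000ffe00000 */
static const phys_addr_t zero_ext = (phys_addr_t)PMD_PAGE_MASK_32;

/* Sign-extended first: -> 0xffffffffffe00000, masking works above 4 GiB */
static const phys_addr_t sign_ext = (phys_addr_t)(int32_t)PMD_PAGE_MASK_32;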
a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_PLATFORM_H #define _ASM_X86_PLATFORM_H -#include <asm/pgtable_types.h> #include <asm/bootparam.h> struct mpc_bus; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7fc27f1cca58..b3e94ef461fd 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -698,3 +698,4 @@ int __init microcode_init(void) return error; } +late_initcall(microcode_init); diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 4f00b63d7ff3..14415aff1813 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -4,10 +4,22 @@ */ #include <linux/platform_device.h> #include <linux/module.h> +#include <linux/ioport.h> + +static int found(u64 start, u64 end, void *data) +{ + return 1; +} static __init int register_e820_pmem(void) { + char *pmem = "Persistent Memory (legacy)"; struct platform_device *pdev; + int rc; + + rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, found); + if (rc <= 0) + return 0; /* * See drivers/nvdimm/e820.c for the implementation, this is diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 29db25f9a745..d2bbe343fda7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1250,8 +1250,6 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_apply_memmap_quirks(); #endif - - microcode_init(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b7ffb7c00075..cb6282c3638f 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -690,12 +690,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(failed, ksig, stepping); } -#ifdef CONFIG_X86_32 -#define NR_restart_syscall __NR_restart_syscall -#else /* !CONFIG_X86_32 */ -#define NR_restart_syscall \ - test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall -#endif /* CONFIG_X86_32 */ +static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) +{ +#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64) + return __NR_restart_syscall; +#else /* !CONFIG_X86_32 && CONFIG_X86_64 */ + return test_thread_flag(TIF_IA32) ? 
__NR_ia32_restart_syscall : + __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT); +#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */ +} /* * Note that 'init' is a special process: it doesn't get signals it doesn't @@ -724,7 +727,7 @@ void do_signal(struct pt_regs *regs) break; case -ERESTART_RESTARTBLOCK: - regs->ax = NR_restart_syscall; + regs->ax = get_nr_restart_syscall(regs); regs->ip -= 2; break; } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 892ee2e5ecbc..fbabe4fcc7fb 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid) */ #define UDELAY_10MS_DEFAULT 10000 -static unsigned int init_udelay = INT_MAX; +static unsigned int init_udelay = UINT_MAX; static int __init cpu_init_udelay(char *str) { @@ -522,14 +522,15 @@ early_param("cpu_init_udelay", cpu_init_udelay); static void __init smp_quirk_init_udelay(void) { /* if cmdline changed it from default, leave it alone */ - if (init_udelay != INT_MAX) + if (init_udelay != UINT_MAX) return; /* if modern processor, use no delay */ if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) || - ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) + ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF))) { init_udelay = 0; - + return; + } /* else, use legacy delay */ init_udelay = UDELAY_10MS_DEFAULT; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 1202d5ca2fb5..b2fd67da1701 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -101,19 +101,19 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, switch (type) { case REG_TYPE_RM: regno = X86_MODRM_RM(insn->modrm.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_INDEX: regno = X86_SIB_INDEX(insn->sib.value); - if (X86_REX_X(insn->rex_prefix.value) == 1) + if (X86_REX_X(insn->rex_prefix.value)) regno += 8; break; case REG_TYPE_BASE: regno = X86_SIB_BASE(insn->sib.value); - if (X86_REX_B(insn->rex_prefix.value) == 1) + if (X86_REX_B(insn->rex_prefix.value)) regno += 8; break; diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 7bcf06a7cd12..6eb3c8af96e2 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c @@ -50,18 +50,9 @@ void x86_pci_root_bus_resources(int bus, struct list_head *resources) if (!found) pci_add_resource(resources, &info->busn); - list_for_each_entry(root_res, &info->resources, list) { - struct resource *res; - struct resource *root; + list_for_each_entry(root_res, &info->resources, list) + pci_add_resource(resources, &root_res->res); - res = &root_res->res; - pci_add_resource(resources, res); - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - insert_resource(root, res); - } return; default_resources: diff --git a/block/blk-core.c b/block/blk-core.c index 5131993b23a1..a0af4043dda2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2114,7 +2114,8 @@ blk_qc_t submit_bio(int rw, struct bio *bio) EXPORT_SYMBOL(submit_bio); /** - * blk_rq_check_limits - Helper function to check a request for the queue limit + * blk_cloned_rq_check_limits - Helper function to check a cloned request + * for the new queue limits * @q: the queue * @rq: the request being checked * @@ -2125,20 +2126,13 @@ EXPORT_SYMBOL(submit_bio); * after it is inserted to @q, it should be checked against @q before * the insertion using this generic function.
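The get_nr_restart_syscall() helper above fixes syscall restart for x32 tasks: the x32 bit lives in orig_ax rather than in a thread flag, so it is recovered from the saved syscall number and OR-ed back onto __NR_restart_syscall. Worked example with the usual x86_64 values (__NR_restart_syscall = 219, __X32_SYSCALL_BIT = 0x40000000):

/* x32 task: orig_ax = 0x40000001 (x32 bit + syscall nr 1) */
unsigned long nr = 219 | (0x40000001UL & 0x40000000UL);
/* nr == 0x400000db: restart_syscall, dispatched via the x32 table */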
* - * This function should also be useful for request stacking drivers - * in some cases below, so export this function. * Request stacking drivers like request-based dm may change the queue - * limits while requests are in the queue (e.g. dm's table swapping). - * Such request stacking drivers should check those requests against - * the new queue limits again when they dispatch those requests, - * although such checkings are also done against the old queue limits - * when submitting requests. + * limits when retrying requests on other queues. Those requests need + * to be checked against the new queue limits again during dispatch. */ -int blk_rq_check_limits(struct request_queue *q, struct request *rq) +static int blk_cloned_rq_check_limits(struct request_queue *q, + struct request *rq) { - if (!rq_mergeable(rq)) - return 0; - if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; @@ -2158,7 +2152,6 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) return 0; } -EXPORT_SYMBOL_GPL(blk_rq_check_limits); /** * blk_insert_cloned_request - Helper for stacking drivers to submit a request @@ -2170,7 +2163,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) unsigned long flags; int where = ELEVATOR_INSERT_BACK; - if (blk_rq_check_limits(q, rq)) + if (blk_cloned_rq_check_limits(q, rq)) return -EIO; if (rq->rq_disk && diff --git a/block/blk-merge.c b/block/blk-merge.c index 41a55ba0d78e..e01405a3e8b3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -103,6 +103,9 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bvprv = bv; bvprvp = &bvprv; sectors += bv.bv_len >> 9; + + if (nsegs == 1 && seg_size > front_seg_size) + front_seg_size = seg_size; continue; } new_segment: diff --git a/block/blk-settings.c b/block/blk-settings.c index 7d8f129a1516..dd4973583978 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -91,7 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->virt_boundary_mask = 0; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors = + BLK_SAFE_MAX_SECTORS; lim->chunk_sectors = 0; lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; @@ -127,6 +128,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_hw_sectors = UINT_MAX; lim->max_segment_size = UINT_MAX; lim->max_sectors = UINT_MAX; + lim->max_dev_sectors = UINT_MAX; lim->max_write_same_sectors = UINT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -214,8 +216,8 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 max_addr) EXPORT_SYMBOL(blk_queue_bounce_limit); /** - * blk_limits_max_hw_sectors - set hard and soft limit of max sectors for request - * @limits: the queue limits + * blk_queue_max_hw_sectors - set max sectors for a request for this queue + * @q: the request queue for the device * @max_hw_sectors: max hardware sectors in the usual 512b unit * * Description: @@ -224,13 +226,19 @@ EXPORT_SYMBOL(blk_queue_bounce_limit); * the device driver based upon the capabilities of the I/O * controller. * + * max_dev_sectors is a hard limit imposed by the storage device for + * READ/WRITE requests. It is set by the disk driver. + * * max_sectors is a soft limit imposed by the block layer for * filesystem type requests. 
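The reworked limits now come in three tiers: max_hw_sectors is what the controller can transfer, max_dev_sectors is a hard cap the disk driver sets for the device, and max_sectors is the block layer's soft default for filesystem I/O. min_not_zero() is what keeps max_dev_sectors opt-in, since an unset (zero) value must not clamp anything. A sketch of the resulting max_sectors computation, with illustrative numbers (BLK_DEF_MAX_SECTORS is 2560 in this tree; min_not_zero() simplified from its kernel.h definition):

#define min(a, b) ((a) < (b) ? (a) : (b))
#define min_not_zero(x, y) ((x) == 0 ? (y) : ((y) == 0 ? (x) : min(x, y)))

/* controller allows 8192, device imposes no cap (0):
 *   min_not_zero(8192, 0) = 8192 -> min(8192, 2560) = 2560 (soft default)
 * controller allows 8192, device caps at 2048:
 *   min_not_zero(8192, 2048) = 2048 -> min(2048, 2560) = 2048 (hard cap wins)
 */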
This value can be overridden on a * per-device basis in /sys/block/<device>/queue/max_sectors_kb. * The soft limit can not exceed max_hw_sectors. **/ -void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_sectors) +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) { + struct queue_limits *limits = &q->limits; + unsigned int max_sectors; + if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); printk(KERN_INFO "%s: set to minimum %d\n", @@ -238,22 +246,9 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_ } limits->max_hw_sectors = max_hw_sectors; - limits->max_sectors = min_t(unsigned int, max_hw_sectors, - BLK_DEF_MAX_SECTORS); -} -EXPORT_SYMBOL(blk_limits_max_hw_sectors); - -/** - * blk_queue_max_hw_sectors - set max sectors for a request for this queue - * @q: the request queue for the device - * @max_hw_sectors: max hardware sectors in the usual 512b unit - * - * Description: - * See description for blk_limits_max_hw_sectors(). - **/ -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) -{ - blk_limits_max_hw_sectors(&q->limits, max_hw_sectors); + max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); + max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); + limits->max_sectors = max_sectors; } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -527,6 +522,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); + t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors); t->max_write_same_sectors = min(t->max_write_same_sectors, b->max_write_same_sectors); t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 565b8dac5782..e140cc487ce1 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -205,6 +205,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) if (ret < 0) return ret; + max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb, (unsigned long) + q->limits.max_dev_sectors >> 1); + if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) return -EINVAL; diff --git a/block/partition-generic.c b/block/partition-generic.c index 3b030157ec85..746935a5973c 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -397,7 +397,7 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev) struct hd_struct *part; int res; - if (bdev->bd_part_count) + if (bdev->bd_part_count || bdev->bd_super) return -EBUSY; res = invalidate_partition(disk, 0); if (res) diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 0aa6fdfb448a..6d4d4569447e 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -125,7 +125,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -139,7 +139,7 @@ static int aead_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0ee4057..ca9efe17db1a 100644 --- 
a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -212,7 +212,7 @@ static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) if (flags & MSG_DONTWAIT) return -EAGAIN; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (signal_pending(current)) @@ -258,7 +258,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) return -EAGAIN; } - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); for (;;) { if (signal_pending(current)) @@ -272,7 +272,7 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags) } finish_wait(sk_sleep(sk), &wait); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); return err; } diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 25dbb76c02cc..5eef4cb4f70e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -58,10 +58,10 @@ config ACPI_CCA_REQUIRED bool config ACPI_DEBUGGER - bool "In-kernel debugger (EXPERIMENTAL)" + bool "AML debugger interface (EXPERIMENTAL)" select ACPI_DEBUG help - Enable in-kernel debugging facilities: statistics, internal + Enable in-kernel debugging of AML facilities: statistics, internal object dump, single step control method execution. This is still under development, currently enabling this only results in the compilation of the ACPICA debugger files. diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index f7dab53b352a..e7ed39bab97d 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -233,11 +233,12 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_system_address *spa) { + size_t length = min_t(size_t, sizeof(*spa), spa->header.length); struct device *dev = acpi_desc->dev; struct nfit_spa *nfit_spa; list_for_each_entry(nfit_spa, &prev->spas, list) { - if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) { + if (memcmp(nfit_spa->spa, spa, length) == 0) { list_move_tail(&nfit_spa->list, &acpi_desc->spas); return true; } @@ -259,11 +260,12 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_memory_map *memdev) { + size_t length = min_t(size_t, sizeof(*memdev), memdev->header.length); struct device *dev = acpi_desc->dev; struct nfit_memdev *nfit_memdev; list_for_each_entry(nfit_memdev, &prev->memdevs, list) - if (memcmp(nfit_memdev->memdev, memdev, sizeof(*memdev)) == 0) { + if (memcmp(nfit_memdev->memdev, memdev, length) == 0) { list_move_tail(&nfit_memdev->list, &acpi_desc->memdevs); return true; } @@ -284,11 +286,12 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_control_region *dcr) { + size_t length = min_t(size_t, sizeof(*dcr), dcr->header.length); struct device *dev = acpi_desc->dev; struct nfit_dcr *nfit_dcr; list_for_each_entry(nfit_dcr, &prev->dcrs, list) - if (memcmp(nfit_dcr->dcr, dcr, sizeof(*dcr)) == 0) { + if (memcmp(nfit_dcr->dcr, dcr, length) == 0) { list_move_tail(&nfit_dcr->list, &acpi_desc->dcrs); return true; } @@ -308,11 +311,12 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_data_region *bdw) { + size_t length = min_t(size_t, sizeof(*bdw), bdw->header.length); struct device *dev = acpi_desc->dev; struct nfit_bdw *nfit_bdw; list_for_each_entry(nfit_bdw, &prev->bdws, list) - if (memcmp(nfit_bdw->bdw, bdw, sizeof(*bdw)) == 0) { + if (memcmp(nfit_bdw->bdw, bdw, length) == 0) { 
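Every add_*() helper in the NFIT parser gets the same hardening: the compare length is clamped to min_t(size_t, sizeof(*tbl), tbl->header.length), so a firmware table shorter than the structure the kernel was compiled against cannot make memcmp() read past the end of the buffer. The guard in isolation (hypothetical table type, userspace C for brevity):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct tbl_header { uint16_t type; uint16_t length; };
struct tbl { struct tbl_header header; uint8_t payload[60]; };

static bool tbl_equal(const struct tbl *a, const struct tbl *b)
{
	/* never compare more bytes than the firmware says are there */
	size_t length = b->header.length < sizeof(*b)
			? b->header.length : sizeof(*b);
	return memcmp(a, b, length) == 0;
}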
list_move_tail(&nfit_bdw->list, &acpi_desc->bdws); return true; } @@ -332,11 +336,12 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_interleave *idt) { + size_t length = min_t(size_t, sizeof(*idt), idt->header.length); struct device *dev = acpi_desc->dev; struct nfit_idt *nfit_idt; list_for_each_entry(nfit_idt, &prev->idts, list) - if (memcmp(nfit_idt->idt, idt, sizeof(*idt)) == 0) { + if (memcmp(nfit_idt->idt, idt, length) == 0) { list_move_tail(&nfit_idt->list, &acpi_desc->idts); return true; } @@ -356,11 +361,12 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, struct acpi_nfit_flush_address *flush) { + size_t length = min_t(size_t, sizeof(*flush), flush->header.length); struct device *dev = acpi_desc->dev; struct nfit_flush *nfit_flush; list_for_each_entry(nfit_flush, &prev->flushes, list) - if (memcmp(nfit_flush->flush, flush, sizeof(*flush)) == 0) { + if (memcmp(nfit_flush->flush, flush, length) == 0) { list_move_tail(&nfit_flush->list, &acpi_desc->flushes); return true; } @@ -655,7 +661,7 @@ static ssize_t revision_show(struct device *dev, struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - return sprintf(buf, "%d\n", acpi_desc->nfit->header.revision); + return sprintf(buf, "%d\n", acpi_desc->acpi_header.revision); } static DEVICE_ATTR_RO(revision); @@ -1652,7 +1658,6 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) data = (u8 *) acpi_desc->nfit; end = data + sz; - data += sizeof(struct acpi_table_nfit); while (!IS_ERR_OR_NULL(data)) data = add_table(acpi_desc, &prev, data, end); @@ -1748,13 +1753,29 @@ static int acpi_nfit_add(struct acpi_device *adev) return PTR_ERR(acpi_desc); } - acpi_desc->nfit = (struct acpi_table_nfit *) tbl; + /* + * Save the acpi header for later and then skip it, + * making nfit point to the first nfit table header. 
+ */ + acpi_desc->acpi_header = *tbl; + acpi_desc->nfit = (void *) tbl + sizeof(struct acpi_table_nfit); + sz -= sizeof(struct acpi_table_nfit); /* Evaluate _FIT and override with that if present */ status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf); if (ACPI_SUCCESS(status) && buf.length > 0) { - acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer; - sz = buf.length; + union acpi_object *obj; + /* + * Adjust for the acpi_object header of the _FIT + */ + obj = buf.pointer; + if (obj->type == ACPI_TYPE_BUFFER) { + acpi_desc->nfit = + (struct acpi_nfit_header *)obj->buffer.pointer; + sz = obj->buffer.length; + } else + dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n", + __func__, (int) obj->type); } rc = acpi_nfit_init(acpi_desc, sz); @@ -1777,7 +1798,8 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; - struct acpi_table_nfit *nfit_saved; + struct acpi_nfit_header *nfit_saved; + union acpi_object *obj; struct device *dev = &adev->dev; acpi_status status; int ret; @@ -1808,12 +1830,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } nfit_saved = acpi_desc->nfit; - acpi_desc->nfit = (struct acpi_table_nfit *)buf.pointer; - ret = acpi_nfit_init(acpi_desc, buf.length); - if (!ret) { - /* Merge failed, restore old nfit, and exit */ - acpi_desc->nfit = nfit_saved; - dev_err(dev, "failed to merge updated NFIT\n"); + obj = buf.pointer; + if (obj->type == ACPI_TYPE_BUFFER) { + acpi_desc->nfit = + (struct acpi_nfit_header *)obj->buffer.pointer; + ret = acpi_nfit_init(acpi_desc, obj->buffer.length); + if (ret) { + /* Merge failed, restore old nfit, and exit */ + acpi_desc->nfit = nfit_saved; + dev_err(dev, "failed to merge updated NFIT\n"); + } + } else { + /* Bad _FIT, restore old nfit */ + dev_err(dev, "Invalid _FIT\n"); } kfree(buf.pointer); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 2ea5c0797c8f..3d549a383659 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -96,7 +96,8 @@ struct nfit_mem { struct acpi_nfit_desc { struct nvdimm_bus_descriptor nd_desc; - struct acpi_table_nfit *nfit; + struct acpi_table_header acpi_header; + struct acpi_nfit_header *nfit; struct mutex spa_map_mutex; struct mutex init_mutex; struct list_head spa_maps; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 850d7bf0c873..ae3fe4e64203 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -768,6 +768,13 @@ static void pci_acpi_root_add_resources(struct acpi_pci_root_info *info) else continue; + /* + * Some legacy x86 host bridge drivers use iomem_resource and + * ioport_resource as default resource pool, skip it. 
+ */ + if (res == root) + continue; + conflict = insert_resource_conflict(root, res); if (conflict) { dev_info(&info->bridge->dev, diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e03b1ad25a90..167418e73445 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1775,10 +1775,10 @@ int genpd_dev_pm_attach(struct device *dev) } pd = of_genpd_get_from_provider(&pd_args); + of_node_put(pd_args.np); if (IS_ERR(pd)) { dev_dbg(dev, "%s() failed to find PM domain: %ld\n", __func__, PTR_ERR(pd)); - of_node_put(dev->of_node); return -EPROBE_DEFER; } @@ -1796,7 +1796,6 @@ int genpd_dev_pm_attach(struct device *dev) if (ret < 0) { dev_err(dev, "failed to add to PM domain %s: %d", pd->name, ret); - of_node_put(dev->of_node); goto out; } diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index e60dd12e23aa..1e937ac5f456 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -160,9 +160,6 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) struct gpd_timing_data *td; s64 constraint_ns; - if (!pdd->dev->driver) - continue; - /* * Check if the device is allowed to be off long enough for the * domain to turn off and on (that's how much time it will diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 5c8ba5484d86..0c3940ec5e62 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -18,6 +18,7 @@ struct nullb_cmd { struct bio *bio; unsigned int tag; struct nullb_queue *nq; + struct hrtimer timer; }; struct nullb_queue { @@ -49,17 +50,6 @@ static int null_major; static int nullb_indexes; static struct kmem_cache *ppa_cache; -struct completion_queue { - struct llist_head list; - struct hrtimer timer; -}; - -/* - * These are per-cpu for now, they will need to be configured by the - * complete_queues parameter and appropriately mapped. - */ -static DEFINE_PER_CPU(struct completion_queue, completion_queues); - enum { NULL_IRQ_NONE = 0, NULL_IRQ_SOFTIRQ = 1, @@ -142,8 +132,8 @@ static const struct kernel_param_ops null_irqmode_param_ops = { device_param_cb(irqmode, &null_irqmode_param_ops, &irqmode, S_IRUGO); MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); -static int completion_nsec = 10000; -module_param(completion_nsec, int, S_IRUGO); +static unsigned long completion_nsec = 10000; +module_param(completion_nsec, ulong, S_IRUGO); MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. 
Default: 10,000ns"); static int hw_queue_depth = 64; @@ -180,6 +170,8 @@ static void free_cmd(struct nullb_cmd *cmd) put_tag(cmd->nq, cmd->tag); } +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer); + static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) { struct nullb_cmd *cmd; @@ -190,6 +182,11 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) cmd = &nq->cmds[tag]; cmd->tag = tag; cmd->nq = nq; + if (irqmode == NULL_IRQ_TIMER) { + hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + cmd->timer.function = null_cmd_timer_expired; + } return cmd; } @@ -220,6 +217,8 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) static void end_cmd(struct nullb_cmd *cmd) { + struct request_queue *q = NULL; + switch (queue_mode) { case NULL_Q_MQ: blk_mq_end_request(cmd->rq, 0); @@ -230,55 +229,37 @@ static void end_cmd(struct nullb_cmd *cmd) break; case NULL_Q_BIO: bio_endio(cmd->bio); - break; + goto free_cmd; } + if (cmd->rq) + q = cmd->rq->q; + + /* Restart queue if needed, as we are freeing a tag */ + if (q && !q->mq_ops && blk_queue_stopped(q)) { + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + if (blk_queue_stopped(q)) + blk_start_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); + } +free_cmd: free_cmd(cmd); } static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) { - struct completion_queue *cq; - struct llist_node *entry; - struct nullb_cmd *cmd; - - cq = &per_cpu(completion_queues, smp_processor_id()); - - while ((entry = llist_del_all(&cq->list)) != NULL) { - entry = llist_reverse_order(entry); - do { - struct request_queue *q = NULL; - - cmd = container_of(entry, struct nullb_cmd, ll_list); - entry = entry->next; - if (cmd->rq) - q = cmd->rq->q; - end_cmd(cmd); - - if (q && !q->mq_ops && blk_queue_stopped(q)) { - spin_lock(q->queue_lock); - if (blk_queue_stopped(q)) - blk_start_queue(q); - spin_unlock(q->queue_lock); - } - } while (entry); - } + end_cmd(container_of(timer, struct nullb_cmd, timer)); return HRTIMER_NORESTART; } static void null_cmd_end_timer(struct nullb_cmd *cmd) { - struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); - - cmd->ll_list.next = NULL; - if (llist_add(&cmd->ll_list, &cq->list)) { - ktime_t kt = ktime_set(0, completion_nsec); - - hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED); - } + ktime_t kt = ktime_set(0, completion_nsec); - put_cpu(); + hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL); } static void null_softirq_done_fn(struct request *rq) @@ -376,6 +357,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, { struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + if (irqmode == NULL_IRQ_TIMER) { + hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cmd->timer.function = null_cmd_timer_expired; + } cmd->rq = bd->rq; cmd->nq = hctx->driver_data; @@ -813,19 +798,6 @@ static int __init null_init(void) mutex_init(&lock); - /* Initialize a separate list for each CPU for issuing softirqs */ - for_each_possible_cpu(i) { - struct completion_queue *cq = &per_cpu(completion_queues, i); - - init_llist_head(&cq->list); - - if (irqmode != NULL_IRQ_TIMER) - continue; - - hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - cq->timer.function = null_cmd_timer_expired; - } - null_major = register_blkdev(0, "nullb"); if (null_major < 0) return null_major; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 235708c7c46e..81ea69fee7ca 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ 
-3442,6 +3442,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } img_request->rq = rq; + snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index a83c995a62df..8412ce5f93a7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -976,10 +976,14 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy) new_policy.governor = gov; - /* Use the default policy if its valid. */ - if (cpufreq_driver->setpolicy) - cpufreq_parse_governor(gov->name, &new_policy.policy, NULL); - + /* Use the default policy if there is no last_policy. */ + if (cpufreq_driver->setpolicy) { + if (policy->last_policy) + new_policy.policy = policy->last_policy; + else + cpufreq_parse_governor(gov->name, &new_policy.policy, + NULL); + } /* set default policy */ return cpufreq_set_policy(policy, &new_policy); } @@ -1330,6 +1334,8 @@ static void cpufreq_offline_prepare(unsigned int cpu) if (has_target()) strncpy(policy->last_governor, policy->governor->name, CPUFREQ_NAME_LEN); + else + policy->last_policy = policy->policy; } else if (cpu == policy->cpu) { /* Nominate new CPU */ policy->cpu = cpumask_any(policy->cpus); diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c index 73ef49922788..7038f364acb5 100644 --- a/drivers/crypto/nx/nx-aes-ccm.c +++ b/drivers/crypto/nx/nx-aes-ccm.c @@ -409,7 +409,7 @@ static int ccm_nx_decrypt(struct aead_request *req, processed += to_process; } while (processed < nbytes); - rc = memcmp(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag, + rc = crypto_memneq(csbcpb->cpb.aes_ccm.out_pat_or_mac, priv->oauth_tag, authsize) ? -EBADMSG : 0; out: spin_unlock_irqrestore(&nx_ctx->lock, irq_flags); diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index eee624f589b6..abd465f479c4 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -21,6 +21,7 @@ #include <crypto/internal/aead.h> #include <crypto/aes.h> +#include <crypto/algapi.h> #include <crypto/scatterwalk.h> #include <linux/module.h> #include <linux/types.h> @@ -418,7 +419,7 @@ mac: itag, req->src, req->assoclen + nbytes, crypto_aead_authsize(crypto_aead_reqtfm(req)), SCATTERWALK_FROM_SG); - rc = memcmp(itag, otag, + rc = crypto_memneq(itag, otag, crypto_aead_authsize(crypto_aead_reqtfm(req))) ? -EBADMSG : 0; } diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 46f531e19ccf..b6f9f42e2985 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -977,7 +977,7 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, } else oicv = (char *)&edesc->link_tbl[0]; - err = memcmp(oicv, icv, authsize) ? -EBADMSG : 0; + err = crypto_memneq(oicv, icv, authsize) ? 
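The nx-aes-ccm, nx-aes-gcm, and talitos hunks here all make the same substitution: memcmp() short-circuits on the first differing byte, so decryption latency leaks how many leading tag bytes a forged message guessed correctly, while crypto_memneq() examines every byte in constant time. The idiom, using the helper from <crypto/algapi.h>:

#include <crypto/algapi.h>

/* Constant-time authentication-tag check: no early exit on mismatch. */
static int verify_auth_tag(const u8 *computed, const u8 *received,
			   unsigned int authsize)
{
	return crypto_memneq(computed, received, authsize) ? -EBADMSG : 0;
}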
-EBADMSG : 0; } kfree(edesc); diff --git a/drivers/gpio/gpio-74xx-mmio.c b/drivers/gpio/gpio-74xx-mmio.c index 6ed7c0fb3378..6b186829087c 100644 --- a/drivers/gpio/gpio-74xx-mmio.c +++ b/drivers/gpio/gpio-74xx-mmio.c @@ -113,13 +113,16 @@ static int mmio_74xx_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static int mmio_74xx_gpio_probe(struct platform_device *pdev) { - const struct of_device_id *of_id = - of_match_device(mmio_74xx_gpio_ids, &pdev->dev); + const struct of_device_id *of_id; struct mmio_74xx_gpio_priv *priv; struct resource *res; void __iomem *dat; int err; + of_id = of_match_device(mmio_74xx_gpio_ids, &pdev->dev); + if (!of_id) + return -ENODEV; + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 56d2d026e62e..f7fbb46d5d79 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1122,8 +1122,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) /* MPUIO is a bit different, reading IRQ status clears it */ if (bank->is_mpuio) { irqc->irq_ack = dummy_irq_chip.irq_ack; - irqc->irq_mask = irq_gc_mask_set_bit; - irqc->irq_unmask = irq_gc_mask_clr_bit; if (!bank->regs->wkup_en) irqc->irq_set_wake = NULL; } diff --git a/drivers/gpio/gpio-palmas.c b/drivers/gpio/gpio-palmas.c index 171a6389f9ce..52b447c071cb 100644 --- a/drivers/gpio/gpio-palmas.c +++ b/drivers/gpio/gpio-palmas.c @@ -167,6 +167,8 @@ static int palmas_gpio_probe(struct platform_device *pdev) const struct palmas_device_data *dev_data; match = of_match_device(of_palmas_gpio_match, &pdev->dev); + if (!match) + return -ENODEV; dev_data = match->data; if (!dev_data) dev_data = &palmas_dev_data; diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index 045a952576c7..7b25fdf64802 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -187,11 +187,15 @@ MODULE_DEVICE_TABLE(of, syscon_gpio_ids); static int syscon_gpio_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - const struct of_device_id *of_id = of_match_device(syscon_gpio_ids, dev); + const struct of_device_id *of_id; struct syscon_gpio_priv *priv; struct device_node *np = dev->of_node; int ret; + of_id = of_match_device(syscon_gpio_ids, dev); + if (!of_id) + return -ENODEV; + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 027e5f47dd28..896bf29776b0 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -375,6 +375,60 @@ static int tegra_gpio_irq_set_wake(struct irq_data *d, unsigned int enable) } #endif +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +static int dbg_gpio_show(struct seq_file *s, void *unused) +{ + int i; + int j; + + for (i = 0; i < tegra_gpio_bank_count; i++) { + for (j = 0; j < 4; j++) { + int gpio = tegra_gpio_compose(i, j, 0); + seq_printf(s, + "%d:%d %02x %02x %02x %02x %02x %02x %06x\n", + i, j, + tegra_gpio_readl(GPIO_CNF(gpio)), + tegra_gpio_readl(GPIO_OE(gpio)), + tegra_gpio_readl(GPIO_OUT(gpio)), + tegra_gpio_readl(GPIO_IN(gpio)), + tegra_gpio_readl(GPIO_INT_STA(gpio)), + tegra_gpio_readl(GPIO_INT_ENB(gpio)), + tegra_gpio_readl(GPIO_INT_LVL(gpio))); + } + } + return 0; +} + +static int dbg_gpio_open(struct inode *inode, struct file *file) +{ + return single_open(file, dbg_gpio_show, &inode->i_private); +} + +static const struct file_operations debug_fops = { + 
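The gpio-74xx-mmio, gpio-palmas, and gpio-syscon fixes are one pattern: of_match_device() can return NULL (for instance when the device was instantiated without a matching OF node), so probe() must check the result before dereferencing ->data. The shape these drivers converge on (example_* names are placeholders):

static int example_probe(struct platform_device *pdev)
{
	const struct of_device_id *of_id;
	const struct example_cfg *cfg;

	of_id = of_match_device(example_of_ids, &pdev->dev);
	if (!of_id)
		return -ENODEV;		/* no match: don't deref of_id->data */

	cfg = of_id->data;		/* safe from here on */
	return example_init(pdev, cfg);
}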
.open = dbg_gpio_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void tegra_gpio_debuginit(void) +{ + (void) debugfs_create_file("tegra_gpio", S_IRUGO, + NULL, NULL, &debug_fops); +} + +#else + +static inline void tegra_gpio_debuginit(void) +{ +} + +#endif + static struct irq_chip tegra_gpio_irq_chip = { .name = "GPIO", .irq_ack = tegra_gpio_irq_ack, @@ -519,6 +573,8 @@ static int tegra_gpio_probe(struct platform_device *pdev) spin_lock_init(&bank->lvl_lock[j]); } + tegra_gpio_debuginit(); + return 0; } @@ -536,52 +592,3 @@ static int __init tegra_gpio_init(void) return platform_driver_register(&tegra_gpio_driver); } postcore_initcall(tegra_gpio_init); - -#ifdef CONFIG_DEBUG_FS - -#include <linux/debugfs.h> -#include <linux/seq_file.h> - -static int dbg_gpio_show(struct seq_file *s, void *unused) -{ - int i; - int j; - - for (i = 0; i < tegra_gpio_bank_count; i++) { - for (j = 0; j < 4; j++) { - int gpio = tegra_gpio_compose(i, j, 0); - seq_printf(s, - "%d:%d %02x %02x %02x %02x %02x %02x %06x\n", - i, j, - tegra_gpio_readl(GPIO_CNF(gpio)), - tegra_gpio_readl(GPIO_OE(gpio)), - tegra_gpio_readl(GPIO_OUT(gpio)), - tegra_gpio_readl(GPIO_IN(gpio)), - tegra_gpio_readl(GPIO_INT_STA(gpio)), - tegra_gpio_readl(GPIO_INT_ENB(gpio)), - tegra_gpio_readl(GPIO_INT_LVL(gpio))); - } - } - return 0; -} - -static int dbg_gpio_open(struct inode *inode, struct file *file) -{ - return single_open(file, dbg_gpio_show, &inode->i_private); -} - -static const struct file_operations debug_fops = { - .open = dbg_gpio_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init tegra_gpio_debuginit(void) -{ - (void) debugfs_create_file("tegra_gpio", S_IRUGO, - NULL, NULL, &debug_fops); - return 0; -} -late_initcall(tegra_gpio_debuginit); -#endif diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a18f00fc1bb8..2a91f3287e3b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -233,7 +233,7 @@ static struct gpio_desc *gpio_name_to_desc(const char * const name) for (i = 0; i != chip->ngpio; ++i) { struct gpio_desc *gpio = &chip->desc[i]; - if (!gpio->name) + if (!gpio->name || !name) continue; if (!strcmp(gpio->name, name)) { diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index b42c1ba8df9a..59babd5a5396 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -267,3 +267,5 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig" source "drivers/gpu/drm/imx/Kconfig" source "drivers/gpu/drm/vc4/Kconfig" + +source "drivers/gpu/drm/etnaviv/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 1e9ff4c3e3db..f858aa25fbb2 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -75,3 +75,4 @@ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ obj-$(CONFIG_DRM_FSL_DCU) += fsl-dcu/ +obj-$(CONFIG_DRM_ETNAVIV) += etnaviv/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a3fc43e52483..fca4ef78589c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -541,6 +541,7 @@ struct amdgpu_bo { /* Constant after initialization */ struct amdgpu_device *adev; struct drm_gem_object gem_base; + struct amdgpu_bo *parent; struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6ce595ff1aff..fa0e3276e8da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ 
-222,6 +222,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } p->uf.bo = gem_to_amdgpu_bo(gobj); + amdgpu_bo_ref(p->uf.bo); + drm_gem_object_unreference_unlocked(gobj); p->uf.offset = fence_data->offset; } else { ret = -EINVAL; @@ -487,7 +489,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo amdgpu_ib_free(parser->adev, &parser->ibs[i]); kfree(parser->ibs); if (parser->uf.bo) - drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base); + amdgpu_bo_unref(&parser->uf.bo); } static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, @@ -776,7 +778,7 @@ static int amdgpu_cs_free_job(struct amdgpu_job *job) amdgpu_ib_free(job->adev, &job->ibs[i]); kfree(job->ibs); if (job->uf.bo) - drm_gem_object_unreference_unlocked(&job->uf.bo->gem_base); + amdgpu_bo_unref(&job->uf.bo); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7d5e0583c95c..acd066d0a805 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -73,6 +73,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work) struct drm_crtc *crtc = &amdgpuCrtc->base; unsigned long flags; unsigned i; + int vpos, hpos, stat, min_udelay; + struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; amdgpu_flip_wait_fence(adev, &work->excl); for (i = 0; i < work->shared_count; ++i) @@ -81,6 +83,41 @@ static void amdgpu_flip_work_func(struct work_struct *__work) /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); + /* If this happens to execute within the "virtually extended" vblank + * interval before the start of the real vblank interval then it needs + * to delay programming the mmio flip until the real vblank is entered. + * This prevents completing a flip too early due to the way we fudge + * our vblank counter and vblank timestamps in order to work around the + * problem that the hw fires vblank interrupts before actual start of + * vblank (when line buffer refilling is done for a frame). It + * complements the fudging logic in amdgpu_get_crtc_scanoutpos() for + * timestamping and amdgpu_get_vblank_counter_kms() for vblank counts. + * + * In practice this won't execute very often unless on very fast + * machines because the time window for this to happen is very small. + */ + for (;;) { + /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank + * start in hpos, and to the "fudged earlier" vblank start in + * vpos. 
+ */ + stat = amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, + GET_DISTANCE_TO_VBLANKSTART, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode); + + if ((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE) || + !(vpos >= 0 && hpos <= 0)) + break; + + /* Sleep at least until estimated real start of hw vblank */ + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); + usleep_range(min_udelay, 2 * min_udelay); + spin_lock_irqsave(&crtc->dev->event_lock, flags); + }; + /* do the flip (mmio) */ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); /* set the flip status */ @@ -109,7 +146,7 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) } else DRM_ERROR("failed to reserve buffer after flip\n"); - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + amdgpu_bo_unref(&work->old_rbo); kfree(work->shared); kfree(work); } @@ -148,8 +185,8 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, obj = old_amdgpu_fb->obj; /* take a reference to the old object */ - drm_gem_object_reference(obj); work->old_rbo = gem_to_amdgpu_bo(obj); + amdgpu_bo_ref(work->old_rbo); new_amdgpu_fb = to_amdgpu_framebuffer(fb); obj = new_amdgpu_fb->obj; @@ -222,7 +259,7 @@ pflip_cleanup: amdgpu_bo_unreserve(new_rbo); cleanup: - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + amdgpu_bo_unref(&work->old_rbo); fence_put(work->excl); for (i = 0; i < work->shared_count; ++i) fence_put(work->shared[i]); @@ -712,6 +749,15 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * \param dev Device to query. * \param pipe Crtc to query. * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). + * For driver internal use only also supports these flags: + * + * USE_REAL_VBLANKSTART to use the real start of vblank instead + * of a fudged earlier start of vblank. + * + * GET_DISTANCE_TO_VBLANKSTART to return distance to the + * fudged earlier start of vblank in *vpos and the distance + * to true start of vblank in *hpos. + * * \param *vpos Location where vertical scanout position should be stored. * \param *hpos Location where horizontal scanout position should go. * \param *stime Target location for timestamp taken immediately before @@ -776,10 +822,40 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, vbl_end = 0; } + /* Called from driver internal vblank counter query code? */ + if (flags & GET_DISTANCE_TO_VBLANKSTART) { + /* Caller wants distance from real vbl_start in *hpos */ + *hpos = *vpos - vbl_start; + } + + /* Fudge vblank to start a few scanlines earlier to handle the + * problem that vblank irqs fire a few scanlines before start + * of vblank. Some driver internal callers need the true vblank + * start to be used and signal this via the USE_REAL_VBLANKSTART flag. + * + * The cause of the "early" vblank irq is that the irq is triggered + * by the line buffer logic when the line buffer read position enters + * the vblank, whereas our crtc scanout position naturally lags the + * line buffer read position. + */ + if (!(flags & USE_REAL_VBLANKSTART)) + vbl_start -= adev->mode_info.crtcs[pipe]->lb_vblank_lead_lines; + /* Test scanout position against vblank region. */ if ((*vpos < vbl_start) && (*vpos >= vbl_end)) in_vbl = false; + /* In vblank? */ + if (in_vbl) + ret |= DRM_SCANOUTPOS_IN_VBLANK; + + /* Called from driver internal vblank counter query code? 
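For concreteness, the sleep bound in the flip-delay loop above is min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5). If the flip worker runs 4 scanlines before true vblank start (hpos = -4) on a mode with a 16 us line duration (illustrative numbers), then min_udelay = 5 * 16 = 80, and usleep_range(80, 160) yields the CPU until the real vblank should have begun before the position is re-checked.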
*/ + if (flags & GET_DISTANCE_TO_VBLANKSTART) { + /* Caller wants distance from fudged earlier vbl_start */ + *vpos -= vbl_start; + return ret; + } + /* Check if inside vblank area and apply corrective offsets: * vpos will then be >=0 in video scanout area, but negative * within vblank area, counting down the number of lines until @@ -795,32 +871,6 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, /* Correct for shifted end of vbl at vbl_end. */ *vpos = *vpos - vbl_end; - /* In vblank? */ - if (in_vbl) - ret |= DRM_SCANOUTPOS_IN_VBLANK; - - /* Is vpos outside nominal vblank area, but less than - * 1/100 of a frame height away from start of vblank? - * If so, assume this isn't a massively delayed vblank - * interrupt, but a vblank interrupt that fired a few - * microseconds before true start of vblank. Compensate - * by adding a full frame duration to the final timestamp. - * Happens, e.g., on ATI R500, R600. - * - * We only do this if DRM_CALLED_FROM_VBLIRQ. - */ - if ((flags & DRM_CALLED_FROM_VBLIRQ) && !in_vbl) { - vbl_start = mode->crtc_vdisplay; - vtotal = mode->crtc_vtotal; - - if (vbl_start - *vpos < vtotal / 100) { - *vpos -= vtotal; - - /* Signal this correction as "applied". */ - ret |= 0x8; - } - } - return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 8c5687e4a6d1..6d136b260bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -235,8 +235,9 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_USERPTR_REGISTER)) return -EINVAL; - if (!(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || - !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) { + if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && ( + !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) || + !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) { /* if we want to write to it we must require anonymous memory and install a MMU notifier */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1618e2294a16..e23843f4d877 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -611,13 +611,59 @@ void amdgpu_driver_preclose_kms(struct drm_device *dev, u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; + int vpos, hpos, stat; + u32 count; if (pipe >= adev->mode_info.num_crtc) { DRM_ERROR("Invalid crtc %u\n", pipe); return -EINVAL; } - return amdgpu_display_vblank_get_counter(adev, pipe); + /* The hw increments its frame counter at start of vsync, not at start + * of vblank, as is required by DRM core vblank counter handling. + * Cook the hw count here to make it appear to the caller as if it + * incremented at start of vblank. We measure distance to start of + * vblank in vpos. vpos therefore will be >= 0 between start of vblank + * and start of vsync, so vpos >= 0 means to bump the hw frame counter + * result by 1 to give the proper appearance to caller. + */ + if (adev->mode_info.crtcs[pipe]) { + /* Repeat readout if needed to provide stable result if + * we cross start of vsync during the queries. + */ + do { + count = amdgpu_display_vblank_get_counter(adev, pipe); + /* Ask amdgpu_get_crtc_scanoutpos to return vpos as + * distance to start of vblank, instead of regular + * vertical scanout pos. 
+ */ + stat = amdgpu_get_crtc_scanoutpos( + dev, pipe, GET_DISTANCE_TO_VBLANKSTART, + &vpos, &hpos, NULL, NULL, + &adev->mode_info.crtcs[pipe]->base.hwmode); + } while (count != amdgpu_display_vblank_get_counter(adev, pipe)); + + if (((stat & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE)) != + (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE))) { + DRM_DEBUG_VBL("Query failed! stat %d\n", stat); + } else { + DRM_DEBUG_VBL("crtc %d: dist from vblank start %d\n", + pipe, vpos); + + /* Bump counter if we are at >= leading edge of vblank, + * but before vsync where vpos would turn negative and + * the hw counter really increments. + */ + if (vpos >= 0) + count++; + } + } else { + /* Fallback to use value as is. */ + count = amdgpu_display_vblank_get_counter(adev, pipe); + DRM_DEBUG_VBL("NULL mode info! Returned count may be wrong.\n"); + } + + return count; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index de4529969778..fdc1be8550da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -35,6 +35,7 @@ #include <drm/drm_dp_helper.h> #include <drm/drm_fixed.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/drm_plane_helper.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> @@ -407,6 +408,7 @@ struct amdgpu_crtc { u32 line_time; u32 wm_low; u32 wm_high; + u32 lb_vblank_lead_lines; struct drm_display_mode hw_mode; }; @@ -528,6 +530,10 @@ struct amdgpu_framebuffer { #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) +/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +#define USE_REAL_VBLANKSTART (1 << 30) +#define GET_DISTANCE_TO_VBLANKSTART (1 << 31) + void amdgpu_link_encoder_connector(struct drm_device *dev); struct drm_connector * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0d524384ff79..c3ce103b6a33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -100,6 +100,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) list_del_init(&bo->list); mutex_unlock(&bo->adev->gem.mutex); drm_gem_object_release(&bo->gem_base); + amdgpu_bo_unref(&bo->parent); kfree(bo->metadata); kfree(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8051cb9b8c1e..8a1752ff3d8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -801,11 +801,12 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, if (mem && mem->mem_type != TTM_PL_SYSTEM) flags |= AMDGPU_PTE_VALID; - if (mem && mem->mem_type == TTM_PL_TT) + if (mem && mem->mem_type == TTM_PL_TT) { flags |= AMDGPU_PTE_SYSTEM; - if (!ttm || ttm->caching_state == tt_cached) - flags |= AMDGPU_PTE_SNOOPED; + if (ttm->caching_state == tt_cached) + flags |= AMDGPU_PTE_SNOOPED; + } if (adev->asic_type >= CHIP_TOPAZ) flags |= AMDGPU_PTE_EXECUTABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d6ff5dad98f6..8f7688e598a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1085,6 +1085,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (r) goto error_free; + /* Keep a reference to the page table to avoid freeing + * them up in the wrong order. 
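The do/while in the counter query above is the usual pattern for sampling a counter together with a dependent value: bracket the position query with two counter reads and retry until both reads agree, seqlock style. A refactored sketch of that loop; the helper name is made up and error handling is dropped:

        static u32 foo_stable_vblank_read(struct drm_device *dev,
                                          struct amdgpu_device *adev,
                                          unsigned int pipe,
                                          int *vpos, int *hpos, int *stat)
        {
                u32 count;

                do {
                        count = amdgpu_display_vblank_get_counter(adev, pipe);
                        *stat = amdgpu_get_crtc_scanoutpos(dev, pipe,
                                GET_DISTANCE_TO_VBLANKSTART,
                                vpos, hpos, NULL, NULL,
                                &adev->mode_info.crtcs[pipe]->base.hwmode);
                } while (count != amdgpu_display_vblank_get_counter(adev, pipe));

                return count;
        }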
+ */ + pt->parent = amdgpu_bo_ref(vm->page_directory); + r = amdgpu_vm_clear_bo(adev, pt); if (r) { amdgpu_bo_unref(&pt); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index cb0f7747e3dc..093599aba64b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -1250,7 +1250,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1333,6 +1333,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, (adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1357,6 +1358,8 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** @@ -3726,7 +3729,7 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &dce_v10_0_dac_helper_funcs); break; case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: @@ -3737,15 +3740,15 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { amdgpu_encoder->rmx_type = RMX_FULL; drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder); } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } else { drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } drm_encoder_helper_add(encoder, &dce_v10_0_dig_helper_funcs); @@ -3763,13 +3766,13 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, amdgpu_encoder->is_ext_encoder = true; if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); else drm_encoder_init(dev, encoder, &dce_v10_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &dce_v10_0_ext_helper_funcs); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 5af3721851d6..8701661a8868 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1238,7 +1238,7 @@ static void 
dce_v11_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1321,6 +1321,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, (adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1345,6 +1346,8 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** @@ -3719,7 +3722,7 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &dce_v11_0_dac_helper_funcs); break; case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: @@ -3730,15 +3733,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { amdgpu_encoder->rmx_type = RMX_FULL; drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder); } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } else { drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } drm_encoder_helper_add(encoder, &dce_v11_0_dig_helper_funcs); @@ -3756,13 +3759,13 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, amdgpu_encoder->is_ext_encoder = true; if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); else drm_encoder_init(dev, encoder, &dce_v11_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &dce_v11_0_ext_helper_funcs); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 4f7b49a6dc50..d0e128c24813 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1193,7 +1193,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, u32 pixel_period; u32 line_time = 0; u32 latency_watermark_a = 0, latency_watermark_b = 0; - u32 tmp, wm_mask; + u32 tmp, wm_mask, lb_vblank_lead_lines = 0; if (amdgpu_crtc->base.enabled && num_heads && mode) { pixel_period = 1000000 / (u32)mode->clock; @@ -1276,6 +1276,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, 
(adev->mode_info.disp_priority == 2)) { DRM_DEBUG_KMS("force priority to high\n"); } + lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay); } /* select wm A */ @@ -1302,6 +1303,8 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, amdgpu_crtc->line_time = line_time; amdgpu_crtc->wm_high = latency_watermark_a; amdgpu_crtc->wm_low = latency_watermark_b; + /* Save number of lines the linebuffer leads before the scanout */ + amdgpu_crtc->lb_vblank_lead_lines = lb_vblank_lead_lines; } /** @@ -3656,7 +3659,7 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &dce_v8_0_dac_helper_funcs); break; case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DVO1: @@ -3667,15 +3670,15 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { amdgpu_encoder->rmx_type = RMX_FULL; drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_lcd_info(amdgpu_encoder); } else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } else { drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); amdgpu_encoder->enc_priv = amdgpu_atombios_encoder_get_dig_info(amdgpu_encoder); } drm_encoder_helper_add(encoder, &dce_v8_0_dig_helper_funcs); @@ -3693,13 +3696,13 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev, amdgpu_encoder->is_ext_encoder = true; if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); else if (amdgpu_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); else drm_encoder_init(dev, encoder, &dce_v8_0_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &dce_v8_0_ext_helper_funcs); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 868505753a9a..dababe40a685 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -517,7 +517,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL3, tmp); /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 6e2331f70b39..adc25f87fc18 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -661,7 +661,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) WREG32(mmVM_L2_CNTL4, tmp); /* setup context0 */ 
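The lb_vblank_lead_lines computation, now duplicated across the DCE v8/v10/v11 watermark code, is plain arithmetic: the line buffer holds lb_size pixels, so dividing by the active line width (rounding up) gives the maximum number of scanlines the line buffer read position can run ahead of the CRTC scanout, i.e. how early the vblank irq can fire. Worked through with made-up numbers:

        /* Illustrative values only. */
        u32 lb_size = 5120;             /* line buffer capacity, pixels */
        u32 crtc_hdisplay = 1920;       /* active line width, pixels */

        /* 5120 / 1920 = 2.67 -> rounds up to 3: the line buffer can be
         * up to 3 scanlines into the vblank before scanout follows, so
         * vbl_start is fudged 3 lines earlier in the scanoutpos query.
         */
        u32 lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, crtc_hdisplay);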
WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gtt_start >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, (adev->mc.gtt_end >> 12) - 1); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gtt_end >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page.addr >> 12)); diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h index d9b8d3f768ab..e61a3e67852e 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h @@ -25,6 +25,7 @@ #include <linux/seq_file.h> #include <linux/types.h> +#include <linux/errno.h> #include "amd_shared.h" #include "cgs_common.h" diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index cebcab560626..0293eb74d777 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -928,11 +928,10 @@ static int armada_drm_crtc_cursor_set(struct drm_crtc *crtc, } } - mutex_lock(&dev->struct_mutex); if (dcrtc->cursor_obj) { dcrtc->cursor_obj->update = NULL; dcrtc->cursor_obj->update_data = NULL; - drm_gem_object_unreference(&dcrtc->cursor_obj->obj); + drm_gem_object_unreference_unlocked(&dcrtc->cursor_obj->obj); } dcrtc->cursor_obj = obj; dcrtc->cursor_w = w; @@ -942,14 +941,12 @@ static int armada_drm_crtc_cursor_set(struct drm_crtc *crtc, obj->update_data = dcrtc; obj->update = cursor_update; } - mutex_unlock(&dev->struct_mutex); return ret; } static int armada_drm_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) { - struct drm_device *dev = crtc->dev; struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); int ret; @@ -957,11 +954,9 @@ static int armada_drm_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) if (!dcrtc->variant->has_spu_adv_reg) return -EFAULT; - mutex_lock(&dev->struct_mutex); dcrtc->cursor_x = x; dcrtc->cursor_y = y; ret = armada_drm_crtc_cursor_update(dcrtc, false); - mutex_unlock(&dev->struct_mutex); return ret; } @@ -972,7 +967,7 @@ static void armada_drm_crtc_destroy(struct drm_crtc *crtc) struct armada_private *priv = crtc->dev->dev_private; if (dcrtc->cursor_obj) - drm_gem_object_unreference(&dcrtc->cursor_obj->obj); + drm_gem_object_unreference_unlocked(&dcrtc->cursor_obj->obj); priv->dcrtc[dcrtc->num] = NULL; drm_crtc_cleanup(&dcrtc->crtc); @@ -1074,7 +1069,7 @@ armada_drm_crtc_set_property(struct drm_crtc *crtc, return 0; } -static struct drm_crtc_funcs armada_crtc_funcs = { +static const struct drm_crtc_funcs armada_crtc_funcs = { .cursor_set = armada_drm_crtc_cursor_set, .cursor_move = armada_drm_crtc_cursor_move, .destroy = armada_drm_crtc_destroy, @@ -1216,14 +1211,14 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev, &armada_primary_plane_funcs, armada_primary_formats, ARRAY_SIZE(armada_primary_formats), - DRM_PLANE_TYPE_PRIMARY); + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); return ret; } ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL, - &armada_crtc_funcs); + &armada_crtc_funcs, NULL); if (ret) goto err_crtc_init; diff --git a/drivers/gpu/drm/armada/armada_debugfs.c b/drivers/gpu/drm/armada/armada_debugfs.c index 471e45627f1e..d4f7ab0a30d4 100644 --- a/drivers/gpu/drm/armada/armada_debugfs.c +++ b/drivers/gpu/drm/armada/armada_debugfs.c @@ -21,9 +21,9 @@ static int armada_debugfs_gem_linear_show(struct seq_file *m, void *data) struct armada_private *priv = 
dev->dev_private; int ret; - mutex_lock(&dev->struct_mutex); + mutex_lock(&priv->linear_lock); ret = drm_mm_dump_table(m, &priv->linear); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&priv->linear_lock); return ret; } diff --git a/drivers/gpu/drm/armada/armada_drm.h b/drivers/gpu/drm/armada/armada_drm.h index 4df6f2af2b21..3b2bb6128d40 100644 --- a/drivers/gpu/drm/armada/armada_drm.h +++ b/drivers/gpu/drm/armada/armada_drm.h @@ -57,7 +57,8 @@ struct armada_private { DECLARE_KFIFO(fb_unref, struct drm_framebuffer *, 8); struct drm_fb_helper *fbdev; struct armada_crtc *dcrtc[2]; - struct drm_mm linear; + struct drm_mm linear; /* protected by linear_lock */ + struct mutex linear_lock; struct drm_property *csc_yuv_prop; struct drm_property *csc_rgb_prop; struct drm_property *colorkey_prop; diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 77ab93d60125..3bd7e1cde99e 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -102,6 +102,7 @@ static int armada_drm_load(struct drm_device *dev, unsigned long flags) dev->mode_config.preferred_depth = 24; dev->mode_config.funcs = &armada_drm_mode_config_funcs; drm_mm_init(&priv->linear, mem->start, resource_size(mem)); + mutex_init(&priv->linear_lock); ret = component_bind_all(dev->dev, dev); if (ret) diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 60a688ef81c7..6e731db31aa4 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -46,22 +46,26 @@ static size_t roundup_gem_size(size_t size) return roundup(size, PAGE_SIZE); } -/* dev->struct_mutex is held here */ void armada_gem_free_object(struct drm_gem_object *obj) { struct armada_gem_object *dobj = drm_to_armada_gem(obj); + struct armada_private *priv = obj->dev->dev_private; DRM_DEBUG_DRIVER("release obj %p\n", dobj); drm_gem_free_mmap_offset(&dobj->obj); + might_lock(&priv->linear_lock); + if (dobj->page) { /* page backed memory */ unsigned int order = get_order(dobj->obj.size); __free_pages(dobj->page, order); } else if (dobj->linear) { /* linear backed memory */ + mutex_lock(&priv->linear_lock); drm_mm_remove_node(dobj->linear); + mutex_unlock(&priv->linear_lock); kfree(dobj->linear); if (dobj->addr) iounmap(dobj->addr); @@ -144,10 +148,10 @@ armada_gem_linear_back(struct drm_device *dev, struct armada_gem_object *obj) if (!node) return -ENOSPC; - mutex_lock(&dev->struct_mutex); + mutex_lock(&priv->linear_lock); ret = drm_mm_insert_node(&priv->linear, node, size, align, DRM_MM_SEARCH_DEFAULT); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&priv->linear_lock); if (ret) { kfree(node); return ret; @@ -158,9 +162,9 @@ armada_gem_linear_back(struct drm_device *dev, struct armada_gem_object *obj) /* Ensure that the memory we're returning is cleared. 
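The armada conversion away from dev->struct_mutex follows the recommended pattern for driver-private state: the new mutex is declared next to the drm_mm it protects (see the linear_lock comment in armada_drm.h) and every access to priv->linear takes it. The might_lock() in the free path is the notable trick: the lock is only taken for linear-backed objects, so the annotation teaches lockdep the dependency on every free, not just on the rare path. A condensed sketch of the resulting allocation pattern, assuming node was allocated by the caller:

        mutex_lock(&priv->linear_lock);
        ret = drm_mm_insert_node(&priv->linear, node, size, align,
                                 DRM_MM_SEARCH_DEFAULT);
        mutex_unlock(&priv->linear_lock);
        if (ret)
                kfree(node);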
*/ ptr = ioremap_wc(obj->linear->start, size); if (!ptr) { - mutex_lock(&dev->struct_mutex); + mutex_lock(&priv->linear_lock); drm_mm_remove_node(obj->linear); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&priv->linear_lock); kfree(obj->linear); obj->linear = NULL; return -ENOMEM; @@ -274,18 +278,16 @@ int armada_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, struct armada_gem_object *obj; int ret = 0; - mutex_lock(&dev->struct_mutex); obj = armada_gem_object_lookup(dev, file, handle); if (!obj) { DRM_ERROR("failed to lookup gem object\n"); - ret = -EINVAL; - goto err_unlock; + return -EINVAL; } /* Don't allow imported objects to be mapped */ if (obj->obj.import_attach) { ret = -EINVAL; - goto err_unlock; + goto err_unref; } ret = drm_gem_create_mmap_offset(&obj->obj); @@ -294,9 +296,8 @@ int armada_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, DRM_DEBUG_DRIVER("handle %#x offset %llx\n", handle, *offset); } - drm_gem_object_unreference(&obj->obj); - err_unlock: - mutex_unlock(&dev->struct_mutex); + err_unref: + drm_gem_object_unreference_unlocked(&obj->obj); return ret; } @@ -352,13 +353,13 @@ int armada_gem_mmap_ioctl(struct drm_device *dev, void *data, return -ENOENT; if (!dobj->obj.filp) { - drm_gem_object_unreference(&dobj->obj); + drm_gem_object_unreference_unlocked(&dobj->obj); return -EINVAL; } addr = vm_mmap(dobj->obj.filp, 0, args->size, PROT_READ | PROT_WRITE, MAP_SHARED, args->offset); - drm_gem_object_unreference(&dobj->obj); + drm_gem_object_unreference_unlocked(&dobj->obj); if (IS_ERR_VALUE(addr)) return addr; diff --git a/drivers/gpu/drm/armada/armada_overlay.c b/drivers/gpu/drm/armada/armada_overlay.c index 5c22b380f8f3..148e8a42b2c6 100644 --- a/drivers/gpu/drm/armada/armada_overlay.c +++ b/drivers/gpu/drm/armada/armada_overlay.c @@ -460,7 +460,7 @@ int armada_overlay_plane_create(struct drm_device *dev, unsigned long crtcs) &armada_ovl_plane_funcs, armada_ovl_formats, ARRAY_SIZE(armada_ovl_formats), - DRM_PLANE_TYPE_OVERLAY); + DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { kfree(dplane); return ret; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 69d19f3304a5..0123458cbd83 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -751,7 +751,7 @@ static int ast_encoder_init(struct drm_device *dev) return -ENOMEM; drm_encoder_init(dev, &ast_encoder->base, &ast_enc_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(&ast_encoder->base, &ast_enc_helper_funcs); ast_encoder->base.possible_crtcs = 1; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index 9f6e234e7029..468a14f266a7 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -344,7 +344,7 @@ int atmel_hlcdc_crtc_create(struct drm_device *dev) ret = drm_crtc_init_with_planes(dev, &crtc->base, &planes->primary->base, planes->cursor ? 
&planes->cursor->base : NULL, - &atmel_hlcdc_crtc_funcs); + &atmel_hlcdc_crtc_funcs, NULL); if (ret < 0) goto fail; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 816895447155..a45b32ba029e 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -333,6 +333,10 @@ static const struct of_device_id atmel_hlcdc_of_match[] = { .data = &atmel_hlcdc_dc_at91sam9x5, }, { + .compatible = "atmel,sama5d2-hlcdc", + .data = &atmel_hlcdc_dc_sama5d4, + }, + { .compatible = "atmel,sama5d3-hlcdc", .data = &atmel_hlcdc_dc_sama5d3, }, @@ -342,6 +346,7 @@ static const struct of_device_id atmel_hlcdc_of_match[] = { }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, atmel_hlcdc_of_match); int atmel_hlcdc_dc_mode_valid(struct atmel_hlcdc_dc *dc, struct drm_display_mode *mode) @@ -733,10 +738,6 @@ static int atmel_hlcdc_dc_drm_probe(struct platform_device *pdev) if (!ddev) return -ENOMEM; - ret = drm_dev_set_unique(ddev, dev_name(ddev->dev)); - if (ret) - goto err_unref; - ret = atmel_hlcdc_dc_load(ddev); if (ret) goto err_unref; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c index 067e4c144bd6..0f7ec016e7a9 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c @@ -146,7 +146,7 @@ atmel_hlcdc_rgb_encoder_mode_set(struct drm_encoder *encoder, cfg); } -static struct drm_encoder_helper_funcs atmel_hlcdc_panel_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs atmel_hlcdc_panel_encoder_helper_funcs = { .mode_fixup = atmel_hlcdc_panel_encoder_mode_fixup, .mode_set = atmel_hlcdc_rgb_encoder_mode_set, .disable = atmel_hlcdc_panel_encoder_disable, @@ -192,7 +192,7 @@ atmel_hlcdc_rgb_best_encoder(struct drm_connector *connector) return &rgb->encoder; } -static struct drm_connector_helper_funcs atmel_hlcdc_panel_connector_helper_funcs = { +static const struct drm_connector_helper_funcs atmel_hlcdc_panel_connector_helper_funcs = { .get_modes = atmel_hlcdc_panel_get_modes, .mode_valid = atmel_hlcdc_rgb_mode_valid, .best_encoder = atmel_hlcdc_rgb_best_encoder, @@ -256,7 +256,7 @@ static int atmel_hlcdc_create_panel_output(struct drm_device *dev, &atmel_hlcdc_panel_encoder_helper_funcs); ret = drm_encoder_init(dev, &panel->base.encoder, &atmel_hlcdc_panel_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index d0299aed517e..1ffe9c329c46 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -941,7 +941,7 @@ atmel_hlcdc_plane_create(struct drm_device *dev, ret = drm_universal_plane_init(dev, &plane->base, 0, &layer_plane_funcs, desc->formats->formats, - desc->formats->nformats, type); + desc->formats->nformats, type, NULL); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index 26bcd03a8cb6..2849f1b95eec 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -119,7 +119,7 @@ static int bochs_crtc_page_flip(struct drm_crtc *crtc, bochs_crtc_mode_set_base(crtc, 0, 0, old_fb); if (event) { spin_lock_irqsave(&bochs->dev->event_lock, irqflags); - drm_send_vblank_event(bochs->dev, -1, event); + drm_crtc_send_vblank_event(crtc, event); 
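drm_crtc_send_vblank_event() takes the CRTC directly instead of the old (dev, pipe, event) triple, which is what makes the magic pipe of -1 in the bochs code below unnecessary. The resulting completion pattern, as a sketch; event may be NULL when userspace did not request one:

        spin_lock_irqsave(&dev->event_lock, flags);
        if (event)
                drm_crtc_send_vblank_event(crtc, event);
        spin_unlock_irqrestore(&dev->event_lock, flags);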
spin_unlock_irqrestore(&bochs->dev->event_lock, irqflags); } return 0; @@ -196,7 +196,7 @@ static void bochs_encoder_init(struct drm_device *dev) encoder->possible_crtcs = 0x1; drm_encoder_init(dev, encoder, &bochs_encoder_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &bochs_encoder_helper_funcs); } @@ -245,13 +245,13 @@ static enum drm_connector_status bochs_connector_detect(struct drm_connector return connector_status_connected; } -struct drm_connector_helper_funcs bochs_connector_connector_helper_funcs = { +static const struct drm_connector_helper_funcs bochs_connector_connector_helper_funcs = { .get_modes = bochs_connector_get_modes, .mode_valid = bochs_connector_mode_valid, .best_encoder = bochs_connector_best_encoder, }; -struct drm_connector_funcs bochs_connector_connector_funcs = { +static const struct drm_connector_funcs bochs_connector_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = bochs_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, @@ -283,7 +283,7 @@ int bochs_kms_init(struct bochs_device *bochs) bochs->dev->mode_config.preferred_depth = 24; bochs->dev->mode_config.prefer_shadow = 0; - bochs->dev->mode_config.funcs = (void *)&bochs_mode_funcs; + bochs->dev->mode_config.funcs = &bochs_mode_funcs; bochs_crtc_init(bochs->dev); bochs_encoder_init(bochs->dev); diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 6dddd392aa42..27e2022de89d 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -22,7 +22,6 @@ config DRM_DW_HDMI_AHB_AUDIO Designware HDMI block. This is used in conjunction with the i.MX6 HDMI driver. - config DRM_NXP_PTN3460 tristate "NXP PTN3460 DP/LVDS bridge" depends on OF diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index d4e28beec30e..f13c33d67c03 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,6 +1,6 @@ ccflags-y := -Iinclude/drm -obj-$(CONFIG_DRM_DW_HDMI) += dw_hdmi.o -obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw_hdmi-ahb-audio.o +obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o +obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o obj-$(CONFIG_DRM_NXP_PTN3460) += nxp-ptn3460.o obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o diff --git a/drivers/gpu/drm/bridge/dw_hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/dw-hdmi-ahb-audio.c index 59f630f1c61a..122bb015f4a9 100644 --- a/drivers/gpu/drm/bridge/dw_hdmi-ahb-audio.c +++ b/drivers/gpu/drm/bridge/dw-hdmi-ahb-audio.c @@ -21,7 +21,7 @@ #include <sound/pcm_drm_eld.h> #include <sound/pcm_iec958.h> -#include "dw_hdmi-audio.h" +#include "dw-hdmi-audio.h" #define DRIVER_NAME "dw-hdmi-ahb-audio" diff --git a/drivers/gpu/drm/bridge/dw_hdmi-audio.h b/drivers/gpu/drm/bridge/dw-hdmi-audio.h index 91f631beecc7..91f631beecc7 100644 --- a/drivers/gpu/drm/bridge/dw_hdmi-audio.h +++ b/drivers/gpu/drm/bridge/dw-hdmi-audio.h diff --git a/drivers/gpu/drm/bridge/dw_hdmi.c b/drivers/gpu/drm/bridge/dw-hdmi.c index 56de9f1c95fc..77cafa9aa41c 100644 --- a/drivers/gpu/drm/bridge/dw_hdmi.c +++ b/drivers/gpu/drm/bridge/dw-hdmi.c @@ -27,8 +27,8 @@ #include <drm/drm_encoder_slave.h> #include <drm/bridge/dw_hdmi.h> -#include "dw_hdmi.h" -#include "dw_hdmi-audio.h" +#include "dw-hdmi.h" +#include "dw-hdmi-audio.h" #define HDMI_EDID_LEN 512 @@ -1514,7 +1514,7 @@ static void dw_hdmi_connector_force(struct drm_connector *connector) mutex_unlock(&hdmi->mutex); } -static struct drm_connector_funcs dw_hdmi_connector_funcs = { 
+static const struct drm_connector_funcs dw_hdmi_connector_funcs = { .dpms = drm_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = dw_hdmi_connector_detect, @@ -1522,13 +1522,13 @@ static struct drm_connector_funcs dw_hdmi_connector_funcs = { .force = dw_hdmi_connector_force, }; -static struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = { +static const struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = { .get_modes = dw_hdmi_connector_get_modes, .mode_valid = dw_hdmi_connector_mode_valid, .best_encoder = dw_hdmi_connector_best_encoder, }; -static struct drm_bridge_funcs dw_hdmi_bridge_funcs = { +static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = { .enable = dw_hdmi_bridge_enable, .disable = dw_hdmi_bridge_disable, .pre_enable = dw_hdmi_bridge_nop, diff --git a/drivers/gpu/drm/bridge/dw_hdmi.h b/drivers/gpu/drm/bridge/dw-hdmi.h index fc9a560429d6..fc9a560429d6 100644 --- a/drivers/gpu/drm/bridge/dw_hdmi.h +++ b/drivers/gpu/drm/bridge/dw-hdmi.h diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index 0ffa3a6a206a..7ecd59f70b8e 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -242,7 +242,7 @@ static struct drm_encoder *ptn3460_best_encoder(struct drm_connector *connector) return ptn_bridge->bridge.encoder; } -static struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = { +static const struct drm_connector_helper_funcs ptn3460_connector_helper_funcs = { .get_modes = ptn3460_get_modes, .best_encoder = ptn3460_best_encoder, }; @@ -258,7 +258,7 @@ static void ptn3460_connector_destroy(struct drm_connector *connector) drm_connector_cleanup(connector); } -static struct drm_connector_funcs ptn3460_connector_funcs = { +static const struct drm_connector_funcs ptn3460_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = ptn3460_detect, @@ -299,7 +299,7 @@ static int ptn3460_bridge_attach(struct drm_bridge *bridge) return ret; } -static struct drm_bridge_funcs ptn3460_bridge_funcs = { +static const struct drm_bridge_funcs ptn3460_bridge_funcs = { .pre_enable = ptn3460_pre_enable, .enable = ptn3460_enable, .disable = ptn3460_disable, diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index 61385f2298bf..4a02854a6963 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -489,7 +489,7 @@ static struct drm_encoder *cirrus_encoder_init(struct drm_device *dev) encoder->possible_crtcs = 0x1; drm_encoder_init(dev, encoder, &cirrus_encoder_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &cirrus_encoder_helper_funcs); return encoder; @@ -533,12 +533,12 @@ static void cirrus_connector_destroy(struct drm_connector *connector) kfree(connector); } -struct drm_connector_helper_funcs cirrus_vga_connector_helper_funcs = { +static const struct drm_connector_helper_funcs cirrus_vga_connector_helper_funcs = { .get_modes = cirrus_vga_get_modes, .best_encoder = cirrus_connector_best_encoder, }; -struct drm_connector_funcs cirrus_vga_connector_funcs = { +static const struct drm_connector_funcs cirrus_vga_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = cirrus_vga_detect, .fill_modes = drm_helper_probe_single_connector_modes, diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 55b4debad79b..6a21e5c378c1 
100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -288,8 +288,8 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state, state->crtcs[index] = crtc; crtc_state->state = state; - DRM_DEBUG_ATOMIC("Added [CRTC:%d] %p state to %p\n", - crtc->base.id, crtc_state, state); + DRM_DEBUG_ATOMIC("Added [CRTC:%d:%s] %p state to %p\n", + crtc->base.id, crtc->name, crtc_state, state); return crtc_state; } @@ -429,11 +429,20 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, } EXPORT_SYMBOL(drm_atomic_crtc_set_property); -/* +/** + * drm_atomic_crtc_get_property - get property value from CRTC state + * @crtc: the drm CRTC to set a property on + * @state: the state object to get the property value from + * @property: the property to set + * @val: return location for the property value + * * This function handles generic/core properties and calls out to * driver's ->atomic_get_property() for driver properties. To ensure * consistent behavior you must call this function rather than the * driver hook directly. + * + * RETURNS: + * Zero on success, error code on failure */ static int drm_atomic_crtc_get_property(struct drm_crtc *crtc, @@ -477,8 +486,8 @@ static int drm_atomic_crtc_check(struct drm_crtc *crtc, */ if (state->active && !state->enable) { - DRM_DEBUG_ATOMIC("[CRTC:%d] active without enabled\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active without enabled\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -487,15 +496,15 @@ static int drm_atomic_crtc_check(struct drm_crtc *crtc, * be able to trigger. */ if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) && WARN_ON(state->enable && !state->mode_blob)) { - DRM_DEBUG_ATOMIC("[CRTC:%d] enabled without mode blob\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled without mode blob\n", + crtc->base.id, crtc->name); return -EINVAL; } if (drm_core_check_feature(crtc->dev, DRIVER_ATOMIC) && WARN_ON(!state->enable && state->mode_blob)) { - DRM_DEBUG_ATOMIC("[CRTC:%d] disabled with mode blob\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] disabled with mode blob\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -540,8 +549,8 @@ drm_atomic_get_plane_state(struct drm_atomic_state *state, state->planes[index] = plane; plane_state->state = state; - DRM_DEBUG_ATOMIC("Added [PLANE:%d] %p state to %p\n", - plane->base.id, plane_state, state); + DRM_DEBUG_ATOMIC("Added [PLANE:%d:%s] %p state to %p\n", + plane->base.id, plane->name, plane_state, state); if (plane_state->crtc) { struct drm_crtc_state *crtc_state; @@ -616,11 +625,20 @@ int drm_atomic_plane_set_property(struct drm_plane *plane, } EXPORT_SYMBOL(drm_atomic_plane_set_property); -/* +/** + * drm_atomic_plane_get_property - get property value from plane state + * @plane: the drm plane to set a property on + * @state: the state object to get the property value from + * @property: the property to set + * @val: return location for the property value + * * This function handles generic/core properties and calls out to * driver's ->atomic_get_property() for driver properties. To ensure * consistent behavior you must call this function rather than the * driver hook directly. 
+ * + * RETURNS: + * Zero on success, error code on failure */ static int drm_atomic_plane_get_property(struct drm_plane *plane, @@ -752,8 +770,8 @@ static int drm_atomic_plane_check(struct drm_plane *plane, } if (plane_switching_crtc(state->state, plane, state)) { - DRM_DEBUG_ATOMIC("[PLANE:%d] switching CRTC directly\n", - plane->base.id); + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] switching CRTC directly\n", + plane->base.id, plane->name); return -EINVAL; } @@ -872,11 +890,20 @@ int drm_atomic_connector_set_property(struct drm_connector *connector, } EXPORT_SYMBOL(drm_atomic_connector_set_property); -/* +/** + * drm_atomic_connector_get_property - get property value from connector state + * @connector: the drm connector to set a property on + * @state: the state object to get the property value from + * @property: the property to set + * @val: return location for the property value + * * This function handles generic/core properties and calls out to * driver's ->atomic_get_property() for driver properties. To ensure * consistent behavior you must call this function rather than the * driver hook directly. + * + * RETURNS: + * Zero on success, error code on failure */ static int drm_atomic_connector_get_property(struct drm_connector *connector, @@ -977,8 +1004,8 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, } if (crtc) - DRM_DEBUG_ATOMIC("Link plane state %p to [CRTC:%d]\n", - plane_state, crtc->base.id); + DRM_DEBUG_ATOMIC("Link plane state %p to [CRTC:%d:%s]\n", + plane_state, crtc->base.id, crtc->name); else DRM_DEBUG_ATOMIC("Link plane state %p to [NOCRTC]\n", plane_state); @@ -1045,8 +1072,8 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state, conn_state->crtc = crtc; if (crtc) - DRM_DEBUG_ATOMIC("Link connector state %p to [CRTC:%d]\n", - conn_state, crtc->base.id); + DRM_DEBUG_ATOMIC("Link connector state %p to [CRTC:%d:%s]\n", + conn_state, crtc->base.id, crtc->name); else DRM_DEBUG_ATOMIC("Link connector state %p to [NOCRTC]\n", conn_state); @@ -1085,8 +1112,8 @@ drm_atomic_add_affected_connectors(struct drm_atomic_state *state, if (ret) return ret; - DRM_DEBUG_ATOMIC("Adding all current connectors for [CRTC:%d] to %p\n", - crtc->base.id, state); + DRM_DEBUG_ATOMIC("Adding all current connectors for [CRTC:%d:%s] to %p\n", + crtc->base.id, crtc->name, state); /* * Changed connectors are already in @state, so only need to look at the @@ -1166,8 +1193,9 @@ drm_atomic_connectors_for_crtc(struct drm_atomic_state *state, num_connected_connectors++; } - DRM_DEBUG_ATOMIC("State %p has %i connectors for [CRTC:%d]\n", - state, num_connected_connectors, crtc->base.id); + DRM_DEBUG_ATOMIC("State %p has %i connectors for [CRTC:%d:%s]\n", + state, num_connected_connectors, + crtc->base.id, crtc->name); return num_connected_connectors; } @@ -1188,12 +1216,7 @@ void drm_atomic_legacy_backoff(struct drm_atomic_state *state) retry: drm_modeset_backoff(state->acquire_ctx); - ret = drm_modeset_lock(&state->dev->mode_config.connection_mutex, - state->acquire_ctx); - if (ret) - goto retry; - ret = drm_modeset_lock_all_crtcs(state->dev, - state->acquire_ctx); + ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx); if (ret) goto retry; } @@ -1225,8 +1248,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) for_each_plane_in_state(state, plane, plane_state, i) { ret = drm_atomic_plane_check(plane, plane_state); if (ret) { - DRM_DEBUG_ATOMIC("[PLANE:%d] atomic core check failed\n", - plane->base.id); + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic core 
check failed\n", + plane->base.id, plane->name); return ret; } } @@ -1234,8 +1257,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) for_each_crtc_in_state(state, crtc, crtc_state, i) { ret = drm_atomic_crtc_check(crtc, crtc_state); if (ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d] atomic core check failed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic core check failed\n", + crtc->base.id, crtc->name); return ret; } } @@ -1246,8 +1269,8 @@ int drm_atomic_check_only(struct drm_atomic_state *state) if (!state->allow_modeset) { for_each_crtc_in_state(state, crtc, crtc_state, i) { if (drm_atomic_crtc_needs_modeset(crtc_state)) { - DRM_DEBUG_ATOMIC("[CRTC:%d] requires full modeset\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] requires full modeset\n", + crtc->base.id, crtc->name); return -EINVAL; } } diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 3731a26979bc..268d37f26960 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -52,6 +52,12 @@ * drm_atomic_helper_disable_plane(), drm_atomic_helper_disable_plane() and the * various functions to implement set_property callbacks. New drivers must not * implement these functions themselves but must use the provided helpers. + * + * The atomic helper uses the same function table structures as all other + * modesetting helpers. See the documentation for struct &drm_crtc_helper_funcs, + * struct &drm_encoder_helper_funcs and struct &drm_connector_helper_funcs. It + * also shares the struct &drm_plane_helper_funcs function table with the plane + * helpers. */ static void drm_atomic_helper_plane_changed(struct drm_atomic_state *state, @@ -80,6 +86,26 @@ drm_atomic_helper_plane_changed(struct drm_atomic_state *state, } } +static bool +check_pending_encoder_assignment(struct drm_atomic_state *state, + struct drm_encoder *new_encoder) +{ + struct drm_connector *connector; + struct drm_connector_state *conn_state; + int i; + + for_each_connector_in_state(state, connector, conn_state, i) { + if (conn_state->best_encoder != new_encoder) + continue; + + /* encoder already assigned and we're trying to re-steal it! 
*/ + if (connector->state->best_encoder != conn_state->best_encoder) + return false; + } + + return true; +} + static struct drm_crtc * get_current_crtc_for_encoder(struct drm_device *dev, struct drm_encoder *encoder) @@ -116,9 +142,9 @@ steal_encoder(struct drm_atomic_state *state, */ WARN_ON(!drm_modeset_is_locked(&config->connection_mutex)); - DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d], stealing it\n", + DRM_DEBUG_ATOMIC("[ENCODER:%d:%s] in use on [CRTC:%d:%s], stealing it\n", encoder->base.id, encoder->name, - encoder_crtc->base.id); + encoder_crtc->base.id, encoder_crtc->name); crtc_state = drm_atomic_get_crtc_state(state, encoder_crtc); if (IS_ERR(crtc_state)) @@ -219,16 +245,24 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx) } if (new_encoder == connector_state->best_encoder) { - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d]\n", + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] keeps [ENCODER:%d:%s], now on [CRTC:%d:%s]\n", connector->base.id, connector->name, new_encoder->base.id, new_encoder->name, - connector_state->crtc->base.id); + connector_state->crtc->base.id, + connector_state->crtc->name); return 0; } + if (!check_pending_encoder_assignment(state, new_encoder)) { + DRM_DEBUG_ATOMIC("Encoder for [CONNECTOR:%d:%s] already assigned\n", + connector->base.id, + connector->name); + return -EINVAL; + } + encoder_crtc = get_current_crtc_for_encoder(state->dev, new_encoder); @@ -251,12 +285,13 @@ update_connector_routing(struct drm_atomic_state *state, int conn_idx) crtc_state = state->crtc_states[idx]; crtc_state->connectors_changed = true; - DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d]\n", + DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n", connector->base.id, connector->name, new_encoder->base.id, new_encoder->name, - connector_state->crtc->base.id); + connector_state->crtc->base.id, + connector_state->crtc->name); return 0; } @@ -340,8 +375,8 @@ mode_fixup(struct drm_atomic_state *state) ret = funcs->mode_fixup(crtc, &crtc_state->mode, &crtc_state->adjusted_mode); if (!ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d] fixup failed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] fixup failed\n", + crtc->base.id, crtc->name); return -EINVAL; } } @@ -388,14 +423,14 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, for_each_crtc_in_state(state, crtc, crtc_state, i) { if (!drm_mode_equal(&crtc->state->mode, &crtc_state->mode)) { - DRM_DEBUG_ATOMIC("[CRTC:%d] mode changed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n", + crtc->base.id, crtc->name); crtc_state->mode_changed = true; } if (crtc->state->enable != crtc_state->enable) { - DRM_DEBUG_ATOMIC("[CRTC:%d] enable changed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enable changed\n", + crtc->base.id, crtc->name); /* * For clarity this assignment is done here, but @@ -436,18 +471,18 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, * a full modeset because update_connector_routing force that. 
*/ if (crtc->state->active != crtc_state->active) { - DRM_DEBUG_ATOMIC("[CRTC:%d] active changed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active changed\n", + crtc->base.id, crtc->name); crtc_state->active_changed = true; } if (!drm_atomic_crtc_needs_modeset(crtc_state)) continue; - DRM_DEBUG_ATOMIC("[CRTC:%d] needs all connectors, enable: %c, active: %c\n", - crtc->base.id, + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] needs all connectors, enable: %c, active: %c\n", + crtc->base.id, crtc->name, crtc_state->enable ? 'y' : 'n', - crtc_state->active ? 'y' : 'n'); + crtc_state->active ? 'y' : 'n'); ret = drm_atomic_add_affected_connectors(state, crtc); if (ret != 0) @@ -461,8 +496,8 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, crtc); if (crtc_state->enable != !!num_connectors) { - DRM_DEBUG_ATOMIC("[CRTC:%d] enabled/connectors mismatch\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled/connectors mismatch\n", + crtc->base.id, crtc->name); return -EINVAL; } @@ -509,8 +544,8 @@ drm_atomic_helper_check_planes(struct drm_device *dev, ret = funcs->atomic_check(plane, plane_state); if (ret) { - DRM_DEBUG_ATOMIC("[PLANE:%d] atomic driver check failed\n", - plane->base.id); + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] atomic driver check failed\n", + plane->base.id, plane->name); return ret; } } @@ -525,8 +560,8 @@ drm_atomic_helper_check_planes(struct drm_device *dev, ret = funcs->atomic_check(crtc, state->crtc_states[i]); if (ret) { - DRM_DEBUG_ATOMIC("[CRTC:%d] atomic driver check failed\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("[CRTC:%d:%s] atomic driver check failed\n", + crtc->base.id, crtc->name); return ret; } } @@ -639,8 +674,8 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) funcs = crtc->helper_private; - DRM_DEBUG_ATOMIC("disabling [CRTC:%d]\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("disabling [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); /* Right function depends upon target state. */ @@ -751,8 +786,8 @@ crtc_set_mode(struct drm_device *dev, struct drm_atomic_state *old_state) funcs = crtc->helper_private; if (crtc->state->enable && funcs->mode_set_nofb) { - DRM_DEBUG_ATOMIC("modeset on [CRTC:%d]\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("modeset on [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); funcs->mode_set_nofb(crtc); } @@ -851,8 +886,8 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, funcs = crtc->helper_private; if (crtc->state->enable) { - DRM_DEBUG_ATOMIC("enabling [CRTC:%d]\n", - crtc->base.id); + DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n", + crtc->base.id, crtc->name); if (funcs->enable) funcs->enable(crtc); @@ -1342,6 +1377,49 @@ drm_atomic_helper_commit_planes_on_crtc(struct drm_crtc_state *old_crtc_state) EXPORT_SYMBOL(drm_atomic_helper_commit_planes_on_crtc); /** + * drm_atomic_helper_disable_planes_on_crtc - helper to disable CRTC's planes + * @crtc: CRTC + * @atomic: if set, synchronize with CRTC's atomic_begin/flush hooks + * + * Disables all planes associated with the given CRTC. This can be + * used for instance in the CRTC helper disable callback to disable + * all planes before shutting down the display pipeline. + * + * If the atomic-parameter is set the function calls the CRTC's + * atomic_begin hook before and atomic_flush hook after disabling the + * planes. + * + * It is a bug to call this function without having implemented the + * ->atomic_disable() plane hook. 
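A usage sketch for the new helper, following its kerneldoc: a driver's CRTC disable hook can shut down all planes before powering off the pipe. The foo_ names are hypothetical, and the planes must implement ->atomic_disable() for this to be valid:

        static void foo_crtc_disable(struct drm_crtc *crtc)
        {
                /* atomic == false: skip the atomic_begin/flush bracket */
                drm_atomic_helper_disable_planes_on_crtc(crtc, false);
                foo_pipe_power_off(crtc);       /* hypothetical hw hook */
        }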
+ */ +void drm_atomic_helper_disable_planes_on_crtc(struct drm_crtc *crtc, + bool atomic) +{ + const struct drm_crtc_helper_funcs *crtc_funcs = + crtc->helper_private; + struct drm_plane *plane; + + if (atomic && crtc_funcs && crtc_funcs->atomic_begin) + crtc_funcs->atomic_begin(crtc, NULL); + + drm_for_each_plane(plane, crtc->dev) { + const struct drm_plane_helper_funcs *plane_funcs = + plane->helper_private; + + if (plane->state->crtc != crtc || !plane_funcs) + continue; + + WARN_ON(!plane_funcs->atomic_disable); + if (plane_funcs->atomic_disable) + plane_funcs->atomic_disable(plane, NULL); + } + + if (atomic && crtc_funcs && crtc_funcs->atomic_flush) + crtc_funcs->atomic_flush(crtc, NULL); +} +EXPORT_SYMBOL(drm_atomic_helper_disable_planes_on_crtc); + +/** * drm_atomic_helper_cleanup_planes - cleanup plane resources after commit * @dev: DRM device * @old_state: atomic state object with old state structures @@ -1818,6 +1896,161 @@ commit: } /** + * drm_atomic_helper_disable_all - disable all currently active outputs + * @dev: DRM device + * @ctx: lock acquisition context + * + * Loops through all connectors, finding those that aren't turned off and then + * turns them off by setting their DPMS mode to OFF and deactivating the CRTC + * that they are connected to. + * + * This is used for example in suspend/resume to disable all currently active + * functions when suspending. + * + * Note that if callers haven't already acquired all modeset locks this might + * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). + * + * Returns: + * 0 on success or a negative error code on failure. + * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() + */ +int drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) +{ + struct drm_atomic_state *state; + struct drm_connector *conn; + int err; + + state = drm_atomic_state_alloc(dev); + if (!state) + return -ENOMEM; + + state->acquire_ctx = ctx; + + drm_for_each_connector(conn, dev) { + struct drm_crtc *crtc = conn->state->crtc; + struct drm_crtc_state *crtc_state; + + if (!crtc || conn->dpms != DRM_MODE_DPMS_ON) + continue; + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) { + err = PTR_ERR(crtc_state); + goto free; + } + + crtc_state->active = false; + } + + err = drm_atomic_commit(state); + +free: + if (err < 0) + drm_atomic_state_free(state); + + return err; +} +EXPORT_SYMBOL(drm_atomic_helper_disable_all); + +/** + * drm_atomic_helper_suspend - subsystem-level suspend helper + * @dev: DRM device + * + * Duplicates the current atomic state, disables all active outputs and then + * returns a pointer to the original atomic state to the caller. Drivers can + * pass this pointer to the drm_atomic_helper_resume() helper upon resume to + * restore the output configuration that was active at the time the system + * entered suspend. + * + * Note that it is potentially unsafe to use this. The atomic state object + * returned by this function is assumed to be persistent. Drivers must ensure + * that this holds true. Before calling this function, drivers must make sure + * to suspend fbdev emulation so that nothing can be using the device. + * + * Returns: + * A pointer to a copy of the state before suspend on success or an ERR_PTR()- + * encoded error code on failure. Drivers should store the returned atomic + * state object and pass it to the drm_atomic_helper_resume() helper upon + * resume. 
+ * + * See also: + * drm_atomic_helper_duplicate_state(), drm_atomic_helper_disable_all(), + * drm_atomic_helper_resume() + */ +struct drm_atomic_state *drm_atomic_helper_suspend(struct drm_device *dev) +{ + struct drm_modeset_acquire_ctx ctx; + struct drm_atomic_state *state; + int err; + + drm_modeset_acquire_init(&ctx, 0); + +retry: + err = drm_modeset_lock_all_ctx(dev, &ctx); + if (err < 0) { + state = ERR_PTR(err); + goto unlock; + } + + state = drm_atomic_helper_duplicate_state(dev, &ctx); + if (IS_ERR(state)) + goto unlock; + + err = drm_atomic_helper_disable_all(dev, &ctx); + if (err < 0) { + drm_atomic_state_free(state); + state = ERR_PTR(err); + goto unlock; + } + +unlock: + if (PTR_ERR(state) == -EDEADLK) { + drm_modeset_backoff(&ctx); + goto retry; + } + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return state; +} +EXPORT_SYMBOL(drm_atomic_helper_suspend); + +/** + * drm_atomic_helper_resume - subsystem-level resume helper + * @dev: DRM device + * @state: atomic state to resume to + * + * Calls drm_mode_config_reset() to synchronize hardware and software states, + * grabs all modeset locks and commits the atomic state object. This can be + * used in conjunction with the drm_atomic_helper_suspend() helper to + * implement suspend/resume for drivers that support atomic mode-setting. + * + * Returns: + * 0 on success or a negative error code on failure. + * + * See also: + * drm_atomic_helper_suspend() + */ +int drm_atomic_helper_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_mode_config *config = &dev->mode_config; + int err; + + drm_mode_config_reset(dev); + drm_modeset_lock_all(dev); + state->acquire_ctx = config->acquire_ctx; + err = drm_atomic_commit(state); + drm_modeset_unlock_all(dev); + + return err; +} +EXPORT_SYMBOL(drm_atomic_helper_resume); + +/** * drm_atomic_helper_crtc_set_property - helper for crtc properties * @crtc: DRM crtc * @property: DRM property @@ -2173,6 +2406,12 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_dpms); * The simpler solution is to just reset the software state to everything off, * which is easiest to do by calling drm_mode_config_reset(). To facilitate this * the atomic helpers provide default reset implementations for all hooks. + * + * On the upside the precise state tracking of atomic simplifies system suspend + * and resume a lot. For drivers using drm_mode_config_reset() a complete recipe + * is implemented in drm_atomic_helper_suspend() and drm_atomic_helper_resume(). + * For other drivers the building blocks are split out, see the documentation + * for these functions. */ /** @@ -2429,7 +2668,9 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_duplicate_state); * @ctx: lock acquisition context * * Makes a copy of the current atomic state by looping over all objects and - * duplicating their respective states. + * duplicating their respective states. This is used for example by suspend/ + * resume support code to save the state prior to suspend such that it can + * be restored upon resume. * * Note that this treats atomic state as persistent between save and restore. * Drivers must make sure that this is possible and won't result in confusion @@ -2441,6 +2682,9 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_duplicate_state); * Returns: * A pointer to the copy of the atomic state object on success or an * ERR_PTR()-encoded error code on failure. 
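Wired together, the suspend/resume helpers reduce a driver's system PM hooks to a few lines. A hypothetical sketch (the foo_ names and the suspend_state field are assumptions); per the suspend kerneldoc, fbdev emulation must be suspended before this point:

        static int foo_pm_suspend(struct device *dev)
        {
                struct drm_device *drm = dev_get_drvdata(dev);
                struct foo_private *priv = drm->dev_private;

                priv->suspend_state = drm_atomic_helper_suspend(drm);
                if (IS_ERR(priv->suspend_state))
                        return PTR_ERR(priv->suspend_state);

                return 0;
        }

        static int foo_pm_resume(struct device *dev)
        {
                struct drm_device *drm = dev_get_drvdata(dev);
                struct foo_private *priv = drm->dev_private;

                return drm_atomic_helper_resume(drm, priv->suspend_state);
        }

        static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_pm_suspend, foo_pm_resume);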
+ * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() */ struct drm_atomic_state * drm_atomic_helper_duplicate_state(struct drm_device *dev, diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 6b8f7211e543..bd93453afa61 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -31,14 +31,14 @@ /** * DOC: overview * - * drm_bridge represents a device that hangs on to an encoder. These are handy - * when a regular drm_encoder entity isn't enough to represent the entire + * struct &drm_bridge represents a device that hangs on to an encoder. These are + * handy when a regular &drm_encoder entity isn't enough to represent the entire * encoder chain. * - * A bridge is always associated to a single drm_encoder at a time, but can be + * A bridge is always attached to a single &drm_encoder at a time, but can be * either connected to it directly, or through an intermediate bridge: * - * encoder ---> bridge B ---> bridge A + * encoder ---> bridge B ---> bridge A * * Here, the output of the encoder feeds to bridge B, and that furthers feeds to * bridge A. @@ -46,11 +46,16 @@ * The driver using the bridge is responsible to make the associations between * the encoder and bridges. Once these links are made, the bridges will * participate along with encoder functions to perform mode_set/enable/disable - * through the ops provided in drm_bridge_funcs. + * through the ops provided in &drm_bridge_funcs. * * drm_bridge, like drm_panel, aren't drm_mode_object entities like planes, - * crtcs, encoders or connectors. They just provide additional hooks to get the - * desired output at the end of the encoder chain. + * CRTCs, encoders or connectors and hence are not visible to userspace. They + * just provide additional hooks to get the desired output at the end of the + * encoder chain. + * + * Bridges can also be chained up using the next pointer in struct &drm_bridge. + * + * Both legacy CRTC helpers and the new atomic modeset helpers support bridges. */ static DEFINE_MUTEX(bridge_lock); @@ -122,34 +127,12 @@ EXPORT_SYMBOL(drm_bridge_attach); /** * DOC: bridge callbacks * - * The drm_bridge_funcs ops are populated by the bridge driver. The drm - * internals(atomic and crtc helpers) use the helpers defined in drm_bridge.c - * These helpers call a specific drm_bridge_funcs op for all the bridges + * The &drm_bridge_funcs ops are populated by the bridge driver. The DRM + * internals (atomic and CRTC helpers) use the helpers defined in drm_bridge.c + * These helpers call a specific &drm_bridge_funcs op for all the bridges * during encoder configuration. * - * When creating a bridge driver, one can implement drm_bridge_funcs op with - * the help of these rough rules: - * - * pre_enable: this contains things needed to be done for the bridge before - * its clock and timings are enabled by its source. For a bridge, its source - * is generally the encoder or bridge just before it in the encoder chain. - * - * enable: this contains things needed to be done for the bridge once its - * source is enabled. In other words, enable is called once the source is - * ready with clock and timing needed by the bridge. - * - * disable: this contains things needed to be done for the bridge assuming - * that its source is still enabled, i.e. clock and timings are still on. - * - * post_disable: this contains things needed to be done for the bridge once - * its source is disabled, i.e. once clocks and timings are off. 
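The per-op descriptions removed below survive as the ordering rules in the drm_bridge_funcs documentation; a skeleton ops table summarizes them. For the chain encoder -> B -> A, pre_enable and disable run from the bridge nearest the output back toward the encoder (A then B), while enable and post_disable run the other way (B then A). The foo_ handlers are hypothetical:

        static const struct drm_bridge_funcs foo_bridge_funcs = {
                .mode_fixup   = foo_bridge_mode_fixup,   /* after encoder's fixup */
                .mode_set     = foo_bridge_mode_set,     /* source mode already set */
                .pre_enable   = foo_bridge_pre_enable,   /* source clock/timings off */
                .enable       = foo_bridge_enable,       /* source up and running */
                .disable      = foo_bridge_disable,      /* source still running */
                .post_disable = foo_bridge_post_disable, /* source shut down */
        };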
- * - * mode_fixup: this should fixup the given mode for the bridge. It is called - * after the encoder's mode fixup. mode_fixup can also reject a mode completely - * if it's unsuitable for the hardware. - * - * mode_set: this sets up the mode for the bridge. It assumes that its source - * (an encoder or a bridge) has set the mode too. + * For detailed specification of the bridge callbacks see &drm_bridge_funcs. */ /** @@ -159,7 +142,7 @@ EXPORT_SYMBOL(drm_bridge_attach); * @mode: desired mode to be set for the bridge * @adjusted_mode: updated mode that works for this bridge * - * Calls 'mode_fixup' drm_bridge_funcs op for all the bridges in the + * Calls ->mode_fixup() &drm_bridge_funcs op for all the bridges in the * encoder chain, starting from the first bridge to the last. * * Note: the bridge passed should be the one closest to the encoder @@ -186,11 +169,11 @@ bool drm_bridge_mode_fixup(struct drm_bridge *bridge, EXPORT_SYMBOL(drm_bridge_mode_fixup); /** - * drm_bridge_disable - calls 'disable' drm_bridge_funcs op for all + * drm_bridge_disable - calls ->disable() &drm_bridge_funcs op for all * bridges in the encoder chain. * @bridge: bridge control structure * - * Calls 'disable' drm_bridge_funcs op for all the bridges in the encoder + * Calls ->disable() &drm_bridge_funcs op for all the bridges in the encoder * chain, starting from the last bridge to the first. These are called before * calling the encoder's prepare op. * @@ -208,11 +191,11 @@ void drm_bridge_disable(struct drm_bridge *bridge) EXPORT_SYMBOL(drm_bridge_disable); /** - * drm_bridge_post_disable - calls 'post_disable' drm_bridge_funcs op for + * drm_bridge_post_disable - calls ->post_disable() &drm_bridge_funcs op for * all bridges in the encoder chain. * @bridge: bridge control structure * - * Calls 'post_disable' drm_bridge_funcs op for all the bridges in the + * Calls ->post_disable() &drm_bridge_funcs op for all the bridges in the * encoder chain, starting from the first bridge to the last. These are called * after completing the encoder's prepare op. * @@ -236,7 +219,7 @@ EXPORT_SYMBOL(drm_bridge_post_disable); * @mode: desired mode to be set for the bridge * @adjusted_mode: updated mode that works for this bridge * - * Calls 'mode_set' drm_bridge_funcs op for all the bridges in the + * Calls ->mode_set() &drm_bridge_funcs op for all the bridges in the * encoder chain, starting from the first bridge to the last. * * Note: the bridge passed should be the one closest to the encoder @@ -256,11 +239,11 @@ void drm_bridge_mode_set(struct drm_bridge *bridge, EXPORT_SYMBOL(drm_bridge_mode_set); /** - * drm_bridge_pre_enable - calls 'pre_enable' drm_bridge_funcs op for all + * drm_bridge_pre_enable - calls ->pre_enable() &drm_bridge_funcs op for all * bridges in the encoder chain. * @bridge: bridge control structure * - * Calls 'pre_enable' drm_bridge_funcs op for all the bridges in the encoder + * Calls ->pre_enable() &drm_bridge_funcs op for all the bridges in the encoder * chain, starting from the last bridge to the first. These are called * before calling the encoder's commit op. * @@ -278,11 +261,11 @@ void drm_bridge_pre_enable(struct drm_bridge *bridge) EXPORT_SYMBOL(drm_bridge_pre_enable); /** - * drm_bridge_enable - calls 'enable' drm_bridge_funcs op for all bridges + * drm_bridge_enable - calls ->enable() &drm_bridge_funcs op for all bridges * in the encoder chain. 
* @bridge: bridge control structure * - * Calls 'enable' drm_bridge_funcs op for all the bridges in the encoder + * Calls ->enable() &drm_bridge_funcs op for all the bridges in the encoder * chain, starting from the first bridge to the last. These are called * after completing the encoder's commit op. * diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 32dd134700bd..62fa95fa5471 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -649,6 +649,18 @@ EXPORT_SYMBOL(drm_framebuffer_remove); DEFINE_WW_CLASS(crtc_ww_class); +static unsigned int drm_num_crtcs(struct drm_device *dev) +{ + unsigned int num = 0; + struct drm_crtc *tmp; + + drm_for_each_crtc(tmp, dev) { + num++; + } + + return num; +} + /** * drm_crtc_init_with_planes - Initialise a new CRTC object with * specified primary and cursor planes. @@ -657,6 +669,7 @@ DEFINE_WW_CLASS(crtc_ww_class); * @primary: Primary plane for CRTC * @cursor: Cursor plane for CRTC * @funcs: callbacks for the new CRTC + * @name: printf style format string for the CRTC name, or NULL for default name * * Inits a new object created as base part of a driver crtc object. * @@ -666,7 +679,8 @@ DEFINE_WW_CLASS(crtc_ww_class); int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary, struct drm_plane *cursor, - const struct drm_crtc_funcs *funcs) + const struct drm_crtc_funcs *funcs, + const char *name, ...) { struct drm_mode_config *config = &dev->mode_config; int ret; @@ -682,6 +696,21 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc, if (ret) return ret; + if (name) { + va_list ap; + + va_start(ap, name); + crtc->name = kvasprintf(GFP_KERNEL, name, ap); + va_end(ap); + } else { + crtc->name = kasprintf(GFP_KERNEL, "crtc-%d", + drm_num_crtcs(dev)); + } + if (!crtc->name) { + drm_mode_object_put(dev, &crtc->base); + return -ENOMEM; + } + crtc->base.properties = &crtc->properties; list_add_tail(&crtc->head, &config->crtc_list); @@ -728,6 +757,8 @@ void drm_crtc_cleanup(struct drm_crtc *crtc) if (crtc->state && crtc->funcs->atomic_destroy_state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); + kfree(crtc->name); + memset(crtc, 0, sizeof(*crtc)); } EXPORT_SYMBOL(drm_crtc_cleanup); @@ -1075,6 +1106,7 @@ EXPORT_SYMBOL(drm_connector_unplug_all); * @encoder: the encoder to init * @funcs: callbacks for this encoder * @encoder_type: user visible type of the encoder + * @name: printf style format string for the encoder name, or NULL for default name * * Initialises a preallocated encoder. Encoder should be * subclassed as part of driver encoder objects. @@ -1085,7 +1117,7 @@ EXPORT_SYMBOL(drm_connector_unplug_all); int drm_encoder_init(struct drm_device *dev, struct drm_encoder *encoder, const struct drm_encoder_funcs *funcs, - int encoder_type) + int encoder_type, const char *name, ...) 
{ int ret; @@ -1098,9 +1130,17 @@ int drm_encoder_init(struct drm_device *dev, encoder->dev = dev; encoder->encoder_type = encoder_type; encoder->funcs = funcs; - encoder->name = kasprintf(GFP_KERNEL, "%s-%d", - drm_encoder_enum_list[encoder_type].name, - encoder->base.id); + if (name) { + va_list ap; + + va_start(ap, name); + encoder->name = kvasprintf(GFP_KERNEL, name, ap); + va_end(ap); + } else { + encoder->name = kasprintf(GFP_KERNEL, "%s-%d", + drm_encoder_enum_list[encoder_type].name, + encoder->base.id); + } if (!encoder->name) { ret = -ENOMEM; goto out_put; @@ -1141,6 +1181,18 @@ void drm_encoder_cleanup(struct drm_encoder *encoder) } EXPORT_SYMBOL(drm_encoder_cleanup); +static unsigned int drm_num_planes(struct drm_device *dev) +{ + unsigned int num = 0; + struct drm_plane *tmp; + + drm_for_each_plane(tmp, dev) { + num++; + } + + return num; +} + /** * drm_universal_plane_init - Initialize a new universal plane object * @dev: DRM device @@ -1150,6 +1202,7 @@ EXPORT_SYMBOL(drm_encoder_cleanup); * @formats: array of supported formats (%DRM_FORMAT_*) * @format_count: number of elements in @formats * @type: type of plane (overlay, primary, cursor) + * @name: printf style format string for the plane name, or NULL for default name * * Initializes a plane object of type @type. * @@ -1160,7 +1213,8 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, - enum drm_plane_type type) + enum drm_plane_type type, + const char *name, ...) { struct drm_mode_config *config = &dev->mode_config; int ret; @@ -1182,6 +1236,22 @@ int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, return -ENOMEM; } + if (name) { + va_list ap; + + va_start(ap, name); + plane->name = kvasprintf(GFP_KERNEL, name, ap); + va_end(ap); + } else { + plane->name = kasprintf(GFP_KERNEL, "plane-%d", + drm_num_planes(dev)); + } + if (!plane->name) { + kfree(plane->format_types); + drm_mode_object_put(dev, &plane->base); + return -ENOMEM; + } + memcpy(plane->format_types, formats, format_count * sizeof(uint32_t)); plane->format_count = format_count; plane->possible_crtcs = possible_crtcs; @@ -1240,7 +1310,7 @@ int drm_plane_init(struct drm_device *dev, struct drm_plane *plane, type = is_primary ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; return drm_universal_plane_init(dev, plane, possible_crtcs, funcs, - formats, format_count, type); + formats, format_count, type, NULL); } EXPORT_SYMBOL(drm_plane_init); @@ -1272,6 +1342,8 @@ void drm_plane_cleanup(struct drm_plane *plane) if (plane->state && plane->funcs->atomic_destroy_state) plane->funcs->atomic_destroy_state(plane, plane->state); + kfree(plane->name); + memset(plane, 0, sizeof(*plane)); } EXPORT_SYMBOL(drm_plane_cleanup); @@ -1801,7 +1873,8 @@ int drm_mode_getresources(struct drm_device *dev, void *data, copied = 0; crtc_id = (uint32_t __user *)(unsigned long)card_res->crtc_id_ptr; drm_for_each_crtc(crtc, dev) { - DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id); + DRM_DEBUG_KMS("[CRTC:%d:%s]\n", + crtc->base.id, crtc->name); if (put_user(crtc->base.id, crtc_id + copied)) { ret = -EFAULT; goto out; @@ -2646,7 +2719,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, ret = -ENOENT; goto out; } - DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id); + DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); if (crtc_req->mode_valid) { /* If we have a mode we need a framebuffer. 
*/ @@ -4785,9 +4858,7 @@ static int drm_mode_connector_set_obj_prop(struct drm_mode_object *obj, /* Do DPMS ourselves */ if (property == connector->dev->mode_config.dpms_property) { - ret = 0; - if (connector->funcs->dpms) - ret = (*connector->funcs->dpms)(connector, (int)value); + ret = (*connector->funcs->dpms)(connector, (int)value); } else if (connector->funcs->set_property) ret = connector->funcs->set_property(connector, property, value); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 6b4cf25fed12..a02a7f9a6a9d 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -51,6 +51,11 @@ * the same callbacks which drivers can use to e.g. restore the modeset * configuration on resume with drm_helper_resume_force_mode(). * + * Note that this helper library doesn't track the current power state of CRTCs + * and encoders. It can call callbacks like ->dpms() even though the hardware is + * already in the desired state. This deficiency has been fixed in the atomic + * helpers. + * * The driver callbacks are mostly compatible with the atomic modeset helpers, * except for the handling of the primary plane: Atomic helpers require that the * primary plane is implemented as a real standalone plane and not directly tied @@ -62,6 +67,11 @@ * converting to the plane helpers). New drivers must not use these functions * but need to implement the atomic interface instead, potentially using the * atomic helpers for that. + * + * These legacy modeset helpers use the same function table structures as + * all other modesetting helpers. See the documentation for struct + * &drm_crtc_helper_funcs, struct &drm_encoder_helper_funcs and struct + * &drm_connector_helper_funcs. */ MODULE_AUTHOR("David Airlie, Jesse Barnes"); MODULE_DESCRIPTION("DRM KMS helper"); @@ -206,8 +216,8 @@ static void __drm_helper_disable_unused_functions(struct drm_device *dev) * @dev: DRM device * * This function walks through the entire mode setting configuration of @dev. It - * will remove any crtc links of unused encoders and encoder links of - * disconnected connectors. Then it will disable all unused encoders and crtcs + * will remove any CRTC links of unused encoders and encoder links of + * disconnected connectors. Then it will disable all unused encoders and CRTCs * either by calling their disable callback if available or by calling their * dpms callback with DRM_MODE_DPMS_OFF. */ @@ -329,7 +339,7 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, DRM_DEBUG_KMS("CRTC fixup failed\n"); goto done; } - DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id); + DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); crtc->hwmode = *adjusted_mode; @@ -445,11 +455,36 @@ drm_crtc_helper_disable(struct drm_crtc *crtc) * drm_crtc_helper_set_config - set a new config from userspace * @set: mode set configuration * - * Setup a new configuration, provided by the upper layers (either an ioctl call - * from userspace or internally e.g. from the fbdev support code) in @set, and - * enable it. This is the main helper functions for drivers that implement - * kernel mode setting with the crtc helper functions and the assorted - * ->prepare(), ->modeset() and ->commit() helper callbacks. + * The drm_crtc_helper_set_config() helper function implements the set_config + * callback of struct &drm_crtc_funcs for drivers using the legacy CRTC helpers. 
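+ * + * A minimal wiring sketch (foo_crtc_funcs is a made-up driver table): + * + *     static const struct drm_crtc_funcs foo_crtc_funcs = { + *         .set_config = drm_crtc_helper_set_config, + *         .destroy = drm_crtc_cleanup, + *     };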
+ * + * It first tries to locate the best encoder for each connector by calling the + * connector ->best_encoder() (struct &drm_connector_helper_funcs) helper + * operation. + * + * After locating the appropriate encoders, the helper function will call the + * mode_fixup encoder and CRTC helper operations to adjust the requested mode, + * or reject it completely in which case an error will be returned to the + * application. If the new configuration after mode adjustment is identical to + * the current configuration the helper function will return without performing + * any other operation. + * + * If the adjusted mode is identical to the current mode but changes to the + * frame buffer need to be applied, the drm_crtc_helper_set_config() function + * will call the CRTC ->mode_set_base() (struct &drm_crtc_helper_funcs) helper + * operation. + * + * If the adjusted mode differs from the current mode, or if the + * ->mode_set_base() helper operation is not provided, the helper function + * performs a full mode set sequence by calling the ->prepare(), ->mode_set() + * and ->commit() CRTC and encoder helper operations, in that order. + * Alternatively it can also use the dpms and disable helper operations. For + * details see struct &drm_crtc_helper_funcs and struct + * &drm_encoder_helper_funcs. + * + * This function is deprecated. New drivers must implement atomic modeset + * support, for which this function is unsuitable. Instead drivers should use + * drm_atomic_helper_set_config(). * * Returns: * Returns 0 on success, negative errno numbers on failure. @@ -484,11 +519,13 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) set->fb = NULL; if (set->fb) { - DRM_DEBUG_KMS("[CRTC:%d] [FB:%d] #connectors=%d (x y) (%i %i)\n", - set->crtc->base.id, set->fb->base.id, - (int)set->num_connectors, set->x, set->y); + DRM_DEBUG_KMS("[CRTC:%d:%s] [FB:%d] #connectors=%d (x y) (%i %i)\n", + set->crtc->base.id, set->crtc->name, + set->fb->base.id, + (int)set->num_connectors, set->x, set->y); } else { - DRM_DEBUG_KMS("[CRTC:%d] [NOFB]\n", set->crtc->base.id); + DRM_DEBUG_KMS("[CRTC:%d:%s] [NOFB]\n", + set->crtc->base.id, set->crtc->name); drm_crtc_helper_disable(set->crtc); return 0; } @@ -628,12 +665,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) connector->encoder->crtc = new_crtc; } if (new_crtc) { - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d]\n", - connector->base.id, connector->name, - new_crtc->base.id); + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [CRTC:%d:%s]\n", + connector->base.id, connector->name, + new_crtc->base.id, new_crtc->name); } else { DRM_DEBUG_KMS("[CONNECTOR:%d:%s] to [NOCRTC]\n", - connector->base.id, connector->name); + connector->base.id, connector->name); } } @@ -650,8 +687,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, save_set.fb)) { - DRM_ERROR("failed to set mode on [CRTC:%d]\n", - set->crtc->base.id); + DRM_ERROR("failed to set mode on [CRTC:%d:%s]\n", + set->crtc->base.id, set->crtc->name); set->crtc->primary->fb = save_set.fb; ret = -EINVAL; goto fail; @@ -758,10 +795,18 @@ static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc) * @connector: affected connector * @mode: DPMS mode * - * This is the main helper function provided by the crtc helper framework for + * The drm_helper_connector_dpms() helper function implements the ->dpms() + * callback of struct &drm_connector_funcs for drivers using the legacy CRTC helpers. 
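+ * + * Wiring it up is a one-liner in a driver's connector funcs (sketch, + * foo_connector_funcs is a made-up name): + * + *     static const struct drm_connector_funcs foo_connector_funcs = { + *         .dpms = drm_helper_connector_dpms, + *         .destroy = drm_connector_cleanup, + *     };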
+ * + * This is the main helper function provided by the CRTC helper framework for * implementing the DPMS connector attribute. It computes the new desired DPMS - * state for all encoders and crtcs in the output mesh and calls the ->dpms() - * callback provided by the driver appropriately. + * state for all encoders and CRTCs in the output mesh and calls the ->dpms() + * callbacks provided by the driver in struct &drm_crtc_helper_funcs and struct + * &drm_encoder_helper_funcs appropriately. + * + * This function is deprecated. New drivers must implement atomic modeset + * support, for which this function is unsuitable. Instead drivers should use + * drm_atomic_helper_connector_dpms(). * * Returns: * Always returns 0. @@ -855,6 +900,12 @@ EXPORT_SYMBOL(drm_helper_mode_fill_fb_struct); * due to slight differences in allocating shared resources when the * configuration is restored in a different order than when userspace set it up) * need to use their own restore logic. + * + * This function is deprecated. New drivers should implement atomic mode- + * setting and use the atomic suspend/resume helpers. + * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() */ void drm_helper_resume_force_mode(struct drm_device *dev) { @@ -913,9 +964,9 @@ EXPORT_SYMBOL(drm_helper_resume_force_mode); * @old_fb: previous framebuffer * * This function implements a callback usable as the ->mode_set callback - * required by the crtc helpers. Besides the atomic plane helper functions for + * required by the CRTC helpers. Besides the atomic plane helper functions for * the primary plane the driver must also provide the ->mode_set_nofb callback - * to set up the crtc. + * to set up the CRTC. * * This is a transitional helper useful for converting drivers to the atomic * interfaces. @@ -979,7 +1030,7 @@ EXPORT_SYMBOL(drm_helper_crtc_mode_set); * @old_fb: previous framebuffer * * This function implements a callback usable as the ->mode_set_base - * required by the crtc helpers. The driver must provide the atomic plane helper + * required by the CRTC helpers. The driver must provide the atomic plane helper * functions for the primary plane.
* * This is a transitional helper useful for converting drivers to the atomic diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 809959d56d78..3b6627dde9ff 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1673,6 +1673,7 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) if (mgr->proposed_vcpis[i]) { port = container_of(mgr->proposed_vcpis[i], struct drm_dp_mst_port, vcpi); req_payload.num_slots = mgr->proposed_vcpis[i]->num_slots; + req_payload.vcpi = mgr->proposed_vcpis[i]->vcpi; } else { port = NULL; req_payload.num_slots = 0; @@ -1688,6 +1689,7 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr) if (req_payload.num_slots) { drm_dp_create_payload_step1(mgr, mgr->proposed_vcpis[i]->vcpi, &req_payload); mgr->payloads[i].num_slots = req_payload.num_slots; + mgr->payloads[i].vcpi = req_payload.vcpi; } else if (mgr->payloads[i].num_slots) { mgr->payloads[i].num_slots = 0; drm_dp_destroy_payload_step1(mgr, port, port->vcpi.vcpi, &mgr->payloads[i]); @@ -1823,7 +1825,7 @@ static int drm_dp_encode_up_ack_reply(struct drm_dp_sideband_msg_tx *msg, u8 req { struct drm_dp_sideband_msg_reply_body reply; - reply.reply_type = 1; + reply.reply_type = 0; reply.req_type = req_type; drm_dp_encode_sideband_reply(&reply, msg); return 0; diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 9362609df38a..bf934cdea21c 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -160,6 +160,11 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, goto out_unlock; } + if (!file_priv->allowed_master) { + ret = drm_new_set_master(dev, file_priv); + goto out_unlock; + } + file_priv->minor->master = drm_master_get(file_priv->master); file_priv->is_master = 1; if (dev->driver->master_set) { @@ -628,8 +633,17 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver, } } + if (parent) { + ret = drm_dev_set_unique(dev, dev_name(parent)); + if (ret) + goto err_setunique; + } + return dev; +err_setunique: + if (drm_core_check_feature(dev, DRIVER_GEM)) + drm_gem_destroy(dev); err_ctxbitmap: drm_legacy_ctxbitmap_cleanup(dev); drm_ht_remove(&dev->map_hash); @@ -792,23 +806,18 @@ EXPORT_SYMBOL(drm_dev_unregister); /** * drm_dev_set_unique - Set the unique name of a DRM device * @dev: device of which to set the unique name - * @fmt: format string for unique name + * @name: unique name * - * Sets the unique name of a DRM device using the specified format string and - * a variable list of arguments. Drivers can use this at driver probe time if - * the unique name of the devices they drive is static. + * Sets the unique name of a DRM device using the specified string. Drivers + * can use this at driver probe time if the unique name of the devices they + * drive is static. * * Return: 0 on success or a negative error code on failure. */ -int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...) +int drm_dev_set_unique(struct drm_device *dev, const char *name) { - va_list ap; - kfree(dev->unique); - - va_start(ap, fmt); - dev->unique = kvasprintf(GFP_KERNEL, fmt, ap); - va_end(ap); + dev->unique = kstrdup(name, GFP_KERNEL); return dev->unique ? 
0 : -ENOMEM; } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index d5d2c03fd136..c214f1246cb4 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2545,6 +2545,33 @@ cea_mode_alternate_clock(const struct drm_display_mode *cea_mode) return clock; } +static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match, + unsigned int clock_tolerance) +{ + u8 mode; + + if (!to_match->clock) + return 0; + + for (mode = 0; mode < ARRAY_SIZE(edid_cea_modes); mode++) { + const struct drm_display_mode *cea_mode = &edid_cea_modes[mode]; + unsigned int clock1, clock2; + + /* Check both 60Hz and 59.94Hz */ + clock1 = cea_mode->clock; + clock2 = cea_mode_alternate_clock(cea_mode); + + if (abs(to_match->clock - clock1) > clock_tolerance && + abs(to_match->clock - clock2) > clock_tolerance) + continue; + + if (drm_mode_equal_no_clocks(to_match, cea_mode)) + return mode + 1; + } + + return 0; +} + /** * drm_match_cea_mode - look for a CEA mode matching given mode * @to_match: display mode @@ -2609,6 +2636,33 @@ hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode) return cea_mode_alternate_clock(hdmi_mode); } +static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match, + unsigned int clock_tolerance) +{ + u8 mode; + + if (!to_match->clock) + return 0; + + for (mode = 0; mode < ARRAY_SIZE(edid_4k_modes); mode++) { + const struct drm_display_mode *hdmi_mode = &edid_4k_modes[mode]; + unsigned int clock1, clock2; + + /* Make sure to also match alternate clocks */ + clock1 = hdmi_mode->clock; + clock2 = hdmi_mode_alternate_clock(hdmi_mode); + + if (abs(to_match->clock - clock1) > clock_tolerance && + abs(to_match->clock - clock2) > clock_tolerance) + continue; + + if (drm_mode_equal_no_clocks(to_match, hdmi_mode)) + return mode + 1; + } + + return 0; +} + /* * drm_match_hdmi_mode - look for a HDMI mode matching given mode * @to_match: display mode @@ -3119,14 +3173,18 @@ static void fixup_detailed_cea_mode_clock(struct drm_display_mode *mode) u8 mode_idx; const char *type; - mode_idx = drm_match_cea_mode(mode) - 1; + /* + * allow 5kHz clock difference either way to account for + * the 10kHz clock resolution limit of detailed timings. 
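+ * For example, 1080p60 (CEA mode 16) has a nominal clock of 148500 kHz and + * a 59.94Hz alternate clock of DIV_ROUND_CLOSEST(148500 * 1000, 1001) = + * 148352 kHz, but a detailed timing can only encode multiples of 10 kHz, + * e.g. 148350 kHz; the 2 kHz difference is within the 5 kHz tolerance, so + * the mode still matches.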
+ */ + mode_idx = drm_match_cea_mode_clock_tolerance(mode, 5) - 1; if (mode_idx < ARRAY_SIZE(edid_cea_modes)) { type = "CEA"; cea_mode = &edid_cea_modes[mode_idx]; clock1 = cea_mode->clock; clock2 = cea_mode_alternate_clock(cea_mode); } else { - mode_idx = drm_match_hdmi_mode(mode) - 1; + mode_idx = drm_match_hdmi_mode_clock_tolerance(mode, 5) - 1; if (mode_idx < ARRAY_SIZE(edid_4k_modes)) { type = "HDMI"; cea_mode = &edid_4k_modes[mode_idx]; diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c index d18b88b755c3..e8629076de32 100644 --- a/drivers/gpu/drm/drm_encoder_slave.c +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL(drm_i2c_encoder_destroy); * Wrapper fxns which can be plugged in to drm_encoder_helper_funcs: */ -static inline struct drm_encoder_slave_funcs * +static inline const struct drm_encoder_slave_funcs * get_slave_funcs(struct drm_encoder *enc) { return to_encoder_slave(enc)->slave_funcs; } diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c index b7d5b848d2f8..5543fa806aec 100644 --- a/drivers/gpu/drm/drm_fb_cma_helper.c +++ b/drivers/gpu/drm/drm_fb_cma_helper.c @@ -266,7 +266,7 @@ static int drm_fbdev_cma_create(struct drm_fb_helper *helper, fbi = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(fbi)) { ret = PTR_ERR(fbi); - goto err_drm_gem_cma_free_object; + goto err_gem_free_object; } fbdev_cma->fb = drm_fb_cma_alloc(dev, &mode_cmd, &obj, 1); @@ -299,8 +299,8 @@ static int drm_fbdev_cma_create(struct drm_fb_helper *helper, err_fb_info_destroy: drm_fb_helper_release_fbi(helper); -err_drm_gem_cma_free_object: - drm_gem_cma_free_object(&obj->base); +err_gem_free_object: + dev->driver->gem_free_object(&obj->base); return ret; } diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index c59ce4d0ef75..1ea8790e5090 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -126,6 +126,60 @@ static int drm_cpu_valid(void) } /** + * drm_new_set_master - Allocate a new master object and become master for the + * associated master realm. + * + * @dev: The associated device. + * @fpriv: File private identifying the client. + * + * This function must be called with dev->master_mutex held. + * Returns negative error code on failure. Zero on success. + */ +int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) +{ + struct drm_master *old_master; + int ret; + + lockdep_assert_held_once(&dev->master_mutex); + + /* create a new master */ + fpriv->minor->master = drm_master_create(fpriv->minor); + if (!fpriv->minor->master) + return -ENOMEM; + + /* take another reference for the copy in the local file priv */ + old_master = fpriv->master; + fpriv->master = drm_master_get(fpriv->minor->master); + + if (dev->driver->master_create) { + ret = dev->driver->master_create(dev, fpriv->master); + if (ret) + goto out_err; + } + if (dev->driver->master_set) { + ret = dev->driver->master_set(dev, fpriv, true); + if (ret) + goto out_err; + } + + fpriv->is_master = 1; + fpriv->allowed_master = 1; + fpriv->authenticated = 1; + if (old_master) + drm_master_put(&old_master); + + return 0; + +out_err: + /* drop both references and restore old master on failure */ + drm_master_put(&fpriv->minor->master); + drm_master_put(&fpriv->master); + fpriv->master = old_master; + + return ret; +} + +/** * Called whenever a process opens /dev/drm. * * \param filp file pointer.
@@ -172,6 +226,8 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) init_waitqueue_head(&priv->event_wait); priv->event_space = 4096; /* set aside 4k for event buffer */ + mutex_init(&priv->event_read_lock); + if (drm_core_check_feature(dev, DRIVER_GEM)) drm_gem_open(dev, priv); @@ -189,35 +245,9 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) mutex_lock(&dev->master_mutex); if (drm_is_primary_client(priv) && !priv->minor->master) { /* create a new master */ - priv->minor->master = drm_master_create(priv->minor); - if (!priv->minor->master) { - ret = -ENOMEM; + ret = drm_new_set_master(dev, priv); + if (ret) goto out_close; - } - - priv->is_master = 1; - /* take another reference for the copy in the local file priv */ - priv->master = drm_master_get(priv->minor->master); - priv->authenticated = 1; - - if (dev->driver->master_create) { - ret = dev->driver->master_create(dev, priv->master); - if (ret) { - /* drop both references if this fails */ - drm_master_put(&priv->minor->master); - drm_master_put(&priv->master); - goto out_close; - } - } - if (dev->driver->master_set) { - ret = dev->driver->master_set(dev, priv, true); - if (ret) { - /* drop both references if this fails */ - drm_master_put(&priv->minor->master); - drm_master_put(&priv->master); - goto out_close; - } - } } else if (drm_is_primary_client(priv)) { /* get a reference to the master */ priv->master = drm_master_get(priv->minor->master); @@ -483,14 +513,28 @@ ssize_t drm_read(struct file *filp, char __user *buffer, { struct drm_file *file_priv = filp->private_data; struct drm_device *dev = file_priv->minor->dev; - ssize_t ret = 0; + ssize_t ret; if (!access_ok(VERIFY_WRITE, buffer, count)) return -EFAULT; - spin_lock_irq(&dev->event_lock); + ret = mutex_lock_interruptible(&file_priv->event_read_lock); + if (ret) + return ret; + for (;;) { - if (list_empty(&file_priv->event_list)) { + struct drm_pending_event *e = NULL; + + spin_lock_irq(&dev->event_lock); + if (!list_empty(&file_priv->event_list)) { + e = list_first_entry(&file_priv->event_list, + struct drm_pending_event, link); + file_priv->event_space += e->event->length; + list_del(&e->link); + } + spin_unlock_irq(&dev->event_lock); + + if (e == NULL) { if (ret) break; @@ -499,36 +543,36 @@ ssize_t drm_read(struct file *filp, char __user *buffer, break; } - spin_unlock_irq(&dev->event_lock); + mutex_unlock(&file_priv->event_read_lock); ret = wait_event_interruptible(file_priv->event_wait, !list_empty(&file_priv->event_list)); - spin_lock_irq(&dev->event_lock); - if (ret < 0) - break; - - ret = 0; + if (ret >= 0) + ret = mutex_lock_interruptible(&file_priv->event_read_lock); + if (ret) + return ret; } else { - struct drm_pending_event *e; - - e = list_first_entry(&file_priv->event_list, - struct drm_pending_event, link); - if (e->event->length + ret > count) + unsigned length = e->event->length; + + if (length > count - ret) { +put_back_event: + spin_lock_irq(&dev->event_lock); + file_priv->event_space -= length; + list_add(&e->link, &file_priv->event_list); + spin_unlock_irq(&dev->event_lock); break; + } - if (__copy_to_user_inatomic(buffer + ret, - e->event, e->event->length)) { + if (copy_to_user(buffer + ret, e->event, length)) { if (ret == 0) ret = -EFAULT; - break; + goto put_back_event; } - file_priv->event_space += e->event->length; - ret += e->event->length; - list_del(&e->link); + ret += length; e->destroy(e); } } - spin_unlock_irq(&dev->event_lock); + mutex_unlock(&file_priv->event_read_lock); return ret; } diff 
--git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c index e109b49cd25d..e5df53b6e229 100644 --- a/drivers/gpu/drm/drm_gem_cma_helper.c +++ b/drivers/gpu/drm/drm_gem_cma_helper.c @@ -59,11 +59,13 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size) struct drm_gem_object *gem_obj; int ret; - cma_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL); - if (!cma_obj) + if (drm->driver->gem_create_object) + gem_obj = drm->driver->gem_create_object(drm, size); + else + gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL); + if (!gem_obj) return ERR_PTR(-ENOMEM); - - gem_obj = &cma_obj->base; + cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base); ret = drm_gem_object_init(drm, gem_obj, size); if (ret) @@ -119,7 +121,7 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm, return cma_obj; error: - drm_gem_cma_free_object(&cma_obj->base); + drm->driver->gem_free_object(&cma_obj->base); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(drm_gem_cma_create); @@ -169,7 +171,7 @@ drm_gem_cma_create_with_handle(struct drm_file *file_priv, return cma_obj; err_handle_create: - drm_gem_cma_free_object(gem_obj); + drm->driver->gem_free_object(gem_obj); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 2151ea551d3b..607f493ae801 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -980,7 +980,8 @@ static void send_vblank_event(struct drm_device *dev, struct drm_pending_vblank_event *e, unsigned long seq, struct timeval *now) { - WARN_ON_SMP(!spin_is_locked(&dev->event_lock)); + assert_spin_locked(&dev->event_lock); + e->event.sequence = seq; e->event.tv_sec = now->tv_sec; e->event.tv_usec = now->tv_usec; @@ -993,6 +994,57 @@ static void send_vblank_event(struct drm_device *dev, } /** + * drm_arm_vblank_event - arm vblank event after pageflip + * @dev: DRM device + * @pipe: CRTC index + * @e: the event to prepare to send + * + * A lot of drivers need to generate vblank events for the very next vblank + * interrupt. For example when the page flip interrupt happens when the page + * flip gets armed, but not when it actually executes within the next vblank + * period. This helper function implements exactly the required vblank arming + * behaviour. + * + * Caller must hold event lock. Caller must also hold a vblank reference for + * the event @e, which will be dropped when the next vblank arrives. + * + * This is the legacy version of drm_crtc_arm_vblank_event(). + */ +void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe, + struct drm_pending_vblank_event *e) +{ + assert_spin_locked(&dev->event_lock); + + e->pipe = pipe; + e->event.sequence = drm_vblank_count(dev, pipe); + list_add_tail(&e->base.link, &dev->vblank_event_list); +} +EXPORT_SYMBOL(drm_arm_vblank_event); + +/** + * drm_crtc_arm_vblank_event - arm vblank event after pageflip + * @crtc: the source CRTC of the vblank event + * @e: the event to send + * + * A lot of drivers need to generate vblank events for the very next vblank + * interrupt. For example when the page flip interrupt happens when the page + * flip gets armed, but not when it actually executes within the next vblank + * period. This helper function implements exactly the required vblank arming + * behaviour. + * + * Caller must hold event lock. Caller must also hold a vblank reference for + * the event @e, which will be dropped when the next vblank arrives. + * + * This is the native KMS version of drm_arm_vblank_event(). 
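+ * + * A sketch of the intended use from a driver's page-flip code (the vblank + * reference is assumed to have been taken with drm_crtc_vblank_get()): + * + *     spin_lock_irqsave(&dev->event_lock, flags); + *     drm_crtc_arm_vblank_event(crtc, event); + *     spin_unlock_irqrestore(&dev->event_lock, flags);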
+ */ +void drm_crtc_arm_vblank_event(struct drm_crtc *crtc, + struct drm_pending_vblank_event *e) +{ + drm_arm_vblank_event(crtc->dev, drm_crtc_index(crtc), e); +} +EXPORT_SYMBOL(drm_crtc_arm_vblank_event); + +/** * drm_send_vblank_event - helper to send vblank event after pageflip * @dev: DRM device * @pipe: CRTC index diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 2d5ca8eec13a..6e6a9c58d404 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -365,6 +365,44 @@ int mipi_dsi_create_packet(struct mipi_dsi_packet *packet, } EXPORT_SYMBOL(mipi_dsi_create_packet); +/** + * mipi_dsi_shutdown_peripheral() - sends a Shutdown Peripheral command + * @dsi: DSI peripheral device + * + * Return: 0 on success or a negative error code on failure. + */ +int mipi_dsi_shutdown_peripheral(struct mipi_dsi_device *dsi) +{ + struct mipi_dsi_msg msg = { + .channel = dsi->channel, + .type = MIPI_DSI_SHUTDOWN_PERIPHERAL, + .tx_buf = (u8 [2]) { 0, 0 }, + .tx_len = 2, + }; + + return mipi_dsi_device_transfer(dsi, &msg); +} +EXPORT_SYMBOL(mipi_dsi_shutdown_peripheral); + +/** + * mipi_dsi_turn_on_peripheral() - sends a Turn On Peripheral command + * @dsi: DSI peripheral device + * + * Return: 0 on success or a negative error code on failure. + */ +int mipi_dsi_turn_on_peripheral(struct mipi_dsi_device *dsi) +{ + struct mipi_dsi_msg msg = { + .channel = dsi->channel, + .type = MIPI_DSI_TURN_ON_PERIPHERAL, + .tx_buf = (u8 [2]) { 0, 0 }, + .tx_len = 2, + }; + + return mipi_dsi_device_transfer(dsi, &msg); +} +EXPORT_SYMBOL(mipi_dsi_turn_on_peripheral); + /* * mipi_dsi_set_maximum_return_packet_size() - specify the maximum size of the * payload in a long packet transmitted from the peripheral back to the diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index bde9b2911dc2..20775c05235a 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -553,10 +553,10 @@ EXPORT_SYMBOL(drm_gtf_mode_complex); * drivers/video/fbmon.c * * Standard GTF parameters: - * M = 600 - * C = 40 - * K = 128 - * J = 20 + * M = 600 + * C = 40 + * K = 128 + * J = 20 * * Returns: * The modeline based on the GTF algorithm stored in a drm_display_mode object. @@ -708,7 +708,8 @@ void drm_mode_set_name(struct drm_display_mode *mode) } EXPORT_SYMBOL(drm_mode_set_name); -/** drm_mode_hsync - get the hsync of a mode +/** + * drm_mode_hsync - get the hsync of a mode * @mode: mode * * Returns: @@ -917,13 +918,30 @@ bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_displ } else if (mode1->clock != mode2->clock) return false; + return drm_mode_equal_no_clocks(mode1, mode2); +} +EXPORT_SYMBOL(drm_mode_equal); + +/** + * drm_mode_equal_no_clocks - test modes for equality + * @mode1: first mode + * @mode2: second mode + * + * Check to see if @mode1 and @mode2 are equivalent, but + * don't check the pixel clocks. + * + * Returns: + * True if the modes are equal, false otherwise.
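+ * + * For example, the 59.94Hz alternate of an otherwise identical 60Hz CEA + * mode differs only in @clock, so drm_mode_equal() rejects the pair while + * this function reports them as equal.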
+ */ +bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) { if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) != (mode2->flags & DRM_MODE_FLAG_3D_MASK)) return false; return drm_mode_equal_no_clocks_no_stereo(mode1, mode2); } -EXPORT_SYMBOL(drm_mode_equal); +EXPORT_SYMBOL(drm_mode_equal_no_clocks); /** * drm_mode_equal_no_clocks_no_stereo - test modes for equality @@ -1056,7 +1074,7 @@ static const char * const drm_mode_status_names[] = { MODE_STATUS(ONE_SIZE), MODE_STATUS(NO_REDUCED), MODE_STATUS(NO_STEREO), - MODE_STATUS(UNVERIFIED), + MODE_STATUS(STALE), MODE_STATUS(BAD), MODE_STATUS(ERROR), }; @@ -1154,7 +1172,6 @@ EXPORT_SYMBOL(drm_mode_sort); /** * drm_mode_connector_list_update - update the mode list for the connector * @connector: the connector to update - * @merge_type_bits: whether to merge or overwrite type bits * * This moves the modes from the @connector probed_modes list * to the actual mode list. It compares the probed mode against the current * @@ -1163,33 +1180,48 @@ EXPORT_SYMBOL(drm_mode_sort); * This is just a helper function; it doesn't validate any modes itself and also * doesn't prune any invalid modes. Callers need to do that themselves. */ -void drm_mode_connector_list_update(struct drm_connector *connector, - bool merge_type_bits) +void drm_mode_connector_list_update(struct drm_connector *connector) { - struct drm_display_mode *mode; struct drm_display_mode *pmode, *pt; - int found_it; WARN_ON(!mutex_is_locked(&connector->dev->mode_config.mutex)); - list_for_each_entry_safe(pmode, pt, &connector->probed_modes, - head) { - found_it = 0; + list_for_each_entry_safe(pmode, pt, &connector->probed_modes, head) { + struct drm_display_mode *mode; + bool found_it = false; + /* go through current modes checking for the new probed mode */ list_for_each_entry(mode, &connector->modes, head) { - if (drm_mode_equal(pmode, mode)) { - found_it = 1; - /* if equal delete the probed mode */ - mode->status = pmode->status; - /* Merge type bits together */ - if (merge_type_bits) - mode->type |= pmode->type; - else - mode->type = pmode->type; - list_del(&pmode->head); - drm_mode_destroy(connector->dev, pmode); - break; + if (!drm_mode_equal(pmode, mode)) + continue; + + found_it = true; + + /* + * If the old matching mode is stale (i.e. left over + * from a previous probe) just replace it outright. + * Otherwise just merge the type bits between all + * equal probed modes. + * + * If two probed modes are considered equal, pick the + * actual timings from the one that's marked as + * preferred (in case the match isn't 100%). If + * multiple or zero preferred modes are present, favor + * the mode added to the probed_modes list first.
+ */ + if (mode->status == MODE_STALE) { + drm_mode_copy(mode, pmode); + } else if ((mode->type & DRM_MODE_TYPE_PREFERRED) == 0 && + (pmode->type & DRM_MODE_TYPE_PREFERRED) != 0) { + pmode->type |= mode->type; + drm_mode_copy(mode, pmode); + } else { + mode->type |= pmode->type; } + + list_del(&pmode->head); + drm_mode_destroy(connector->dev, pmode); + break; } if (!found_it) { @@ -1212,7 +1244,7 @@ EXPORT_SYMBOL(drm_mode_connector_list_update); * This uses the same parameters as the fb modedb.c, except for an extra * force-enable, force-enable-digital and force-disable bit at the end: * - * <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd] + * <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd] * * The intermediate drm_cmdline_mode structure is required to store additional * options from the command line modeline like the force-enable/disable flag. @@ -1230,7 +1262,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, unsigned int xres = 0, yres = 0, bpp = 32, refresh = 0; bool yres_specified = false, cvt = false, rb = false; bool interlace = false, margins = false, was_digit = false; - int i, err; + int i; enum drm_connector_force force = DRM_FORCE_UNSPECIFIED; #ifdef CONFIG_FB @@ -1250,9 +1282,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, case '@': if (!refresh_specified && !bpp_specified && !yres_specified && !cvt && !rb && was_digit) { - err = kstrtouint(&name[i + 1], 10, &refresh); - if (err) - return false; + refresh = simple_strtol(&name[i+1], NULL, 10); refresh_specified = true; was_digit = false; } else @@ -1261,9 +1291,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, case '-': if (!bpp_specified && !yres_specified && !cvt && !rb && was_digit) { - err = kstrtouint(&name[i + 1], 10, &bpp); - if (err) - return false; + bpp = simple_strtol(&name[i+1], NULL, 10); bpp_specified = true; was_digit = false; } else @@ -1271,9 +1299,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option, break; case 'x': if (!yres_specified && was_digit) { - err = kstrtouint(&name[i + 1], 10, &yres); - if (err) - return false; + yres = simple_strtol(&name[i+1], NULL, 10); yres_specified = true; was_digit = false; } else diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 6675b1428410..e3a4adf03e7b 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -40,28 +40,33 @@ * The basic usage pattern is to: * * drm_modeset_acquire_init(&ctx) - * retry: + * retry: * foreach (lock in random_ordered_set_of_locks) { - * ret = drm_modeset_lock(lock, &ctx) - * if (ret == -EDEADLK) { - * drm_modeset_backoff(&ctx); - * goto retry; - * } + * ret = drm_modeset_lock(lock, &ctx) + * if (ret == -EDEADLK) { + * drm_modeset_backoff(&ctx); + * goto retry; + * } * } - * * ... do stuff ... - * * drm_modeset_drop_locks(&ctx); * drm_modeset_acquire_fini(&ctx); */ /** * drm_modeset_lock_all - take all modeset locks - * @dev: drm device + * @dev: DRM device * * This function takes all modeset locks, suitable where a more fine-grained - * scheme isn't (yet) implemented. Locks must be dropped with - * drm_modeset_unlock_all. + * scheme isn't (yet) implemented. Locks must be dropped by calling the + * drm_modeset_unlock_all() function. + * + * This function is deprecated. It allocates a lock acquisition context and + * stores it in the DRM device's ->mode_config.
This facilitates conversion of + * existing code because it removes the need to manually deal with the + * acquisition context, but it is also brittle because the context is global + * and care must be taken not to nest calls. New code should use the + * drm_modeset_lock_all_ctx() function and pass in the context explicitly. */ void drm_modeset_lock_all(struct drm_device *dev) { @@ -78,39 +83,43 @@ void drm_modeset_lock_all(struct drm_device *dev) drm_modeset_acquire_init(ctx, 0); retry: - ret = drm_modeset_lock(&config->connection_mutex, ctx); - if (ret) - goto fail; - ret = drm_modeset_lock_all_crtcs(dev, ctx); - if (ret) - goto fail; + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret < 0) { + if (ret == -EDEADLK) { + drm_modeset_backoff(ctx); + goto retry; + } + + drm_modeset_acquire_fini(ctx); + kfree(ctx); + return; + } WARN_ON(config->acquire_ctx); - /* now we hold the locks, so now that it is safe, stash the - * ctx for drm_modeset_unlock_all(): + /* + * We hold the locks now, so it is safe to stash the acquisition + * context for drm_modeset_unlock_all(). */ config->acquire_ctx = ctx; drm_warn_on_modeset_not_all_locked(dev); - - return; - -fail: - if (ret == -EDEADLK) { - drm_modeset_backoff(ctx); - goto retry; - } - - kfree(ctx); } EXPORT_SYMBOL(drm_modeset_lock_all); /** * drm_modeset_unlock_all - drop all modeset locks - * @dev: device + * @dev: DRM device * - * This function drop all modeset locks taken by drm_modeset_lock_all. + * This function drops all modeset locks taken by a previous call to the + * drm_modeset_lock_all() function. + * + * This function is deprecated. It uses the lock acquisition context stored + * in the DRM device's ->mode_config. This facilitates conversion of existing + * code because it removes the need to manually deal with the acquisition + * context, but it is also brittle because the context is global and care must + * be taken not to nest calls. New code should pass the acquisition context + * directly to the drm_modeset_drop_locks() function. */ void drm_modeset_unlock_all(struct drm_device *dev) { @@ -431,14 +440,34 @@ void drm_modeset_unlock(struct drm_modeset_lock *lock) } EXPORT_SYMBOL(drm_modeset_unlock); -/* In some legacy codepaths it's convenient to just grab all the crtc and plane - * related locks. */ -int drm_modeset_lock_all_crtcs(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx) +/** + * drm_modeset_lock_all_ctx - take all modeset locks + * @dev: DRM device + * @ctx: lock acquisition context + * + * This function takes all modeset locks, suitable where a more fine-grained + * scheme isn't (yet) implemented. + * + * Unlike drm_modeset_lock_all(), it doesn't take the dev->mode_config.mutex + * since that lock isn't required for modeset state changes. Callers which + * need to grab that lock too need to do so outside of the acquire context + * @ctx. + * + * Locks acquired with this function should be released by calling the + * drm_modeset_drop_locks() function on @ctx. + * + * Returns: 0 on success or a negative error code on failure.
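+ * + * A sketch of the expected calling pattern, mirroring the overview above + * (assuming a driver-local acquire context): + * + *     drm_modeset_acquire_init(&ctx, 0); + * retry: + *     ret = drm_modeset_lock_all_ctx(dev, &ctx); + *     if (ret == -EDEADLK) { + *         drm_modeset_backoff(&ctx); + *         goto retry; + *     } + *     ... do stuff ... + *     drm_modeset_drop_locks(&ctx); + *     drm_modeset_acquire_fini(&ctx);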
+ */ +int drm_modeset_lock_all_ctx(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) { struct drm_crtc *crtc; struct drm_plane *plane; - int ret = 0; + int ret; + + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx); + if (ret) + return ret; drm_for_each_crtc(crtc, dev) { ret = drm_modeset_lock(&crtc->mutex, ctx); @@ -454,4 +483,4 @@ int drm_modeset_lock_all_crtcs(struct drm_device *dev, return 0; } -EXPORT_SYMBOL(drm_modeset_lock_all_crtcs); +EXPORT_SYMBOL(drm_modeset_lock_all_ctx); diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index a6983d41920d..369d2898ff9e 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -57,6 +57,10 @@ * by the atomic helpers. * * Again drivers are strongly urged to switch to the new interfaces. + * + * The plane helpers share the function table structures with other helpers, + * specifically also the atomic helpers. See struct &drm_plane_helper_funcs for + * the details. */ /* @@ -371,7 +375,7 @@ static struct drm_plane *create_primary_plane(struct drm_device *dev) &drm_primary_helper_funcs, safe_modeset_formats, ARRAY_SIZE(safe_modeset_formats), - DRM_PLANE_TYPE_PRIMARY); + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); primary = NULL; @@ -398,7 +402,8 @@ int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, struct drm_plane *primary; primary = create_primary_plane(dev); - return drm_crtc_init_with_planes(dev, crtc, primary, NULL, funcs); + return drm_crtc_init_with_planes(dev, crtc, primary, NULL, funcs, + NULL); } EXPORT_SYMBOL(drm_crtc_init); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 9f935f55d74c..27aa7183b20b 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -313,19 +313,15 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { * * Export callbacks: * - * - @gem_prime_pin (optional): prepare a GEM object for exporting - * - * - @gem_prime_get_sg_table: provide a scatter/gather table of pinned pages - * - * - @gem_prime_vmap: vmap a buffer exported by your driver - * - * - @gem_prime_vunmap: vunmap a buffer exported by your driver - * - * - @gem_prime_mmap (optional): mmap a buffer exported by your driver + * * @gem_prime_pin (optional): prepare a GEM object for exporting + * * @gem_prime_get_sg_table: provide a scatter/gather table of pinned pages + * * @gem_prime_vmap: vmap a buffer exported by your driver + * * @gem_prime_vunmap: vunmap a buffer exported by your driver + * * @gem_prime_mmap (optional): mmap a buffer exported by your driver * * Import callback: * - * - @gem_prime_import_sg_table (import): produce a GEM object from another + * * @gem_prime_import_sg_table (import): produce a GEM object from another * driver's scatter/gather table */ diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 94ba39e34299..e714b5a7955f 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -53,6 +53,9 @@ * This helper library can be used independently of the modeset helper library. * Drivers can also overwrite different parts e.g. use their own hotplug * handling code to avoid probing unrelated outputs. + * + * The probe helpers share the function table structures with other display + * helper libraries. See struct &drm_connector_helper_funcs for the details. 
*/ static bool drm_kms_helper_poll = true; @@ -126,9 +129,64 @@ void drm_kms_helper_poll_enable_locked(struct drm_device *dev) } EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked); - -static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY, bool merge_type_bits) +/** + * drm_helper_probe_single_connector_modes - get complete set of display modes + * @connector: connector to probe + * @maxX: max width for modes + * @maxY: max height for modes + * + * Based on the helper callbacks implemented by @connector in struct + * &drm_connector_helper_funcs try to detect all valid modes. Modes will first + * be added to the connector's probed_modes list, then culled (based on validity + * and the @maxX, @maxY parameters) and put into the normal modes list. + * + * Intended to be used as a generic implementation of the ->fill_modes() + * @connector vfunc for drivers that use the CRTC helpers for output mode + * filtering and detection. + * + * The basic procedure is as follows: + * + * 1. All modes currently on the connector's modes list are marked as stale + * + * 2. New modes are added to the connector's probed_modes list with + * drm_mode_probed_add(). New modes start their life with status as OK. + * Modes are added from a single source using the following priority order. + * + * - debugfs 'override_edid' (used for testing only) + * - firmware EDID (drm_load_edid_firmware()) + * - connector helper ->get_modes() vfunc + * - if the connector status is connector_status_connected, standard + * VESA DMT modes up to 1024x768 are automatically added + * (drm_add_modes_noedid()) + * + * Finally modes specified via the kernel command line (video=...) are + * added in addition to what the earlier probes produced + * (drm_helper_probe_add_cmdline_mode()). These modes are generated + * using the VESA GTF/CVT formulas. + * + * 3. Modes are moved from the probed_modes list to the modes list. Potential + * duplicates are merged together (see drm_mode_connector_list_update()). + * After this step the probed_modes list will be empty again. + * + * 4. Any non-stale mode on the modes list then undergoes validation + * + * - drm_mode_validate_basic() performs basic sanity checks + * - drm_mode_validate_size() filters out modes larger than @maxX and @maxY + * (if specified) + * - drm_mode_validate_flag() checks the modes against basic connector + * capabilities (interlace_allowed, doublescan_allowed, stereo_allowed) + * - the optional connector ->mode_valid() helper can perform driver and/or + * hardware specific checks + * + * 5. Any mode whose status is not OK is pruned from the connector's modes list, + * accompanied by a debug message indicating the reason for the mode's + * rejection (see drm_mode_prune_invalid()). + * + * Returns: + * The number of modes found on @connector.
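+ * + * Drivers using the probe helpers typically just plug this in (sketch, + * foo_connector_funcs is a made-up name): + * + *     static const struct drm_connector_funcs foo_connector_funcs = { + *         .fill_modes = drm_helper_probe_single_connector_modes, + *         .destroy = drm_connector_cleanup, + *     };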
+ */ +int drm_helper_probe_single_connector_modes(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode; @@ -143,9 +201,9 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - /* set all modes to the unverified state */ + /* set all old modes to the stale state */ list_for_each_entry(mode, &connector->modes, head) - mode->status = MODE_UNVERIFIED; + mode->status = MODE_STALE; old_status = connector->status; @@ -168,10 +226,11 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect * check here, and if anything changed start the hotplug code. */ if (old_status != connector->status) { - DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %d to %d\n", + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n", connector->base.id, connector->name, - old_status, connector->status); + drm_get_connector_status_name(old_status), + drm_get_connector_status_name(connector->status)); /* * The hotplug event code might call into the fb @@ -199,17 +258,16 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect goto prune; } + if (connector->override_edid) { + struct edid *edid = (struct edid *) connector->edid_blob_ptr->data; + + count = drm_add_edid_modes(connector, edid); + drm_edid_to_eld(connector, edid); + } else { #ifdef CONFIG_DRM_LOAD_EDID_FIRMWARE - count = drm_load_edid_firmware(connector); - if (count == 0) + count = drm_load_edid_firmware(connector); + if (count == 0) #endif - { - if (connector->override_edid) { - struct edid *edid = (struct edid *) connector->edid_blob_ptr->data; - - count = drm_add_edid_modes(connector, edid); - drm_edid_to_eld(connector, edid); - } else count = (*connector_funcs->get_modes)(connector); } @@ -219,7 +277,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect if (count == 0) goto prune; - drm_mode_connector_list_update(connector, merge_type_bits); + drm_mode_connector_list_update(connector); if (connector->interlace_allowed) mode_flags |= DRM_MODE_FLAG_INTERLACE; @@ -229,7 +287,8 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect mode_flags |= DRM_MODE_FLAG_3D_MASK; list_for_each_entry(mode, &connector->modes, head) { - mode->status = drm_mode_validate_basic(mode); + if (mode->status == MODE_OK) + mode->status = drm_mode_validate_basic(mode); if (mode->status == MODE_OK) mode->status = drm_mode_validate_size(mode, maxX, maxY); @@ -262,49 +321,9 @@ prune: return count; } - -/** - * drm_helper_probe_single_connector_modes - get complete set of display modes - * @connector: connector to probe - * @maxX: max width for modes - * @maxY: max height for modes - * - * Based on the helper callbacks implemented by @connector try to detect all - * valid modes. Modes will first be added to the connector's probed_modes list, - * then culled (based on validity and the @maxX, @maxY parameters) and put into - * the normal modes list. - * - * Intended to be use as a generic implementation of the ->fill_modes() - * @connector vfunc for drivers that use the crtc helpers for output mode - * filtering and detection. - * - * Returns: - * The number of modes found on @connector. 
- */ -int drm_helper_probe_single_connector_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) -{ - return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, true); -} EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); /** - * drm_helper_probe_single_connector_modes_nomerge - get complete set of display modes - * @connector: connector to probe - * @maxX: max width for modes - * @maxY: max height for modes - * - * This operates like drm_hehlper_probe_single_connector_modes except it - * replaces the mode bits instead of merging them for preferred modes. - */ -int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) -{ - return drm_helper_probe_single_connector_modes_merge_bits(connector, maxX, maxY, false); -} -EXPORT_SYMBOL(drm_helper_probe_single_connector_modes_nomerge); - -/** * drm_kms_helper_hotplug_event - fire off KMS hotplug events * @dev: drm_device whose connector state changed * diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig new file mode 100644 index 000000000000..2cde7a5442fb --- /dev/null +++ b/drivers/gpu/drm/etnaviv/Kconfig @@ -0,0 +1,20 @@ + +config DRM_ETNAVIV + tristate "ETNAVIV (DRM support for Vivante GPU IP cores)" + depends on DRM + depends on ARCH_MXC || ARCH_DOVE + select SHMEM + select TMPFS + select IOMMU_API + select IOMMU_SUPPORT + select WANT_DEV_COREDUMP + help + DRM driver for Vivante GPUs. + +config DRM_ETNAVIV_REGISTER_LOGGING + bool "enable ETNAVIV register logging" + depends on DRM_ETNAVIV + help + Compile in support for logging register reads/writes in a format + that can be parsed by envytools demsm tool. If enabled, register + logging can be switched on via etnaviv.reglog=y module param. diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile new file mode 100644 index 000000000000..1086e9876f91 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/Makefile @@ -0,0 +1,14 @@ +etnaviv-y := \ + etnaviv_buffer.o \ + etnaviv_cmd_parser.o \ + etnaviv_drv.o \ + etnaviv_dump.o \ + etnaviv_gem_prime.o \ + etnaviv_gem_submit.o \ + etnaviv_gem.o \ + etnaviv_gpu.o \ + etnaviv_iommu_v2.o \ + etnaviv_iommu.o \ + etnaviv_mmu.o + +obj-$(CONFIG_DRM_ETNAVIV) += etnaviv.o diff --git a/drivers/gpu/drm/etnaviv/cmdstream.xml.h b/drivers/gpu/drm/etnaviv/cmdstream.xml.h new file mode 100644 index 000000000000..8c44ba9a694e --- /dev/null +++ b/drivers/gpu/drm/etnaviv/cmdstream.xml.h @@ -0,0 +1,218 @@ +#ifndef CMDSTREAM_XML +#define CMDSTREAM_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- cmdstream.xml ( 12589 bytes, from 2014-02-17 14:57:56) +- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) + +Copyright (C) 2014 +*/ + + +#define FE_OPCODE_LOAD_STATE 0x00000001 +#define FE_OPCODE_END 0x00000002 +#define FE_OPCODE_NOP 0x00000003 +#define FE_OPCODE_DRAW_2D 0x00000004 +#define FE_OPCODE_DRAW_PRIMITIVES 0x00000005 +#define FE_OPCODE_DRAW_INDEXED_PRIMITIVES 0x00000006 +#define FE_OPCODE_WAIT 0x00000007 +#define FE_OPCODE_LINK 0x00000008 +#define FE_OPCODE_STALL 0x00000009 +#define FE_OPCODE_CALL 0x0000000a +#define FE_OPCODE_RETURN 0x0000000b +#define FE_OPCODE_CHIP_SELECT 0x0000000d +#define PRIMITIVE_TYPE_POINTS 0x00000001 +#define PRIMITIVE_TYPE_LINES 0x00000002 +#define PRIMITIVE_TYPE_LINE_STRIP 0x00000003 +#define PRIMITIVE_TYPE_TRIANGLES 0x00000004 +#define PRIMITIVE_TYPE_TRIANGLE_STRIP 0x00000005 +#define PRIMITIVE_TYPE_TRIANGLE_FAN 0x00000006 +#define PRIMITIVE_TYPE_LINE_LOOP 0x00000007 +#define PRIMITIVE_TYPE_QUADS 0x00000008 +#define VIV_FE_LOAD_STATE 0x00000000 + +#define VIV_FE_LOAD_STATE_HEADER 0x00000000 +#define VIV_FE_LOAD_STATE_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_LOAD_STATE_HEADER_OP__SHIFT 27 +#define VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE 0x08000000 +#define VIV_FE_LOAD_STATE_HEADER_FIXP 0x04000000 +#define VIV_FE_LOAD_STATE_HEADER_COUNT__MASK 0x03ff0000 +#define VIV_FE_LOAD_STATE_HEADER_COUNT__SHIFT 16 +#define VIV_FE_LOAD_STATE_HEADER_COUNT(x) (((x) << VIV_FE_LOAD_STATE_HEADER_COUNT__SHIFT) & VIV_FE_LOAD_STATE_HEADER_COUNT__MASK) +#define VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK 0x0000ffff +#define VIV_FE_LOAD_STATE_HEADER_OFFSET__SHIFT 0 +#define VIV_FE_LOAD_STATE_HEADER_OFFSET(x) (((x) << VIV_FE_LOAD_STATE_HEADER_OFFSET__SHIFT) & VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK) +#define VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR 2 + +#define VIV_FE_END 0x00000000 + +#define VIV_FE_END_HEADER 0x00000000 +#define VIV_FE_END_HEADER_EVENT_ID__MASK 0x0000001f +#define VIV_FE_END_HEADER_EVENT_ID__SHIFT 0 +#define VIV_FE_END_HEADER_EVENT_ID(x) (((x) << VIV_FE_END_HEADER_EVENT_ID__SHIFT) & VIV_FE_END_HEADER_EVENT_ID__MASK) +#define VIV_FE_END_HEADER_EVENT_ENABLE 0x00000100 +#define VIV_FE_END_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_END_HEADER_OP__SHIFT 27 +#define VIV_FE_END_HEADER_OP_END 0x10000000 + +#define VIV_FE_NOP 0x00000000 + +#define VIV_FE_NOP_HEADER 0x00000000 +#define VIV_FE_NOP_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_NOP_HEADER_OP__SHIFT 27 +#define VIV_FE_NOP_HEADER_OP_NOP 0x18000000 + +#define VIV_FE_DRAW_2D 0x00000000 + +#define VIV_FE_DRAW_2D_HEADER 0x00000000 +#define VIV_FE_DRAW_2D_HEADER_COUNT__MASK 0x0000ff00 +#define VIV_FE_DRAW_2D_HEADER_COUNT__SHIFT 8 +#define VIV_FE_DRAW_2D_HEADER_COUNT(x) (((x) << VIV_FE_DRAW_2D_HEADER_COUNT__SHIFT) & VIV_FE_DRAW_2D_HEADER_COUNT__MASK) +#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT__MASK 0x07ff0000 +#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT__SHIFT 16 +#define VIV_FE_DRAW_2D_HEADER_DATA_COUNT(x) (((x) << VIV_FE_DRAW_2D_HEADER_DATA_COUNT__SHIFT) & VIV_FE_DRAW_2D_HEADER_DATA_COUNT__MASK) +#define VIV_FE_DRAW_2D_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_DRAW_2D_HEADER_OP__SHIFT 27 +#define VIV_FE_DRAW_2D_HEADER_OP_DRAW_2D 0x20000000 + +#define VIV_FE_DRAW_2D_TOP_LEFT 0x00000008 +#define VIV_FE_DRAW_2D_TOP_LEFT_X__MASK 0x0000ffff +#define VIV_FE_DRAW_2D_TOP_LEFT_X__SHIFT 0 +#define 
VIV_FE_DRAW_2D_TOP_LEFT_X(x) (((x) << VIV_FE_DRAW_2D_TOP_LEFT_X__SHIFT) & VIV_FE_DRAW_2D_TOP_LEFT_X__MASK) +#define VIV_FE_DRAW_2D_TOP_LEFT_Y__MASK 0xffff0000 +#define VIV_FE_DRAW_2D_TOP_LEFT_Y__SHIFT 16 +#define VIV_FE_DRAW_2D_TOP_LEFT_Y(x) (((x) << VIV_FE_DRAW_2D_TOP_LEFT_Y__SHIFT) & VIV_FE_DRAW_2D_TOP_LEFT_Y__MASK) + +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT 0x0000000c +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__MASK 0x0000ffff +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__SHIFT 0 +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_X(x) (((x) << VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__SHIFT) & VIV_FE_DRAW_2D_BOTTOM_RIGHT_X__MASK) +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__MASK 0xffff0000 +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__SHIFT 16 +#define VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y(x) (((x) << VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__SHIFT) & VIV_FE_DRAW_2D_BOTTOM_RIGHT_Y__MASK) + +#define VIV_FE_DRAW_PRIMITIVES 0x00000000 + +#define VIV_FE_DRAW_PRIMITIVES_HEADER 0x00000000 +#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP__SHIFT 27 +#define VIV_FE_DRAW_PRIMITIVES_HEADER_OP_DRAW_PRIMITIVES 0x28000000 + +#define VIV_FE_DRAW_PRIMITIVES_COMMAND 0x00000004 +#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__MASK 0x000000ff +#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__SHIFT 0 +#define VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE(x) (((x) << VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__SHIFT) & VIV_FE_DRAW_PRIMITIVES_COMMAND_TYPE__MASK) + +#define VIV_FE_DRAW_PRIMITIVES_START 0x00000008 + +#define VIV_FE_DRAW_PRIMITIVES_COUNT 0x0000000c + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES 0x00000000 + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER 0x00000000 +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP__SHIFT 27 +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_HEADER_OP_DRAW_INDEXED_PRIMITIVES 0x30000000 + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND 0x00000004 +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__MASK 0x000000ff +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__SHIFT 0 +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE(x) (((x) << VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__SHIFT) & VIV_FE_DRAW_INDEXED_PRIMITIVES_COMMAND_TYPE__MASK) + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_START 0x00000008 + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_COUNT 0x0000000c + +#define VIV_FE_DRAW_INDEXED_PRIMITIVES_OFFSET 0x00000010 + +#define VIV_FE_WAIT 0x00000000 + +#define VIV_FE_WAIT_HEADER 0x00000000 +#define VIV_FE_WAIT_HEADER_DELAY__MASK 0x0000ffff +#define VIV_FE_WAIT_HEADER_DELAY__SHIFT 0 +#define VIV_FE_WAIT_HEADER_DELAY(x) (((x) << VIV_FE_WAIT_HEADER_DELAY__SHIFT) & VIV_FE_WAIT_HEADER_DELAY__MASK) +#define VIV_FE_WAIT_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_WAIT_HEADER_OP__SHIFT 27 +#define VIV_FE_WAIT_HEADER_OP_WAIT 0x38000000 + +#define VIV_FE_LINK 0x00000000 + +#define VIV_FE_LINK_HEADER 0x00000000 +#define VIV_FE_LINK_HEADER_PREFETCH__MASK 0x0000ffff +#define VIV_FE_LINK_HEADER_PREFETCH__SHIFT 0 +#define VIV_FE_LINK_HEADER_PREFETCH(x) (((x) << VIV_FE_LINK_HEADER_PREFETCH__SHIFT) & VIV_FE_LINK_HEADER_PREFETCH__MASK) +#define VIV_FE_LINK_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_LINK_HEADER_OP__SHIFT 27 +#define VIV_FE_LINK_HEADER_OP_LINK 0x40000000 + +#define VIV_FE_LINK_ADDRESS 0x00000004 + +#define VIV_FE_STALL 0x00000000 + +#define VIV_FE_STALL_HEADER 0x00000000 +#define VIV_FE_STALL_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_STALL_HEADER_OP__SHIFT 27 +#define VIV_FE_STALL_HEADER_OP_STALL 0x48000000 + +#define VIV_FE_STALL_TOKEN 0x00000004 +#define 
VIV_FE_STALL_TOKEN_FROM__MASK 0x0000001f +#define VIV_FE_STALL_TOKEN_FROM__SHIFT 0 +#define VIV_FE_STALL_TOKEN_FROM(x) (((x) << VIV_FE_STALL_TOKEN_FROM__SHIFT) & VIV_FE_STALL_TOKEN_FROM__MASK) +#define VIV_FE_STALL_TOKEN_TO__MASK 0x00001f00 +#define VIV_FE_STALL_TOKEN_TO__SHIFT 8 +#define VIV_FE_STALL_TOKEN_TO(x) (((x) << VIV_FE_STALL_TOKEN_TO__SHIFT) & VIV_FE_STALL_TOKEN_TO__MASK) + +#define VIV_FE_CALL 0x00000000 + +#define VIV_FE_CALL_HEADER 0x00000000 +#define VIV_FE_CALL_HEADER_PREFETCH__MASK 0x0000ffff +#define VIV_FE_CALL_HEADER_PREFETCH__SHIFT 0 +#define VIV_FE_CALL_HEADER_PREFETCH(x) (((x) << VIV_FE_CALL_HEADER_PREFETCH__SHIFT) & VIV_FE_CALL_HEADER_PREFETCH__MASK) +#define VIV_FE_CALL_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_CALL_HEADER_OP__SHIFT 27 +#define VIV_FE_CALL_HEADER_OP_CALL 0x50000000 + +#define VIV_FE_CALL_ADDRESS 0x00000004 + +#define VIV_FE_CALL_RETURN_PREFETCH 0x00000008 + +#define VIV_FE_CALL_RETURN_ADDRESS 0x0000000c + +#define VIV_FE_RETURN 0x00000000 + +#define VIV_FE_RETURN_HEADER 0x00000000 +#define VIV_FE_RETURN_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_RETURN_HEADER_OP__SHIFT 27 +#define VIV_FE_RETURN_HEADER_OP_RETURN 0x58000000 + +#define VIV_FE_CHIP_SELECT 0x00000000 + +#define VIV_FE_CHIP_SELECT_HEADER 0x00000000 +#define VIV_FE_CHIP_SELECT_HEADER_OP__MASK 0xf8000000 +#define VIV_FE_CHIP_SELECT_HEADER_OP__SHIFT 27 +#define VIV_FE_CHIP_SELECT_HEADER_OP_CHIP_SELECT 0x68000000 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP15 0x00008000 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP14 0x00004000 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP13 0x00002000 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP12 0x00001000 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP11 0x00000800 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP10 0x00000400 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP9 0x00000200 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP8 0x00000100 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP7 0x00000080 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP6 0x00000040 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP5 0x00000020 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP4 0x00000010 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP3 0x00000008 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP2 0x00000004 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP1 0x00000002 +#define VIV_FE_CHIP_SELECT_HEADER_ENABLE_CHIP0 0x00000001 + + +#endif /* CMDSTREAM_XML */ diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h new file mode 100644 index 000000000000..9e585d51fb78 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/common.xml.h @@ -0,0 +1,249 @@ +#ifndef COMMON_XML +#define COMMON_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- state_vg.xml ( 5973 bytes, from 2015-03-25 11:26:01) +- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) + +Copyright (C) 2015 +*/ + + +#define PIPE_ID_PIPE_3D 0x00000000 +#define PIPE_ID_PIPE_2D 0x00000001 +#define SYNC_RECIPIENT_FE 0x00000001 +#define SYNC_RECIPIENT_RA 0x00000005 +#define SYNC_RECIPIENT_PE 0x00000007 +#define SYNC_RECIPIENT_DE 0x0000000b +#define SYNC_RECIPIENT_VG 0x0000000f +#define SYNC_RECIPIENT_TESSELATOR 0x00000010 +#define SYNC_RECIPIENT_VG2 0x00000011 +#define SYNC_RECIPIENT_TESSELATOR2 0x00000012 +#define SYNC_RECIPIENT_VG3 0x00000013 +#define SYNC_RECIPIENT_TESSELATOR3 0x00000014 +#define ENDIAN_MODE_NO_SWAP 0x00000000 +#define ENDIAN_MODE_SWAP_16 0x00000001 +#define ENDIAN_MODE_SWAP_32 0x00000002 +#define chipModel_GC300 0x00000300 +#define chipModel_GC320 0x00000320 +#define chipModel_GC350 0x00000350 +#define chipModel_GC355 0x00000355 +#define chipModel_GC400 0x00000400 +#define chipModel_GC410 0x00000410 +#define chipModel_GC420 0x00000420 +#define chipModel_GC450 0x00000450 +#define chipModel_GC500 0x00000500 +#define chipModel_GC530 0x00000530 +#define chipModel_GC600 0x00000600 +#define chipModel_GC700 0x00000700 +#define chipModel_GC800 0x00000800 +#define chipModel_GC860 0x00000860 +#define chipModel_GC880 0x00000880 +#define chipModel_GC1000 0x00001000 +#define chipModel_GC2000 0x00002000 +#define chipModel_GC2100 0x00002100 +#define chipModel_GC4000 0x00004000 +#define RGBA_BITS_R 0x00000001 +#define RGBA_BITS_G 0x00000002 +#define RGBA_BITS_B 0x00000004 +#define RGBA_BITS_A 0x00000008 +#define chipFeatures_FAST_CLEAR 0x00000001 +#define chipFeatures_SPECIAL_ANTI_ALIASING 0x00000002 +#define chipFeatures_PIPE_3D 0x00000004 +#define chipFeatures_DXT_TEXTURE_COMPRESSION 0x00000008 +#define chipFeatures_DEBUG_MODE 0x00000010 +#define chipFeatures_Z_COMPRESSION 0x00000020 +#define chipFeatures_YUV420_SCALER 0x00000040 +#define chipFeatures_MSAA 0x00000080 +#define chipFeatures_DC 0x00000100 +#define chipFeatures_PIPE_2D 0x00000200 +#define chipFeatures_ETC1_TEXTURE_COMPRESSION 0x00000400 +#define chipFeatures_FAST_SCALER 0x00000800 +#define chipFeatures_HIGH_DYNAMIC_RANGE 0x00001000 +#define chipFeatures_YUV420_TILER 0x00002000 +#define chipFeatures_MODULE_CG 0x00004000 +#define chipFeatures_MIN_AREA 0x00008000 +#define chipFeatures_NO_EARLY_Z 0x00010000 +#define chipFeatures_NO_422_TEXTURE 0x00020000 +#define chipFeatures_BUFFER_INTERLEAVING 0x00040000 +#define chipFeatures_BYTE_WRITE_2D 0x00080000 +#define chipFeatures_NO_SCALER 0x00100000 +#define chipFeatures_YUY2_AVERAGING 0x00200000 +#define chipFeatures_HALF_PE_CACHE 0x00400000 +#define chipFeatures_HALF_TX_CACHE 0x00800000 +#define chipFeatures_YUY2_RENDER_TARGET 0x01000000 +#define chipFeatures_MEM32 0x02000000 +#define chipFeatures_PIPE_VG 0x04000000 +#define chipFeatures_VGTS 0x08000000 +#define chipFeatures_FE20 0x10000000 +#define chipFeatures_BYTE_WRITE_3D 0x20000000 +#define chipFeatures_RS_YUV_TARGET 0x40000000 +#define chipFeatures_32_BIT_INDICES 0x80000000 +#define chipMinorFeatures0_FLIP_Y 0x00000001 +#define chipMinorFeatures0_DUAL_RETURN_BUS 0x00000002 +#define chipMinorFeatures0_ENDIANNESS_CONFIG 0x00000004 +#define chipMinorFeatures0_TEXTURE_8K 0x00000008 +#define chipMinorFeatures0_CORRECT_TEXTURE_CONVERTER 0x00000010 +#define 
chipMinorFeatures0_SPECIAL_MSAA_LOD 0x00000020 +#define chipMinorFeatures0_FAST_CLEAR_FLUSH 0x00000040 +#define chipMinorFeatures0_2DPE20 0x00000080 +#define chipMinorFeatures0_CORRECT_AUTO_DISABLE 0x00000100 +#define chipMinorFeatures0_RENDERTARGET_8K 0x00000200 +#define chipMinorFeatures0_2BITPERTILE 0x00000400 +#define chipMinorFeatures0_SEPARATE_TILE_STATUS_WHEN_INTERLEAVED 0x00000800 +#define chipMinorFeatures0_SUPER_TILED 0x00001000 +#define chipMinorFeatures0_VG_20 0x00002000 +#define chipMinorFeatures0_TS_EXTENDED_COMMANDS 0x00004000 +#define chipMinorFeatures0_COMPRESSION_FIFO_FIXED 0x00008000 +#define chipMinorFeatures0_HAS_SIGN_FLOOR_CEIL 0x00010000 +#define chipMinorFeatures0_VG_FILTER 0x00020000 +#define chipMinorFeatures0_VG_21 0x00040000 +#define chipMinorFeatures0_SHADER_HAS_W 0x00080000 +#define chipMinorFeatures0_HAS_SQRT_TRIG 0x00100000 +#define chipMinorFeatures0_MORE_MINOR_FEATURES 0x00200000 +#define chipMinorFeatures0_MC20 0x00400000 +#define chipMinorFeatures0_MSAA_SIDEBAND 0x00800000 +#define chipMinorFeatures0_BUG_FIXES0 0x01000000 +#define chipMinorFeatures0_VAA 0x02000000 +#define chipMinorFeatures0_BYPASS_IN_MSAA 0x04000000 +#define chipMinorFeatures0_HZ 0x08000000 +#define chipMinorFeatures0_NEW_TEXTURE 0x10000000 +#define chipMinorFeatures0_2D_A8_TARGET 0x20000000 +#define chipMinorFeatures0_CORRECT_STENCIL 0x40000000 +#define chipMinorFeatures0_ENHANCE_VR 0x80000000 +#define chipMinorFeatures1_RSUV_SWIZZLE 0x00000001 +#define chipMinorFeatures1_V2_COMPRESSION 0x00000002 +#define chipMinorFeatures1_VG_DOUBLE_BUFFER 0x00000004 +#define chipMinorFeatures1_EXTRA_EVENT_STATES 0x00000008 +#define chipMinorFeatures1_NO_STRIPING_NEEDED 0x00000010 +#define chipMinorFeatures1_TEXTURE_STRIDE 0x00000020 +#define chipMinorFeatures1_BUG_FIXES3 0x00000040 +#define chipMinorFeatures1_AUTO_DISABLE 0x00000080 +#define chipMinorFeatures1_AUTO_RESTART_TS 0x00000100 +#define chipMinorFeatures1_DISABLE_PE_GATING 0x00000200 +#define chipMinorFeatures1_L2_WINDOWING 0x00000400 +#define chipMinorFeatures1_HALF_FLOAT 0x00000800 +#define chipMinorFeatures1_PIXEL_DITHER 0x00001000 +#define chipMinorFeatures1_TWO_STENCIL_REFERENCE 0x00002000 +#define chipMinorFeatures1_EXTENDED_PIXEL_FORMAT 0x00004000 +#define chipMinorFeatures1_CORRECT_MIN_MAX_DEPTH 0x00008000 +#define chipMinorFeatures1_2D_DITHER 0x00010000 +#define chipMinorFeatures1_BUG_FIXES5 0x00020000 +#define chipMinorFeatures1_NEW_2D 0x00040000 +#define chipMinorFeatures1_NEW_FP 0x00080000 +#define chipMinorFeatures1_TEXTURE_HALIGN 0x00100000 +#define chipMinorFeatures1_NON_POWER_OF_TWO 0x00200000 +#define chipMinorFeatures1_LINEAR_TEXTURE_SUPPORT 0x00400000 +#define chipMinorFeatures1_HALTI0 0x00800000 +#define chipMinorFeatures1_CORRECT_OVERFLOW_VG 0x01000000 +#define chipMinorFeatures1_NEGATIVE_LOG_FIX 0x02000000 +#define chipMinorFeatures1_RESOLVE_OFFSET 0x04000000 +#define chipMinorFeatures1_OK_TO_GATE_AXI_CLOCK 0x08000000 +#define chipMinorFeatures1_MMU_VERSION 0x10000000 +#define chipMinorFeatures1_WIDE_LINE 0x20000000 +#define chipMinorFeatures1_BUG_FIXES6 0x40000000 +#define chipMinorFeatures1_FC_FLUSH_STALL 0x80000000 +#define chipMinorFeatures2_LINE_LOOP 0x00000001 +#define chipMinorFeatures2_LOGIC_OP 0x00000002 +#define chipMinorFeatures2_UNK2 0x00000004 +#define chipMinorFeatures2_SUPERTILED_TEXTURE 0x00000008 +#define chipMinorFeatures2_UNK4 0x00000010 +#define chipMinorFeatures2_RECT_PRIMITIVE 0x00000020 +#define chipMinorFeatures2_COMPOSITION 0x00000040 +#define chipMinorFeatures2_CORRECT_AUTO_DISABLE_COUNT 
0x00000080 +#define chipMinorFeatures2_UNK8 0x00000100 +#define chipMinorFeatures2_UNK9 0x00000200 +#define chipMinorFeatures2_UNK10 0x00000400 +#define chipMinorFeatures2_SAMPLERBASE_16 0x00000800 +#define chipMinorFeatures2_UNK12 0x00001000 +#define chipMinorFeatures2_UNK13 0x00002000 +#define chipMinorFeatures2_UNK14 0x00004000 +#define chipMinorFeatures2_EXTRA_TEXTURE_STATE 0x00008000 +#define chipMinorFeatures2_FULL_DIRECTFB 0x00010000 +#define chipMinorFeatures2_2D_TILING 0x00020000 +#define chipMinorFeatures2_THREAD_WALKER_IN_PS 0x00040000 +#define chipMinorFeatures2_TILE_FILLER 0x00080000 +#define chipMinorFeatures2_UNK20 0x00100000 +#define chipMinorFeatures2_2D_MULTI_SOURCE_BLIT 0x00200000 +#define chipMinorFeatures2_UNK22 0x00400000 +#define chipMinorFeatures2_UNK23 0x00800000 +#define chipMinorFeatures2_UNK24 0x01000000 +#define chipMinorFeatures2_MIXED_STREAMS 0x02000000 +#define chipMinorFeatures2_2D_420_L2CACHE 0x04000000 +#define chipMinorFeatures2_UNK27 0x08000000 +#define chipMinorFeatures2_2D_NO_INDEX8_BRUSH 0x10000000 +#define chipMinorFeatures2_TEXTURE_TILED_READ 0x20000000 +#define chipMinorFeatures2_UNK30 0x40000000 +#define chipMinorFeatures2_UNK31 0x80000000 +#define chipMinorFeatures3_ROTATION_STALL_FIX 0x00000001 +#define chipMinorFeatures3_UNK1 0x00000002 +#define chipMinorFeatures3_2D_MULTI_SOURCE_BLT_EX 0x00000004 +#define chipMinorFeatures3_UNK3 0x00000008 +#define chipMinorFeatures3_UNK4 0x00000010 +#define chipMinorFeatures3_UNK5 0x00000020 +#define chipMinorFeatures3_UNK6 0x00000040 +#define chipMinorFeatures3_UNK7 0x00000080 +#define chipMinorFeatures3_UNK8 0x00000100 +#define chipMinorFeatures3_UNK9 0x00000200 +#define chipMinorFeatures3_BUG_FIXES10 0x00000400 +#define chipMinorFeatures3_UNK11 0x00000800 +#define chipMinorFeatures3_BUG_FIXES11 0x00001000 +#define chipMinorFeatures3_UNK13 0x00002000 +#define chipMinorFeatures3_UNK14 0x00004000 +#define chipMinorFeatures3_UNK15 0x00008000 +#define chipMinorFeatures3_UNK16 0x00010000 +#define chipMinorFeatures3_UNK17 0x00020000 +#define chipMinorFeatures3_UNK18 0x00040000 +#define chipMinorFeatures3_UNK19 0x00080000 +#define chipMinorFeatures3_UNK20 0x00100000 +#define chipMinorFeatures3_UNK21 0x00200000 +#define chipMinorFeatures3_UNK22 0x00400000 +#define chipMinorFeatures3_UNK23 0x00800000 +#define chipMinorFeatures3_UNK24 0x01000000 +#define chipMinorFeatures3_UNK25 0x02000000 +#define chipMinorFeatures3_UNK26 0x04000000 +#define chipMinorFeatures3_UNK27 0x08000000 +#define chipMinorFeatures3_UNK28 0x10000000 +#define chipMinorFeatures3_UNK29 0x20000000 +#define chipMinorFeatures3_UNK30 0x40000000 +#define chipMinorFeatures3_UNK31 0x80000000 +#define chipMinorFeatures4_UNK0 0x00000001 +#define chipMinorFeatures4_UNK1 0x00000002 +#define chipMinorFeatures4_UNK2 0x00000004 +#define chipMinorFeatures4_UNK3 0x00000008 +#define chipMinorFeatures4_UNK4 0x00000010 +#define chipMinorFeatures4_UNK5 0x00000020 +#define chipMinorFeatures4_UNK6 0x00000040 +#define chipMinorFeatures4_UNK7 0x00000080 +#define chipMinorFeatures4_UNK8 0x00000100 +#define chipMinorFeatures4_UNK9 0x00000200 +#define chipMinorFeatures4_UNK10 0x00000400 +#define chipMinorFeatures4_UNK11 0x00000800 +#define chipMinorFeatures4_UNK12 0x00001000 +#define chipMinorFeatures4_UNK13 0x00002000 +#define chipMinorFeatures4_UNK14 0x00004000 +#define chipMinorFeatures4_UNK15 0x00008000 +#define chipMinorFeatures4_UNK16 0x00010000 +#define chipMinorFeatures4_UNK17 0x00020000 +#define chipMinorFeatures4_UNK18 0x00040000 +#define chipMinorFeatures4_UNK19 
0x00080000 +#define chipMinorFeatures4_UNK20 0x00100000 +#define chipMinorFeatures4_UNK21 0x00200000 +#define chipMinorFeatures4_UNK22 0x00400000 +#define chipMinorFeatures4_UNK23 0x00800000 +#define chipMinorFeatures4_UNK24 0x01000000 +#define chipMinorFeatures4_UNK25 0x02000000 +#define chipMinorFeatures4_UNK26 0x04000000 +#define chipMinorFeatures4_UNK27 0x08000000 +#define chipMinorFeatures4_UNK28 0x10000000 +#define chipMinorFeatures4_UNK29 0x20000000 +#define chipMinorFeatures4_UNK30 0x40000000 +#define chipMinorFeatures4_UNK31 0x80000000 + +#endif /* COMMON_XML */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c new file mode 100644 index 000000000000..332c55ebba6d --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -0,0 +1,268 @@ +/* + * Copyright (C) 2014 Etnaviv Project + * Author: Christian Gmeiner <christian.gmeiner@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "etnaviv_gpu.h" +#include "etnaviv_gem.h" +#include "etnaviv_mmu.h" + +#include "common.xml.h" +#include "state.xml.h" +#include "cmdstream.xml.h" + +/* + * Command Buffer helper: + */ + + +static inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data) +{ + u32 *vaddr = (u32 *)buffer->vaddr; + + BUG_ON(buffer->user_size >= buffer->size); + + vaddr[buffer->user_size / 4] = data; + buffer->user_size += 4; +} + +static inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer, + u32 reg, u32 value) +{ + u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR; + + buffer->user_size = ALIGN(buffer->user_size, 8); + + /* write a register via cmd stream */ + OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE | + VIV_FE_LOAD_STATE_HEADER_COUNT(1) | + VIV_FE_LOAD_STATE_HEADER_OFFSET(index)); + OUT(buffer, value); +} + +static inline void CMD_END(struct etnaviv_cmdbuf *buffer) +{ + buffer->user_size = ALIGN(buffer->user_size, 8); + + OUT(buffer, VIV_FE_END_HEADER_OP_END); +} + +static inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer) +{ + buffer->user_size = ALIGN(buffer->user_size, 8); + + OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200); +} + +static inline void CMD_LINK(struct etnaviv_cmdbuf *buffer, + u16 prefetch, u32 address) +{ + buffer->user_size = ALIGN(buffer->user_size, 8); + + OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK | + VIV_FE_LINK_HEADER_PREFETCH(prefetch)); + OUT(buffer, address); +} + +static inline void CMD_STALL(struct etnaviv_cmdbuf *buffer, + u32 from, u32 to) +{ + buffer->user_size = ALIGN(buffer->user_size, 8); + + OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL); + OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to)); +} + +static void etnaviv_cmd_select_pipe(struct etnaviv_cmdbuf *buffer, u8 pipe) +{ + u32 flush; + u32 stall; + + /* + * This assumes that if we're switching to 2D, we're switching + * away from 3D, and vice versa. 
Hence, if we're switching to + * the 2D core, we need to flush the 3D depth and color caches, + * otherwise we need to flush the 2D pixel engine cache. + */ + if (pipe == ETNA_PIPE_2D) + flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR; + else + flush = VIVS_GL_FLUSH_CACHE_PE2D; + + stall = VIVS_GL_SEMAPHORE_TOKEN_FROM(SYNC_RECIPIENT_FE) | + VIVS_GL_SEMAPHORE_TOKEN_TO(SYNC_RECIPIENT_PE); + + CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush); + CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN, stall); + + CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE); + + CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT, + VIVS_GL_PIPE_SELECT_PIPE(pipe)); +} + +static u32 gpu_va(struct etnaviv_gpu *gpu, struct etnaviv_cmdbuf *buf) +{ + return buf->paddr - gpu->memory_base; +} + +static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, + struct etnaviv_cmdbuf *buf, u32 off, u32 len) +{ + u32 size = buf->size; + u32 *ptr = buf->vaddr + off; + + dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n", + ptr, gpu_va(gpu, buf) + off, size - len * 4 - off); + + print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4, + ptr, len * 4, 0); +} + +u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu) +{ + struct etnaviv_cmdbuf *buffer = gpu->buffer; + + /* initialize buffer */ + buffer->user_size = 0; + + CMD_WAIT(buffer); + CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + buffer->user_size - 4); + + return buffer->user_size / 8; +} + +void etnaviv_buffer_end(struct etnaviv_gpu *gpu) +{ + struct etnaviv_cmdbuf *buffer = gpu->buffer; + + /* Replace the last WAIT with an END */ + buffer->user_size -= 16; + + CMD_END(buffer); + mb(); +} + +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, + struct etnaviv_cmdbuf *cmdbuf) +{ + struct etnaviv_cmdbuf *buffer = gpu->buffer; + u32 *lw = buffer->vaddr + buffer->user_size - 16; + u32 back, link_target, link_size, reserve_size, extra_size = 0; + + if (drm_debug & DRM_UT_DRIVER) + etnaviv_buffer_dump(gpu, buffer, 0, 0x50); + + /* + * If we need to flush the MMU prior to submitting this buffer, we + * will need to append a mmu flush load state, followed by a new + * link to this buffer - a total of four additional words. + */ + if (gpu->mmu->need_flush || gpu->switch_context) { + /* link command */ + extra_size += 2; + /* flush command */ + if (gpu->mmu->need_flush) + extra_size += 2; + /* pipe switch commands */ + if (gpu->switch_context) + extra_size += 8; + } + + reserve_size = (6 + extra_size) * 4; + + /* + * if we are going to completely overflow the buffer, we need to wrap. 
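+ * Wrapping works because the ring is a single contiguous buffer:
+ * resetting user_size below restarts emission at the top, and the
+ * WAIT previously emitted at the old tail is later patched into a
+ * LINK, so the front end follows the stream across the wrap.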
+ */ + if (buffer->user_size + reserve_size > buffer->size) + buffer->user_size = 0; + + /* save offset back into main buffer */ + back = buffer->user_size + reserve_size - 6 * 4; + link_target = gpu_va(gpu, buffer) + buffer->user_size; + link_size = 6; + + /* Skip over any extra instructions */ + link_target += extra_size * sizeof(u32); + + if (drm_debug & DRM_UT_DRIVER) + pr_info("stream link to 0x%08x @ 0x%08x %p\n", + link_target, gpu_va(gpu, cmdbuf), cmdbuf->vaddr); + + /* jump back from cmd to main buffer */ + CMD_LINK(cmdbuf, link_size, link_target); + + link_target = gpu_va(gpu, cmdbuf); + link_size = cmdbuf->size / 8; + + + + if (drm_debug & DRM_UT_DRIVER) { + print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4, + cmdbuf->vaddr, cmdbuf->size, 0); + + pr_info("link op: %p\n", lw); + pr_info("link addr: %p\n", lw + 1); + pr_info("addr: 0x%08x\n", link_target); + pr_info("back: 0x%08x\n", gpu_va(gpu, buffer) + back); + pr_info("event: %d\n", event); + } + + if (gpu->mmu->need_flush || gpu->switch_context) { + u32 new_target = gpu_va(gpu, buffer) + buffer->user_size; + + if (gpu->mmu->need_flush) { + /* Add the MMU flush */ + CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU, + VIVS_GL_FLUSH_MMU_FLUSH_FEMMU | + VIVS_GL_FLUSH_MMU_FLUSH_UNK1 | + VIVS_GL_FLUSH_MMU_FLUSH_UNK2 | + VIVS_GL_FLUSH_MMU_FLUSH_PEMMU | + VIVS_GL_FLUSH_MMU_FLUSH_UNK4); + + gpu->mmu->need_flush = false; + } + + if (gpu->switch_context) { + etnaviv_cmd_select_pipe(buffer, cmdbuf->exec_state); + gpu->switch_context = false; + } + + /* And the link to the first buffer */ + CMD_LINK(buffer, link_size, link_target); + + /* Update the link target to point to above instructions */ + link_target = new_target; + link_size = extra_size; + } + + /* trigger event */ + CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) | + VIVS_GL_EVENT_FROM_PE); + + /* append WAIT/LINK to main buffer */ + CMD_WAIT(buffer); + CMD_LINK(buffer, 2, gpu_va(gpu, buffer) + (buffer->user_size - 4)); + + /* Change WAIT into a LINK command; write the address first. */ + *(lw + 1) = link_target; + mb(); + *(lw) = VIV_FE_LINK_HEADER_OP_LINK | + VIV_FE_LINK_HEADER_PREFETCH(link_size); + mb(); + + if (drm_debug & DRM_UT_DRIVER) + etnaviv_buffer_dump(gpu, buffer, 0, 0x50); +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c new file mode 100644 index 000000000000..dcfd565c88d1 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/kernel.h> + +#include "etnaviv_gem.h" +#include "etnaviv_gpu.h" + +#include "cmdstream.xml.h" + +#define EXTRACT(val, field) (((val) & field##__MASK) >> field##__SHIFT) + +struct etna_validation_state { + struct etnaviv_gpu *gpu; + const struct drm_etnaviv_gem_submit_reloc *relocs; + unsigned int num_relocs; + u32 *start; +}; + +static const struct { + u16 offset; + u16 size; +} etnaviv_sensitive_states[] __initconst = { +#define ST(start, num) { (start) >> 2, (num) } + /* 2D */ + ST(0x1200, 1), + ST(0x1228, 1), + ST(0x1238, 1), + ST(0x1284, 1), + ST(0x128c, 1), + ST(0x1304, 1), + ST(0x1310, 1), + ST(0x1318, 1), + ST(0x12800, 4), + ST(0x128a0, 4), + ST(0x128c0, 4), + ST(0x12970, 4), + ST(0x12a00, 8), + ST(0x12b40, 8), + ST(0x12b80, 8), + ST(0x12ce0, 8), + /* 3D */ + ST(0x0644, 1), + ST(0x064c, 1), + ST(0x0680, 8), + ST(0x1410, 1), + ST(0x1430, 1), + ST(0x1458, 1), + ST(0x1460, 8), + ST(0x1480, 8), + ST(0x1500, 8), + ST(0x1520, 8), + ST(0x1608, 1), + ST(0x1610, 1), + ST(0x1658, 1), + ST(0x165c, 1), + ST(0x1664, 1), + ST(0x1668, 1), + ST(0x16a4, 1), + ST(0x16c0, 8), + ST(0x16e0, 8), + ST(0x1740, 8), + ST(0x2400, 14 * 16), + ST(0x10800, 32 * 16), +#undef ST +}; + +#define ETNAVIV_STATES_SIZE (VIV_FE_LOAD_STATE_HEADER_OFFSET__MASK + 1u) +static DECLARE_BITMAP(etnaviv_states, ETNAVIV_STATES_SIZE); + +void __init etnaviv_validate_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(etnaviv_sensitive_states); i++) + bitmap_set(etnaviv_states, etnaviv_sensitive_states[i].offset, + etnaviv_sensitive_states[i].size); +} + +static void etnaviv_warn_if_non_sensitive(struct etna_validation_state *state, + unsigned int buf_offset, unsigned int state_addr) +{ + if (state->num_relocs && state->relocs->submit_offset < buf_offset) { + dev_warn_once(state->gpu->dev, + "%s: relocation for non-sensitive state 0x%x at offset %u\n", + __func__, state_addr, + state->relocs->submit_offset); + while (state->num_relocs && + state->relocs->submit_offset < buf_offset) { + state->relocs++; + state->num_relocs--; + } + } +} + +static bool etnaviv_validate_load_state(struct etna_validation_state *state, + u32 *ptr, unsigned int state_offset, unsigned int num) +{ + unsigned int size = min(ETNAVIV_STATES_SIZE, state_offset + num); + unsigned int st_offset = state_offset, buf_offset; + + for_each_set_bit_from(st_offset, etnaviv_states, size) { + buf_offset = (ptr - state->start + + st_offset - state_offset) * 4; + + etnaviv_warn_if_non_sensitive(state, buf_offset, st_offset * 4); + if (state->num_relocs && + state->relocs->submit_offset == buf_offset) { + state->relocs++; + state->num_relocs--; + continue; + } + + dev_warn_ratelimited(state->gpu->dev, + "%s: load state touches restricted state 0x%x at offset %u\n", + __func__, st_offset * 4, buf_offset); + return false; + } + + if (state->num_relocs) { + buf_offset = (ptr - state->start + num) * 4; + etnaviv_warn_if_non_sensitive(state, buf_offset, st_offset * 4 + + state->relocs->submit_offset - + buf_offset); + } + + return true; +} + +static uint8_t cmd_length[32] = { + [FE_OPCODE_DRAW_PRIMITIVES] = 4, + [FE_OPCODE_DRAW_INDEXED_PRIMITIVES] = 6, + [FE_OPCODE_NOP] = 2, + [FE_OPCODE_STALL] = 2, +}; + +bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu, u32 *stream, + unsigned int size, + struct drm_etnaviv_gem_submit_reloc *relocs, + unsigned int reloc_size) +{ + struct etna_validation_state state; + u32 *buf = stream; + u32 *end = buf + size; + + state.gpu = gpu; + state.relocs = relocs; + state.num_relocs = reloc_size; + state.start = 
stream; + + while (buf < end) { + u32 cmd = *buf; + unsigned int len, n, off; + unsigned int op = cmd >> 27; + + switch (op) { + case FE_OPCODE_LOAD_STATE: + n = EXTRACT(cmd, VIV_FE_LOAD_STATE_HEADER_COUNT); + len = ALIGN(1 + n, 2); + if (buf + len > end) + break; + + off = EXTRACT(cmd, VIV_FE_LOAD_STATE_HEADER_OFFSET); + if (!etnaviv_validate_load_state(&state, buf + 1, + off, n)) + return false; + break; + + case FE_OPCODE_DRAW_2D: + n = EXTRACT(cmd, VIV_FE_DRAW_2D_HEADER_COUNT); + if (n == 0) + n = 256; + len = 2 + n * 2; + break; + + default: + len = cmd_length[op]; + if (len == 0) { + dev_err(gpu->dev, "%s: op %u not permitted at offset %tu\n", + __func__, op, buf - state.start); + return false; + } + break; + } + + buf += len; + } + + if (buf > end) { + dev_err(gpu->dev, "%s: commands overflow end of buffer: %tu > %u\n", + __func__, buf - state.start, size); + return false; + } + + return true; +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c new file mode 100644 index 000000000000..5c89ebb52fd2 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -0,0 +1,707 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/component.h> +#include <linux/of_platform.h> + +#include "etnaviv_drv.h" +#include "etnaviv_gpu.h" +#include "etnaviv_gem.h" +#include "etnaviv_mmu.h" +#include "etnaviv_gem.h" + +#ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING +static bool reglog; +MODULE_PARM_DESC(reglog, "Enable register read/write logging"); +module_param(reglog, bool, 0600); +#else +#define reglog 0 +#endif + +void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name, + const char *dbgname) +{ + struct resource *res; + void __iomem *ptr; + + if (name) + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name); + else + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + ptr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ptr)) { + dev_err(&pdev->dev, "failed to ioremap %s: %ld\n", name, + PTR_ERR(ptr)); + return ptr; + } + + if (reglog) + dev_printk(KERN_DEBUG, &pdev->dev, "IO:region %s 0x%p %08zx\n", + dbgname, ptr, (size_t)resource_size(res)); + + return ptr; +} + +void etnaviv_writel(u32 data, void __iomem *addr) +{ + if (reglog) + printk(KERN_DEBUG "IO:W %p %08x\n", addr, data); + + writel(data, addr); +} + +u32 etnaviv_readl(const void __iomem *addr) +{ + u32 val = readl(addr); + + if (reglog) + printk(KERN_DEBUG "IO:R %p %08x\n", addr, val); + + return val; +} + +/* + * DRM operations: + */ + + +static void load_gpu(struct drm_device *dev) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + unsigned int i; + + for (i = 0; i < ETNA_MAX_PIPES; i++) { + struct etnaviv_gpu *g = priv->gpu[i]; + + if (g) { + int ret; + + ret = etnaviv_gpu_init(g); + if (ret) { + dev_err(g->dev, "hw init failed: %d\n", ret); + priv->gpu[i] = NULL; + } + } + } +} + +static int etnaviv_open(struct drm_device *dev, struct drm_file 
*file) +{ + struct etnaviv_file_private *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + file->driver_priv = ctx; + + return 0; +} + +static void etnaviv_preclose(struct drm_device *dev, struct drm_file *file) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + struct etnaviv_file_private *ctx = file->driver_priv; + unsigned int i; + + for (i = 0; i < ETNA_MAX_PIPES; i++) { + struct etnaviv_gpu *gpu = priv->gpu[i]; + + if (gpu) { + mutex_lock(&gpu->lock); + if (gpu->lastctx == ctx) + gpu->lastctx = NULL; + mutex_unlock(&gpu->lock); + } + } + + kfree(ctx); +} + +/* + * DRM debugfs: + */ + +#ifdef CONFIG_DEBUG_FS +static int etnaviv_gem_show(struct drm_device *dev, struct seq_file *m) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + + etnaviv_gem_describe_objects(priv, m); + + return 0; +} + +static int etnaviv_mm_show(struct drm_device *dev, struct seq_file *m) +{ + int ret; + + read_lock(&dev->vma_offset_manager->vm_lock); + ret = drm_mm_dump_table(m, &dev->vma_offset_manager->vm_addr_space_mm); + read_unlock(&dev->vma_offset_manager->vm_lock); + + return ret; +} + +static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m) +{ + seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev)); + + mutex_lock(&gpu->mmu->lock); + drm_mm_dump_table(m, &gpu->mmu->mm); + mutex_unlock(&gpu->mmu->lock); + + return 0; +} + +static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m) +{ + struct etnaviv_cmdbuf *buf = gpu->buffer; + u32 size = buf->size; + u32 *ptr = buf->vaddr; + u32 i; + + seq_printf(m, "virt %p - phys 0x%llx - free 0x%08x\n", + buf->vaddr, (u64)buf->paddr, size - buf->user_size); + + for (i = 0; i < size / 4; i++) { + if (i && !(i % 4)) + seq_puts(m, "\n"); + if (i % 4 == 0) + seq_printf(m, "\t0x%p: ", ptr + i); + seq_printf(m, "%08x ", *(ptr + i)); + } + seq_puts(m, "\n"); +} + +static int etnaviv_ring_show(struct etnaviv_gpu *gpu, struct seq_file *m) +{ + seq_printf(m, "Ring Buffer (%s): ", dev_name(gpu->dev)); + + mutex_lock(&gpu->lock); + etnaviv_buffer_dump(gpu, m); + mutex_unlock(&gpu->lock); + + return 0; +} + +static int show_unlocked(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + int (*show)(struct drm_device *dev, struct seq_file *m) = + node->info_ent->data; + + return show(dev, m); +} + +static int show_each_gpu(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct etnaviv_drm_private *priv = dev->dev_private; + struct etnaviv_gpu *gpu; + int (*show)(struct etnaviv_gpu *gpu, struct seq_file *m) = + node->info_ent->data; + unsigned int i; + int ret = 0; + + for (i = 0; i < ETNA_MAX_PIPES; i++) { + gpu = priv->gpu[i]; + if (!gpu) + continue; + + ret = show(gpu, m); + if (ret < 0) + break; + } + + return ret; +} + +static struct drm_info_list etnaviv_debugfs_list[] = { + {"gpu", show_each_gpu, 0, etnaviv_gpu_debugfs}, + {"gem", show_unlocked, 0, etnaviv_gem_show}, + { "mm", show_unlocked, 0, etnaviv_mm_show }, + {"mmu", show_each_gpu, 0, etnaviv_mmu_show}, + {"ring", show_each_gpu, 0, etnaviv_ring_show}, +}; + +static int etnaviv_debugfs_init(struct drm_minor *minor) +{ + struct drm_device *dev = minor->dev; + int ret; + + ret = drm_debugfs_create_files(etnaviv_debugfs_list, + ARRAY_SIZE(etnaviv_debugfs_list), + minor->debugfs_root, minor); + + if (ret) { + dev_err(dev->dev, "could not 
install etnaviv_debugfs_list\n"); + return ret; + } + + return ret; +} + +static void etnaviv_debugfs_cleanup(struct drm_minor *minor) +{ + drm_debugfs_remove_files(etnaviv_debugfs_list, + ARRAY_SIZE(etnaviv_debugfs_list), minor); +} +#endif + +/* + * DRM ioctls: + */ + +static int etnaviv_ioctl_get_param(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + struct drm_etnaviv_param *args = data; + struct etnaviv_gpu *gpu; + + if (args->pipe >= ETNA_MAX_PIPES) + return -EINVAL; + + gpu = priv->gpu[args->pipe]; + if (!gpu) + return -ENXIO; + + return etnaviv_gpu_get_param(gpu, args->param, &args->value); +} + +static int etnaviv_ioctl_gem_new(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_gem_new *args = data; + + if (args->flags & ~(ETNA_BO_CACHED | ETNA_BO_WC | ETNA_BO_UNCACHED | + ETNA_BO_FORCE_MMU)) + return -EINVAL; + + return etnaviv_gem_new_handle(dev, file, args->size, + args->flags, &args->handle); +} + +#define TS(t) ((struct timespec){ \ + .tv_sec = (t).tv_sec, \ + .tv_nsec = (t).tv_nsec \ +}) + +static int etnaviv_ioctl_gem_cpu_prep(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_gem_cpu_prep *args = data; + struct drm_gem_object *obj; + int ret; + + if (args->op & ~(ETNA_PREP_READ | ETNA_PREP_WRITE | ETNA_PREP_NOSYNC)) + return -EINVAL; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + ret = etnaviv_gem_cpu_prep(obj, args->op, &TS(args->timeout)); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int etnaviv_ioctl_gem_cpu_fini(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_gem_cpu_fini *args = data; + struct drm_gem_object *obj; + int ret; + + if (args->flags) + return -EINVAL; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + ret = etnaviv_gem_cpu_fini(obj); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int etnaviv_ioctl_gem_info(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_gem_info *args = data; + struct drm_gem_object *obj; + int ret; + + if (args->pad) + return -EINVAL; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + ret = etnaviv_gem_mmap_offset(obj, &args->offset); + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static int etnaviv_ioctl_wait_fence(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_wait_fence *args = data; + struct etnaviv_drm_private *priv = dev->dev_private; + struct timespec *timeout = &TS(args->timeout); + struct etnaviv_gpu *gpu; + + if (args->flags & ~(ETNA_WAIT_NONBLOCK)) + return -EINVAL; + + if (args->pipe >= ETNA_MAX_PIPES) + return -EINVAL; + + gpu = priv->gpu[args->pipe]; + if (!gpu) + return -ENXIO; + + if (args->flags & ETNA_WAIT_NONBLOCK) + timeout = NULL; + + return etnaviv_gpu_wait_fence_interruptible(gpu, args->fence, + timeout); +} + +static int etnaviv_ioctl_gem_userptr(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_etnaviv_gem_userptr *args = data; + int access; + + if (args->flags & ~(ETNA_USERPTR_READ|ETNA_USERPTR_WRITE) || + args->flags == 0) + return -EINVAL; + + if (offset_in_page(args->user_ptr | args->user_size) || + (uintptr_t)args->user_ptr != args->user_ptr || + (u32)args->user_size != args->user_size || + args->user_ptr & ~PAGE_MASK) + return 
-EINVAL; + + if (args->flags & ETNA_USERPTR_WRITE) + access = VERIFY_WRITE; + else + access = VERIFY_READ; + + if (!access_ok(access, (void __user *)(unsigned long)args->user_ptr, + args->user_size)) + return -EFAULT; + + return etnaviv_gem_new_userptr(dev, file, args->user_ptr, + args->user_size, args->flags, + &args->handle); +} + +static int etnaviv_ioctl_gem_wait(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + struct drm_etnaviv_gem_wait *args = data; + struct timespec *timeout = &TS(args->timeout); + struct drm_gem_object *obj; + struct etnaviv_gpu *gpu; + int ret; + + if (args->flags & ~(ETNA_WAIT_NONBLOCK)) + return -EINVAL; + + if (args->pipe >= ETNA_MAX_PIPES) + return -EINVAL; + + gpu = priv->gpu[args->pipe]; + if (!gpu) + return -ENXIO; + + obj = drm_gem_object_lookup(dev, file, args->handle); + if (!obj) + return -ENOENT; + + if (args->flags & ETNA_WAIT_NONBLOCK) + timeout = NULL; + + ret = etnaviv_gem_wait_bo(gpu, obj, timeout); + + drm_gem_object_unreference_unlocked(obj); + + return ret; +} + +static const struct drm_ioctl_desc etnaviv_ioctls[] = { +#define ETNA_IOCTL(n, func, flags) \ + DRM_IOCTL_DEF_DRV(ETNAVIV_##n, etnaviv_ioctl_##func, flags) + ETNA_IOCTL(GET_PARAM, get_param, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_NEW, gem_new, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_INFO, gem_info, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_CPU_PREP, gem_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_CPU_FINI, gem_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_SUBMIT, gem_submit, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(WAIT_FENCE, wait_fence, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_USERPTR, gem_userptr, DRM_AUTH|DRM_RENDER_ALLOW), + ETNA_IOCTL(GEM_WAIT, gem_wait, DRM_AUTH|DRM_RENDER_ALLOW), +}; + +static const struct vm_operations_struct vm_ops = { + .fault = etnaviv_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .poll = drm_poll, + .read = drm_read, + .llseek = no_llseek, + .mmap = etnaviv_gem_mmap, +}; + +static struct drm_driver etnaviv_drm_driver = { + .driver_features = DRIVER_HAVE_IRQ | + DRIVER_GEM | + DRIVER_PRIME | + DRIVER_RENDER, + .open = etnaviv_open, + .preclose = etnaviv_preclose, + .set_busid = drm_platform_set_busid, + .gem_free_object = etnaviv_gem_free_object, + .gem_vm_ops = &vm_ops, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_pin = etnaviv_gem_prime_pin, + .gem_prime_unpin = etnaviv_gem_prime_unpin, + .gem_prime_get_sg_table = etnaviv_gem_prime_get_sg_table, + .gem_prime_import_sg_table = etnaviv_gem_prime_import_sg_table, + .gem_prime_vmap = etnaviv_gem_prime_vmap, + .gem_prime_vunmap = etnaviv_gem_prime_vunmap, +#ifdef CONFIG_DEBUG_FS + .debugfs_init = etnaviv_debugfs_init, + .debugfs_cleanup = etnaviv_debugfs_cleanup, +#endif + .ioctls = etnaviv_ioctls, + .num_ioctls = DRM_ETNAVIV_NUM_IOCTLS, + .fops = &fops, + .name = "etnaviv", + .desc = "etnaviv DRM", + .date = "20151214", + .major = 1, + .minor = 0, +}; + +/* + * Platform driver: + */ +static int etnaviv_bind(struct device *dev) +{ + struct etnaviv_drm_private *priv; + struct drm_device *drm; + int ret; + + 
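+	/*
+	 * Allocate the DRM device first; the GPU cores matched by the
+	 * component framework are bound to it further below.
+	 */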
drm = drm_dev_alloc(&etnaviv_drm_driver, dev); + if (!drm) + return -ENOMEM; + + drm->platformdev = to_platform_device(dev); + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(dev, "failed to allocate private data\n"); + ret = -ENOMEM; + goto out_unref; + } + drm->dev_private = priv; + + priv->wq = alloc_ordered_workqueue("etnaviv", 0); + if (!priv->wq) { + ret = -ENOMEM; + goto out_wq; + } + + mutex_init(&priv->gem_lock); + INIT_LIST_HEAD(&priv->gem_list); + priv->num_gpus = 0; + + dev_set_drvdata(dev, drm); + + ret = component_bind_all(dev, drm); + if (ret < 0) + goto out_bind; + + load_gpu(drm); + + ret = drm_dev_register(drm, 0); + if (ret) + goto out_register; + + return 0; + +out_register: + component_unbind_all(dev, drm); +out_bind: + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); +out_wq: + kfree(priv); +out_unref: + drm_dev_unref(drm); + + return ret; +} + +static void etnaviv_unbind(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct etnaviv_drm_private *priv = drm->dev_private; + + drm_dev_unregister(drm); + + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); + + component_unbind_all(dev, drm); + + drm->dev_private = NULL; + kfree(priv); + + drm_put_dev(drm); +} + +static const struct component_master_ops etnaviv_master_ops = { + .bind = etnaviv_bind, + .unbind = etnaviv_unbind, +}; + +static int compare_of(struct device *dev, void *data) +{ + struct device_node *np = data; + + return dev->of_node == np; +} + +static int compare_str(struct device *dev, void *data) +{ + return !strcmp(dev_name(dev), data); +} + +static int etnaviv_pdev_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + struct component_match *match = NULL; + + dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + + if (node) { + struct device_node *core_node; + int i; + + for (i = 0; ; i++) { + core_node = of_parse_phandle(node, "cores", i); + if (!core_node) + break; + + component_match_add(&pdev->dev, &match, compare_of, + core_node); + of_node_put(core_node); + } + } else if (dev->platform_data) { + char **names = dev->platform_data; + unsigned i; + + for (i = 0; names[i]; i++) + component_match_add(dev, &match, compare_str, names[i]); + } + + return component_master_add_with_match(dev, &etnaviv_master_ops, match); +} + +static int etnaviv_pdev_remove(struct platform_device *pdev) +{ + component_master_del(&pdev->dev, &etnaviv_master_ops); + + return 0; +} + +static const struct of_device_id dt_match[] = { + { .compatible = "fsl,imx-gpu-subsystem" }, + { .compatible = "marvell,dove-gpu-subsystem" }, + {} +}; +MODULE_DEVICE_TABLE(of, dt_match); + +static struct platform_driver etnaviv_platform_driver = { + .probe = etnaviv_pdev_probe, + .remove = etnaviv_pdev_remove, + .driver = { + .owner = THIS_MODULE, + .name = "etnaviv", + .of_match_table = dt_match, + }, +}; + +static int __init etnaviv_init(void) +{ + int ret; + + etnaviv_validate_init(); + + ret = platform_driver_register(&etnaviv_gpu_driver); + if (ret != 0) + return ret; + + ret = platform_driver_register(&etnaviv_platform_driver); + if (ret != 0) + platform_driver_unregister(&etnaviv_gpu_driver); + + return ret; +} +module_init(etnaviv_init); + +static void __exit etnaviv_exit(void) +{ + platform_driver_unregister(&etnaviv_gpu_driver); + platform_driver_unregister(&etnaviv_platform_driver); +} +module_exit(etnaviv_exit); + +MODULE_AUTHOR("Christian Gmeiner <christian.gmeiner@gmail.com>"); +MODULE_AUTHOR("Russell 
King <rmk+kernel@arm.linux.org.uk>"); +MODULE_AUTHOR("Lucas Stach <l.stach@pengutronix.de>"); +MODULE_DESCRIPTION("etnaviv DRM Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:etnaviv"); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h new file mode 100644 index 000000000000..d6bd438bd5be --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_DRV_H__ +#define __ETNAVIV_DRV_H__ + +#include <linux/kernel.h> +#include <linux/clk.h> +#include <linux/cpufreq.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/iommu.h> +#include <linux/types.h> +#include <linux/sizes.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_gem.h> +#include <drm/etnaviv_drm.h> + +struct etnaviv_cmdbuf; +struct etnaviv_gpu; +struct etnaviv_mmu; +struct etnaviv_gem_object; +struct etnaviv_gem_submit; + +struct etnaviv_file_private { + /* currently we don't do anything useful with this.. but when + * per-context address spaces are supported we'd keep track of + * the context's page-tables here. 
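+	 * The dummy member merely keeps the structure non-empty until
+	 * that support is added.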
+ */ + int dummy; +}; + +struct etnaviv_drm_private { + int num_gpus; + struct etnaviv_gpu *gpu[ETNA_MAX_PIPES]; + + /* list of GEM objects: */ + struct mutex gem_lock; + struct list_head gem_list; + + struct workqueue_struct *wq; +}; + +static inline void etnaviv_queue_work(struct drm_device *dev, + struct work_struct *w) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + + queue_work(priv->wq, w); +} + +int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, + struct drm_file *file); + +int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma); +int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset); +int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu, + struct drm_gem_object *obj, u32 *iova); +void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj); +struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj); +void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj); +void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, struct sg_table *sg); +int etnaviv_gem_prime_pin(struct drm_gem_object *obj); +void etnaviv_gem_prime_unpin(struct drm_gem_object *obj); +void *etnaviv_gem_vaddr(struct drm_gem_object *obj); +int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, + struct timespec *timeout); +int etnaviv_gem_cpu_fini(struct drm_gem_object *obj); +void etnaviv_gem_free_object(struct drm_gem_object *obj); +int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file, + u32 size, u32 flags, u32 *handle); +struct drm_gem_object *etnaviv_gem_new_locked(struct drm_device *dev, + u32 size, u32 flags); +struct drm_gem_object *etnaviv_gem_new(struct drm_device *dev, + u32 size, u32 flags); +int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file, + uintptr_t ptr, u32 size, u32 flags, u32 *handle); +u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu); +void etnaviv_buffer_end(struct etnaviv_gpu *gpu); +void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, + struct etnaviv_cmdbuf *cmdbuf); +void etnaviv_validate_init(void); +bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu, + u32 *stream, unsigned int size, + struct drm_etnaviv_gem_submit_reloc *relocs, unsigned int reloc_size); + +#ifdef CONFIG_DEBUG_FS +void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv, + struct seq_file *m); +#endif + +void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name, + const char *dbgname); +void etnaviv_writel(u32 data, void __iomem *addr); +u32 etnaviv_readl(const void __iomem *addr); + +#define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__) +#define VERB(fmt, ...) if (0) DRM_DEBUG(fmt"\n", ##__VA_ARGS__) + +/* + * Return the storage size of a structure with a variable length array. + * The array is nelem elements of elem_size, where the base structure + * is defined by base. If the size overflows size_t, return zero. 
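+ * The guard below is the usual overflow-safe rearrangement:
+ * nelem > (SIZE_MAX - base) / elem_size holds exactly when
+ * base + nelem * elem_size would exceed SIZE_MAX; e.g.
+ * size_vstruct(3, 8, 16) == 16 + 3 * 8 == 40.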
+ */ +static inline size_t size_vstruct(size_t nelem, size_t elem_size, size_t base) +{ + if (elem_size && nelem > (SIZE_MAX - base) / elem_size) + return 0; + return base + nelem * elem_size; +} + +/* returns true if fence a comes after fence b */ +static inline bool fence_after(u32 a, u32 b) +{ + return (s32)(a - b) > 0; +} + +static inline bool fence_after_eq(u32 a, u32 b) +{ + return (s32)(a - b) >= 0; +} + +static inline unsigned long etnaviv_timeout_to_jiffies( + const struct timespec *timeout) +{ + unsigned long timeout_jiffies = timespec_to_jiffies(timeout); + unsigned long start_jiffies = jiffies; + unsigned long remaining_jiffies; + + if (time_after(start_jiffies, timeout_jiffies)) + remaining_jiffies = 0; + else + remaining_jiffies = timeout_jiffies - start_jiffies; + + return remaining_jiffies; +} + +#endif /* __ETNAVIV_DRV_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c new file mode 100644 index 000000000000..bf8fa859e8be --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -0,0 +1,227 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/devcoredump.h> +#include "etnaviv_dump.h" +#include "etnaviv_gem.h" +#include "etnaviv_gpu.h" +#include "etnaviv_mmu.h" +#include "state.xml.h" +#include "state_hi.xml.h" + +struct core_dump_iterator { + void *start; + struct etnaviv_dump_object_header *hdr; + void *data; +}; + +static const unsigned short etnaviv_dump_registers[] = { + VIVS_HI_AXI_STATUS, + VIVS_HI_CLOCK_CONTROL, + VIVS_HI_IDLE_STATE, + VIVS_HI_AXI_CONFIG, + VIVS_HI_INTR_ENBL, + VIVS_HI_CHIP_IDENTITY, + VIVS_HI_CHIP_FEATURE, + VIVS_HI_CHIP_MODEL, + VIVS_HI_CHIP_REV, + VIVS_HI_CHIP_DATE, + VIVS_HI_CHIP_TIME, + VIVS_HI_CHIP_MINOR_FEATURE_0, + VIVS_HI_CACHE_CONTROL, + VIVS_HI_AXI_CONTROL, + VIVS_PM_POWER_CONTROLS, + VIVS_PM_MODULE_CONTROLS, + VIVS_PM_MODULE_STATUS, + VIVS_PM_PULSE_EATER, + VIVS_MC_MMU_FE_PAGE_TABLE, + VIVS_MC_MMU_TX_PAGE_TABLE, + VIVS_MC_MMU_PE_PAGE_TABLE, + VIVS_MC_MMU_PEZ_PAGE_TABLE, + VIVS_MC_MMU_RA_PAGE_TABLE, + VIVS_MC_DEBUG_MEMORY, + VIVS_MC_MEMORY_BASE_ADDR_RA, + VIVS_MC_MEMORY_BASE_ADDR_FE, + VIVS_MC_MEMORY_BASE_ADDR_TX, + VIVS_MC_MEMORY_BASE_ADDR_PEZ, + VIVS_MC_MEMORY_BASE_ADDR_PE, + VIVS_MC_MEMORY_TIMING_CONTROL, + VIVS_MC_BUS_CONFIG, + VIVS_FE_DMA_STATUS, + VIVS_FE_DMA_DEBUG_STATE, + VIVS_FE_DMA_ADDRESS, + VIVS_FE_DMA_LOW, + VIVS_FE_DMA_HIGH, + VIVS_FE_AUTO_FLUSH, +}; + +static void etnaviv_core_dump_header(struct core_dump_iterator *iter, + u32 type, void *data_end) +{ + struct etnaviv_dump_object_header *hdr = iter->hdr; + + hdr->magic = cpu_to_le32(ETDUMP_MAGIC); + hdr->type = cpu_to_le32(type); + hdr->file_offset = cpu_to_le32(iter->data - iter->start); + hdr->file_size = cpu_to_le32(data_end - iter->data); + + iter->hdr++; + iter->data += hdr->file_size; +} + +static void etnaviv_core_dump_registers(struct core_dump_iterator *iter, + struct etnaviv_gpu *gpu) +{ + struct 
etnaviv_dump_registers *reg = iter->data; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(etnaviv_dump_registers); i++, reg++) { + reg->reg = etnaviv_dump_registers[i]; + reg->value = gpu_read(gpu, etnaviv_dump_registers[i]); + } + + etnaviv_core_dump_header(iter, ETDUMP_BUF_REG, reg); +} + +static void etnaviv_core_dump_mmu(struct core_dump_iterator *iter, + struct etnaviv_gpu *gpu, size_t mmu_size) +{ + etnaviv_iommu_dump(gpu->mmu, iter->data); + + etnaviv_core_dump_header(iter, ETDUMP_BUF_MMU, iter->data + mmu_size); +} + +static void etnaviv_core_dump_mem(struct core_dump_iterator *iter, u32 type, + void *ptr, size_t size, u64 iova) +{ + memcpy(iter->data, ptr, size); + + iter->hdr->iova = cpu_to_le64(iova); + + etnaviv_core_dump_header(iter, type, iter->data + size); +} + +void etnaviv_core_dump(struct etnaviv_gpu *gpu) +{ + struct core_dump_iterator iter; + struct etnaviv_vram_mapping *vram; + struct etnaviv_gem_object *obj; + struct etnaviv_cmdbuf *cmd; + unsigned int n_obj, n_bomap_pages; + size_t file_size, mmu_size; + __le64 *bomap, *bomap_start; + + mmu_size = etnaviv_iommu_dump_size(gpu->mmu); + + /* We always dump registers, mmu, ring and end marker */ + n_obj = 4; + n_bomap_pages = 0; + file_size = ARRAY_SIZE(etnaviv_dump_registers) * + sizeof(struct etnaviv_dump_registers) + + mmu_size + gpu->buffer->size; + + /* Add in the active command buffers */ + list_for_each_entry(cmd, &gpu->active_cmd_list, node) { + file_size += cmd->size; + n_obj++; + } + + /* Add in the active buffer objects */ + list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) { + if (!vram->use) + continue; + + obj = vram->object; + file_size += obj->base.size; + n_bomap_pages += obj->base.size >> PAGE_SHIFT; + n_obj++; + } + + /* If we have any buffer objects, add a bomap object */ + if (n_bomap_pages) { + file_size += n_bomap_pages * sizeof(__le64); + n_obj++; + } + + /* Add the size of the headers */ + file_size += sizeof(*iter.hdr) * n_obj; + + /* Allocate the file in vmalloc memory, it's likely to be big */ + iter.start = vmalloc(file_size); + if (!iter.start) { + dev_warn(gpu->dev, "failed to allocate devcoredump file\n"); + return; + } + + /* Point the data member after the headers */ + iter.hdr = iter.start; + iter.data = &iter.hdr[n_obj]; + + memset(iter.hdr, 0, iter.data - iter.start); + + etnaviv_core_dump_registers(&iter, gpu); + etnaviv_core_dump_mmu(&iter, gpu, mmu_size); + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr, + gpu->buffer->size, gpu->buffer->paddr); + + list_for_each_entry(cmd, &gpu->active_cmd_list, node) + etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr, + cmd->size, cmd->paddr); + + /* Reserve space for the bomap */ + if (n_bomap_pages) { + bomap_start = bomap = iter.data; + memset(bomap, 0, sizeof(*bomap) * n_bomap_pages); + etnaviv_core_dump_header(&iter, ETDUMP_BUF_BOMAP, + bomap + n_bomap_pages); + } else { + /* Silence warning */ + bomap_start = bomap = NULL; + } + + list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) { + struct page **pages; + void *vaddr; + + if (vram->use == 0) + continue; + + obj = vram->object; + + pages = etnaviv_gem_get_pages(obj); + if (pages) { + int j; + + iter.hdr->data[0] = bomap - bomap_start; + + for (j = 0; j < obj->base.size >> PAGE_SHIFT; j++) + *bomap++ = cpu_to_le64(page_to_phys(*pages++)); + } + + iter.hdr->iova = cpu_to_le64(vram->iova); + + vaddr = etnaviv_gem_vaddr(&obj->base); + if (vaddr && !IS_ERR(vaddr)) + memcpy(iter.data, vaddr, obj->base.size); + + etnaviv_core_dump_header(&iter, 
ETDUMP_BUF_BO, iter.data + + obj->base.size); + } + + etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data); + + dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL); +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.h b/drivers/gpu/drm/etnaviv/etnaviv_dump.h new file mode 100644 index 000000000000..97f2f8db9133 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + * + * Etnaviv devcoredump file definitions + */ +#ifndef ETNAVIV_DUMP_H +#define ETNAVIV_DUMP_H + +#include <linux/types.h> + +enum { + ETDUMP_MAGIC = 0x414e5445, + ETDUMP_BUF_REG = 0, + ETDUMP_BUF_MMU, + ETDUMP_BUF_RING, + ETDUMP_BUF_CMD, + ETDUMP_BUF_BOMAP, + ETDUMP_BUF_BO, + ETDUMP_BUF_END, +}; + +struct etnaviv_dump_object_header { + __le32 magic; + __le32 type; + __le32 file_offset; + __le32 file_size; + __le64 iova; + __le32 data[2]; +}; + +/* Registers object, an array of these */ +struct etnaviv_dump_registers { + __le32 reg; + __le32 value; +}; + +#ifdef __KERNEL__ +struct etnaviv_gpu; +void etnaviv_core_dump(struct etnaviv_gpu *gpu); +#endif + +#endif diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c new file mode 100644 index 000000000000..8d6f859f8200 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -0,0 +1,897 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/spinlock.h> +#include <linux/shmem_fs.h> + +#include "etnaviv_drv.h" +#include "etnaviv_gem.h" +#include "etnaviv_gpu.h" +#include "etnaviv_mmu.h" + +static void etnaviv_gem_scatter_map(struct etnaviv_gem_object *etnaviv_obj) +{ + struct drm_device *dev = etnaviv_obj->base.dev; + struct sg_table *sgt = etnaviv_obj->sgt; + + /* + * For non-cached buffers, ensure the new pages are clean + * because display controller, GPU, etc. are not coherent. + */ + if (etnaviv_obj->flags & ETNA_BO_CACHE_MASK) + dma_map_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); +} + +static void etnaviv_gem_scatterlist_unmap(struct etnaviv_gem_object *etnaviv_obj) +{ + struct drm_device *dev = etnaviv_obj->base.dev; + struct sg_table *sgt = etnaviv_obj->sgt; + + /* + * For non-cached buffers, ensure the new pages are clean + * because display controller, GPU, etc. 
are not coherent: + * + * WARNING: The DMA API does not support concurrent CPU + * and device access to the memory area. With BIDIRECTIONAL, + * we will clean the cache lines which overlap the region, + * and invalidate all cache lines (partially) contained in + * the region. + * + * If you have dirty data in the overlapping cache lines, + * that will corrupt the GPU-written data. If you have + * written into the remainder of the region, this can + * discard those writes. + */ + if (etnaviv_obj->flags & ETNA_BO_CACHE_MASK) + dma_unmap_sg(dev->dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL); +} + +/* called with etnaviv_obj->lock held */ +static int etnaviv_gem_shmem_get_pages(struct etnaviv_gem_object *etnaviv_obj) +{ + struct drm_device *dev = etnaviv_obj->base.dev; + struct page **p = drm_gem_get_pages(&etnaviv_obj->base); + + if (IS_ERR(p)) { + dev_err(dev->dev, "could not get pages: %ld\n", PTR_ERR(p)); + return PTR_ERR(p); + } + + etnaviv_obj->pages = p; + + return 0; +} + +static void put_pages(struct etnaviv_gem_object *etnaviv_obj) +{ + if (etnaviv_obj->sgt) { + etnaviv_gem_scatterlist_unmap(etnaviv_obj); + sg_free_table(etnaviv_obj->sgt); + kfree(etnaviv_obj->sgt); + etnaviv_obj->sgt = NULL; + } + if (etnaviv_obj->pages) { + drm_gem_put_pages(&etnaviv_obj->base, etnaviv_obj->pages, + true, false); + + etnaviv_obj->pages = NULL; + } +} + +struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *etnaviv_obj) +{ + int ret; + + lockdep_assert_held(&etnaviv_obj->lock); + + if (!etnaviv_obj->pages) { + ret = etnaviv_obj->ops->get_pages(etnaviv_obj); + if (ret < 0) + return ERR_PTR(ret); + } + + if (!etnaviv_obj->sgt) { + struct drm_device *dev = etnaviv_obj->base.dev; + int npages = etnaviv_obj->base.size >> PAGE_SHIFT; + struct sg_table *sgt; + + sgt = drm_prime_pages_to_sg(etnaviv_obj->pages, npages); + if (IS_ERR(sgt)) { + dev_err(dev->dev, "failed to allocate sgt: %ld\n", + PTR_ERR(sgt)); + return ERR_CAST(sgt); + } + + etnaviv_obj->sgt = sgt; + + etnaviv_gem_scatter_map(etnaviv_obj); + } + + return etnaviv_obj->pages; +} + +void etnaviv_gem_put_pages(struct etnaviv_gem_object *etnaviv_obj) +{ + lockdep_assert_held(&etnaviv_obj->lock); + /* when we start tracking the pin count, then do something here */ +} + +static int etnaviv_gem_mmap_obj(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + pgprot_t vm_page_prot; + + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + + vm_page_prot = vm_get_page_prot(vma->vm_flags); + + if (etnaviv_obj->flags & ETNA_BO_WC) { + vma->vm_page_prot = pgprot_writecombine(vm_page_prot); + } else if (etnaviv_obj->flags & ETNA_BO_UNCACHED) { + vma->vm_page_prot = pgprot_noncached(vm_page_prot); + } else { + /* + * Shunt off cached objs to shmem file so they have their own + * address_space (so unmap_mapping_range does what we want, + * in particular in the case of mmap'd dmabufs) + */ + fput(vma->vm_file); + get_file(obj->filp); + vma->vm_pgoff = 0; + vma->vm_file = obj->filp; + + vma->vm_page_prot = vm_page_prot; + } + + return 0; +} + +int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct etnaviv_gem_object *obj; + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) { + DBG("mmap failed: %d", ret); + return ret; + } + + obj = to_etnaviv_bo(vma->vm_private_data); + return etnaviv_gem_mmap_obj(vma->vm_private_data, vma); +} + +int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct drm_gem_object *obj = 
vma->vm_private_data; + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + struct page **pages, *page; + pgoff_t pgoff; + int ret; + + /* + * Make sure we don't parallel update on a fault, nor move or remove + * something from beneath our feet. Note that vm_insert_page() is + * specifically coded to take care of this, so we don't have to. + */ + ret = mutex_lock_interruptible(&etnaviv_obj->lock); + if (ret) + goto out; + + /* make sure we have pages attached now */ + pages = etnaviv_gem_get_pages(etnaviv_obj); + mutex_unlock(&etnaviv_obj->lock); + + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto out; + } + + /* We don't use vmf->pgoff since that has the fake offset: */ + pgoff = ((unsigned long)vmf->virtual_address - + vma->vm_start) >> PAGE_SHIFT; + + page = pages[pgoff]; + + VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address, + page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT); + + ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address, page); + +out: + switch (ret) { + case -EAGAIN: + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. + */ + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + default: + return VM_FAULT_SIGBUS; + } +} + +int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset) +{ + int ret; + + /* Make it mmapable */ + ret = drm_gem_create_mmap_offset(obj); + if (ret) + dev_err(obj->dev->dev, "could not allocate mmap offset\n"); + else + *offset = drm_vma_node_offset_addr(&obj->vma_node); + + return ret; +} + +static struct etnaviv_vram_mapping * +etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj, + struct etnaviv_iommu *mmu) +{ + struct etnaviv_vram_mapping *mapping; + + list_for_each_entry(mapping, &obj->vram_list, obj_node) { + if (mapping->mmu == mmu) + return mapping; + } + + return NULL; +} + +int etnaviv_gem_get_iova(struct etnaviv_gpu *gpu, + struct drm_gem_object *obj, u32 *iova) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + struct etnaviv_vram_mapping *mapping; + struct page **pages; + int ret = 0; + + mutex_lock(&etnaviv_obj->lock); + mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu); + if (mapping) { + /* + * Holding the object lock prevents the use count changing + * beneath us. If the use count is zero, the MMU might be + * reaping this object, so take the lock and re-check that + * the MMU owns this mapping to close this race. + */ + if (mapping->use == 0) { + mutex_lock(&gpu->mmu->lock); + if (mapping->mmu == gpu->mmu) + mapping->use += 1; + else + mapping = NULL; + mutex_unlock(&gpu->mmu->lock); + if (mapping) + goto out; + } else { + mapping->use += 1; + goto out; + } + } + + pages = etnaviv_gem_get_pages(etnaviv_obj); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + goto out; + } + + /* + * See if we have a reaped vram mapping we can re-use before + * allocating a fresh mapping. 
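+	 *
+	 * A reaped mapping stays on the object's vram_list with its mmu
+	 * pointer cleared, so it can simply be re-targeted at this GPU's
+	 * MMU below instead of allocating a new mapping structure.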
+	 */
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, NULL);
+	if (!mapping) {
+		mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			/* drop the object lock on this error path too */
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		INIT_LIST_HEAD(&mapping->scan_node);
+		mapping->object = etnaviv_obj;
+	} else {
+		list_del(&mapping->obj_node);
+	}
+
+	mapping->mmu = gpu->mmu;
+	mapping->use = 1;
+
+	ret = etnaviv_iommu_map_gem(gpu->mmu, etnaviv_obj, gpu->memory_base,
+				    mapping);
+	if (ret < 0)
+		kfree(mapping);
+	else
+		list_add_tail(&mapping->obj_node, &etnaviv_obj->vram_list);
+
+out:
+	mutex_unlock(&etnaviv_obj->lock);
+
+	if (!ret) {
+		/* Take a reference on the object */
+		drm_gem_object_reference(obj);
+		*iova = mapping->iova;
+	}
+
+	return ret;
+}
+
+void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct etnaviv_vram_mapping *mapping;
+
+	mutex_lock(&etnaviv_obj->lock);
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
+
+	WARN_ON(mapping->use == 0);
+	mapping->use -= 1;
+	mutex_unlock(&etnaviv_obj->lock);
+
+	drm_gem_object_unreference_unlocked(obj);
+}
+
+void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+
+	mutex_lock(&etnaviv_obj->lock);
+	if (!etnaviv_obj->vaddr) {
+		struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
+
+		if (IS_ERR(pages)) {
+			/* don't leak the object lock on failure */
+			mutex_unlock(&etnaviv_obj->lock);
+			return ERR_CAST(pages);
+		}
+
+		etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
+				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	}
+	mutex_unlock(&etnaviv_obj->lock);
+
+	return etnaviv_obj->vaddr;
+}
+
+static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
+{
+	if (op & ETNA_PREP_READ)
+		return DMA_FROM_DEVICE;
+	else if (op & ETNA_PREP_WRITE)
+		return DMA_TO_DEVICE;
+	else
+		return DMA_BIDIRECTIONAL;
+}
+
+int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
+		struct timespec *timeout)
+{
+	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+	struct drm_device *dev = obj->dev;
+	bool write = !!(op & ETNA_PREP_WRITE);
+	int ret;
+
+	if (op & ETNA_PREP_NOSYNC) {
+		if (!reservation_object_test_signaled_rcu(etnaviv_obj->resv,
+							  write))
+			return -EBUSY;
+	} else {
+		unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
+
+		ret = reservation_object_wait_timeout_rcu(etnaviv_obj->resv,
+							  write, true, remain);
+		if (ret <= 0)
+			return ret == 0 ? 
-ETIMEDOUT : ret; + } + + if (etnaviv_obj->flags & ETNA_BO_CACHED) { + if (!etnaviv_obj->sgt) { + void *ret; + + mutex_lock(&etnaviv_obj->lock); + ret = etnaviv_gem_get_pages(etnaviv_obj); + mutex_unlock(&etnaviv_obj->lock); + if (IS_ERR(ret)) + return PTR_ERR(ret); + } + + dma_sync_sg_for_cpu(dev->dev, etnaviv_obj->sgt->sgl, + etnaviv_obj->sgt->nents, + etnaviv_op_to_dma_dir(op)); + etnaviv_obj->last_cpu_prep_op = op; + } + + return 0; +} + +int etnaviv_gem_cpu_fini(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + if (etnaviv_obj->flags & ETNA_BO_CACHED) { + /* fini without a prep is almost certainly a userspace error */ + WARN_ON(etnaviv_obj->last_cpu_prep_op == 0); + dma_sync_sg_for_device(dev->dev, etnaviv_obj->sgt->sgl, + etnaviv_obj->sgt->nents, + etnaviv_op_to_dma_dir(etnaviv_obj->last_cpu_prep_op)); + etnaviv_obj->last_cpu_prep_op = 0; + } + + return 0; +} + +int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, + struct timespec *timeout) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + return etnaviv_gpu_wait_obj_inactive(gpu, etnaviv_obj, timeout); +} + +#ifdef CONFIG_DEBUG_FS +static void etnaviv_gem_describe_fence(struct fence *fence, + const char *type, struct seq_file *m) +{ + if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + seq_printf(m, "\t%9s: %s %s seq %u\n", + type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); +} + +static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + struct reservation_object *robj = etnaviv_obj->resv; + struct reservation_object_list *fobj; + struct fence *fence; + unsigned long off = drm_vma_node_start(&obj->vma_node); + + seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n", + etnaviv_obj->flags, is_active(etnaviv_obj) ? 
'A' : 'I', + obj->name, obj->refcount.refcount.counter, + off, etnaviv_obj->vaddr, obj->size); + + rcu_read_lock(); + fobj = rcu_dereference(robj->fence); + if (fobj) { + unsigned int i, shared_count = fobj->shared_count; + + for (i = 0; i < shared_count; i++) { + fence = rcu_dereference(fobj->shared[i]); + etnaviv_gem_describe_fence(fence, "Shared", m); + } + } + + fence = rcu_dereference(robj->fence_excl); + if (fence) + etnaviv_gem_describe_fence(fence, "Exclusive", m); + rcu_read_unlock(); +} + +void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv, + struct seq_file *m) +{ + struct etnaviv_gem_object *etnaviv_obj; + int count = 0; + size_t size = 0; + + mutex_lock(&priv->gem_lock); + list_for_each_entry(etnaviv_obj, &priv->gem_list, gem_node) { + struct drm_gem_object *obj = &etnaviv_obj->base; + + seq_puts(m, " "); + etnaviv_gem_describe(obj, m); + count++; + size += obj->size; + } + mutex_unlock(&priv->gem_lock); + + seq_printf(m, "Total %d objects, %zu bytes\n", count, size); +} +#endif + +static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj) +{ + if (etnaviv_obj->vaddr) + vunmap(etnaviv_obj->vaddr); + put_pages(etnaviv_obj); +} + +static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = { + .get_pages = etnaviv_gem_shmem_get_pages, + .release = etnaviv_gem_shmem_release, +}; + +void etnaviv_gem_free_object(struct drm_gem_object *obj) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + struct etnaviv_vram_mapping *mapping, *tmp; + + /* object should not be active */ + WARN_ON(is_active(etnaviv_obj)); + + list_del(&etnaviv_obj->gem_node); + + list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list, + obj_node) { + struct etnaviv_iommu *mmu = mapping->mmu; + + WARN_ON(mapping->use); + + if (mmu) + etnaviv_iommu_unmap_gem(mmu, mapping); + + list_del(&mapping->obj_node); + kfree(mapping); + } + + drm_gem_free_mmap_offset(obj); + etnaviv_obj->ops->release(etnaviv_obj); + if (etnaviv_obj->resv == &etnaviv_obj->_resv) + reservation_object_fini(&etnaviv_obj->_resv); + drm_gem_object_release(obj); + + kfree(etnaviv_obj); +} + +int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + mutex_lock(&priv->gem_lock); + list_add_tail(&etnaviv_obj->gem_node, &priv->gem_list); + mutex_unlock(&priv->gem_lock); + + return 0; +} + +static int etnaviv_gem_new_impl(struct drm_device *dev, u32 size, u32 flags, + struct reservation_object *robj, const struct etnaviv_gem_ops *ops, + struct drm_gem_object **obj) +{ + struct etnaviv_gem_object *etnaviv_obj; + unsigned sz = sizeof(*etnaviv_obj); + bool valid = true; + + /* validate flags */ + switch (flags & ETNA_BO_CACHE_MASK) { + case ETNA_BO_UNCACHED: + case ETNA_BO_CACHED: + case ETNA_BO_WC: + break; + default: + valid = false; + } + + if (!valid) { + dev_err(dev->dev, "invalid cache flag: %x\n", + (flags & ETNA_BO_CACHE_MASK)); + return -EINVAL; + } + + etnaviv_obj = kzalloc(sz, GFP_KERNEL); + if (!etnaviv_obj) + return -ENOMEM; + + etnaviv_obj->flags = flags; + etnaviv_obj->ops = ops; + if (robj) { + etnaviv_obj->resv = robj; + } else { + etnaviv_obj->resv = &etnaviv_obj->_resv; + reservation_object_init(&etnaviv_obj->_resv); + } + + mutex_init(&etnaviv_obj->lock); + INIT_LIST_HEAD(&etnaviv_obj->vram_list); + + *obj = &etnaviv_obj->base; + + return 0; +} + +static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev, + u32 size, u32 flags) 
+{
+	struct drm_gem_object *obj = NULL;
+	int ret;
+
+	size = PAGE_ALIGN(size);
+
+	ret = etnaviv_gem_new_impl(dev, size, flags, NULL,
+				   &etnaviv_gem_shmem_ops, &obj);
+	if (ret)
+		goto fail;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret == 0) {
+		struct address_space *mapping;
+
+		/*
+		 * Our buffers are kept pinned, so allocating them
+		 * from the MOVABLE zone is a really bad idea, and
+		 * conflicts with CMA. See comments above new_inode()
+		 * for why this is required _and_ expected if you're
+		 * going to pin these pages.
+		 */
+		mapping = file_inode(obj->filp)->i_mapping;
+		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+	}
+
+	if (ret)
+		goto fail;
+
+	return obj;
+
+fail:
+	if (obj)
+		drm_gem_object_unreference_unlocked(obj);
+
+	return ERR_PTR(ret);
+}
+
+/* convenience method to construct a GEM buffer object, and userspace handle */
+int etnaviv_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		u32 size, u32 flags, u32 *handle)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = __etnaviv_gem_new(dev, size, flags);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	ret = etnaviv_gem_obj_add(dev, obj);
+	if (ret < 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		return ret;
+	}
+
+	ret = drm_gem_handle_create(file, obj, handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+struct drm_gem_object *etnaviv_gem_new(struct drm_device *dev,
+		u32 size, u32 flags)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = __etnaviv_gem_new(dev, size, flags);
+	if (IS_ERR(obj))
+		return obj;
+
+	ret = etnaviv_gem_obj_add(dev, obj);
+	if (ret < 0) {
+		drm_gem_object_unreference_unlocked(obj);
+		return ERR_PTR(ret);
+	}
+
+	return obj;
+}
+
+int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags,
+	struct reservation_object *robj, const struct etnaviv_gem_ops *ops,
+	struct etnaviv_gem_object **res)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	ret = etnaviv_gem_new_impl(dev, size, flags, robj, ops, &obj);
+	if (ret)
+		return ret;
+
+	drm_gem_private_object_init(dev, obj, size);
+
+	*res = to_etnaviv_bo(obj);
+
+	return 0;
+}
+
+struct get_pages_work {
+	struct work_struct work;
+	struct mm_struct *mm;
+	struct task_struct *task;
+	struct etnaviv_gem_object *etnaviv_obj;
+};
+
+static struct page **etnaviv_gem_userptr_do_get_pages(
+	struct etnaviv_gem_object *etnaviv_obj, struct mm_struct *mm, struct task_struct *task)
+{
+	int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	uintptr_t ptr;
+
+	pvec = drm_malloc_ab(npages, sizeof(struct page *));
+	if (!pvec)
+		return ERR_PTR(-ENOMEM);
+
+	pinned = 0;
+	ptr = etnaviv_obj->userptr.ptr;
+
+	down_read(&mm->mmap_sem);
+	while (pinned < npages) {
+		ret = get_user_pages(task, mm, ptr, npages - pinned,
+				     !etnaviv_obj->userptr.ro, 0,
+				     pvec + pinned, NULL);
+		if (ret < 0)
+			break;
+
+		ptr += ret * PAGE_SIZE;
+		pinned += ret;
+	}
+	up_read(&mm->mmap_sem);
+
+	if (ret < 0) {
+		release_pages(pvec, pinned, 0);
+		drm_free_large(pvec);
+		return ERR_PTR(ret);
+	}
+
+	return pvec;
+}
+
+static void __etnaviv_gem_userptr_get_pages(struct work_struct *_work)
+{
+	struct get_pages_work *work = container_of(_work, typeof(*work), work);
+	struct etnaviv_gem_object *etnaviv_obj = work->etnaviv_obj;
+	struct page **pvec;
+
+	pvec = etnaviv_gem_userptr_do_get_pages(etnaviv_obj, work->mm, work->task);
+
+	mutex_lock(&etnaviv_obj->lock);
+	if (IS_ERR(pvec)) {
+		etnaviv_obj->userptr.work = ERR_CAST(pvec);
+	} else {
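+		/* success: publish the pinned pages and clear the work marker */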
etnaviv_obj->userptr.work = NULL; + etnaviv_obj->pages = pvec; + } + + mutex_unlock(&etnaviv_obj->lock); + drm_gem_object_unreference_unlocked(&etnaviv_obj->base); + + mmput(work->mm); + put_task_struct(work->task); + kfree(work); +} + +static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) +{ + struct page **pvec = NULL; + struct get_pages_work *work; + struct mm_struct *mm; + int ret, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; + + if (etnaviv_obj->userptr.work) { + if (IS_ERR(etnaviv_obj->userptr.work)) { + ret = PTR_ERR(etnaviv_obj->userptr.work); + etnaviv_obj->userptr.work = NULL; + } else { + ret = -EAGAIN; + } + return ret; + } + + mm = get_task_mm(etnaviv_obj->userptr.task); + pinned = 0; + if (mm == current->mm) { + pvec = drm_malloc_ab(npages, sizeof(struct page *)); + if (!pvec) { + mmput(mm); + return -ENOMEM; + } + + pinned = __get_user_pages_fast(etnaviv_obj->userptr.ptr, npages, + !etnaviv_obj->userptr.ro, pvec); + if (pinned < 0) { + drm_free_large(pvec); + mmput(mm); + return pinned; + } + + if (pinned == npages) { + etnaviv_obj->pages = pvec; + mmput(mm); + return 0; + } + } + + release_pages(pvec, pinned, 0); + drm_free_large(pvec); + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + mmput(mm); + return -ENOMEM; + } + + get_task_struct(current); + drm_gem_object_reference(&etnaviv_obj->base); + + work->mm = mm; + work->task = current; + work->etnaviv_obj = etnaviv_obj; + + etnaviv_obj->userptr.work = &work->work; + INIT_WORK(&work->work, __etnaviv_gem_userptr_get_pages); + + etnaviv_queue_work(etnaviv_obj->base.dev, &work->work); + + return -EAGAIN; +} + +static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj) +{ + if (etnaviv_obj->sgt) { + etnaviv_gem_scatterlist_unmap(etnaviv_obj); + sg_free_table(etnaviv_obj->sgt); + kfree(etnaviv_obj->sgt); + } + if (etnaviv_obj->pages) { + int npages = etnaviv_obj->base.size >> PAGE_SHIFT; + + release_pages(etnaviv_obj->pages, npages, 0); + drm_free_large(etnaviv_obj->pages); + } + put_task_struct(etnaviv_obj->userptr.task); +} + +static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = { + .get_pages = etnaviv_gem_userptr_get_pages, + .release = etnaviv_gem_userptr_release, +}; + +int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file, + uintptr_t ptr, u32 size, u32 flags, u32 *handle) +{ + struct etnaviv_gem_object *etnaviv_obj; + int ret; + + ret = etnaviv_gem_new_private(dev, size, ETNA_BO_CACHED, NULL, + &etnaviv_gem_userptr_ops, &etnaviv_obj); + if (ret) + return ret; + + etnaviv_obj->userptr.ptr = ptr; + etnaviv_obj->userptr.task = current; + etnaviv_obj->userptr.ro = !(flags & ETNA_USERPTR_WRITE); + get_task_struct(current); + + ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); + if (ret) { + drm_gem_object_unreference_unlocked(&etnaviv_obj->base); + return ret; + } + + ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle); + + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(&etnaviv_obj->base); + + return ret; +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h new file mode 100644 index 000000000000..a300b4b3d545 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_GEM_H__ +#define __ETNAVIV_GEM_H__ + +#include <linux/reservation.h> +#include "etnaviv_drv.h" + +struct etnaviv_gem_ops; +struct etnaviv_gem_object; + +struct etnaviv_gem_userptr { + uintptr_t ptr; + struct task_struct *task; + struct work_struct *work; + bool ro; +}; + +struct etnaviv_vram_mapping { + struct list_head obj_node; + struct list_head scan_node; + struct list_head mmu_node; + struct etnaviv_gem_object *object; + struct etnaviv_iommu *mmu; + struct drm_mm_node vram_node; + unsigned int use; + u32 iova; +}; + +struct etnaviv_gem_object { + struct drm_gem_object base; + const struct etnaviv_gem_ops *ops; + struct mutex lock; + + u32 flags; + + struct list_head gem_node; + struct etnaviv_gpu *gpu; /* non-null if active */ + atomic_t gpu_active; + u32 access; + + struct page **pages; + struct sg_table *sgt; + void *vaddr; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; + + struct list_head vram_list; + + /* cache maintenance */ + u32 last_cpu_prep_op; + + struct etnaviv_gem_userptr userptr; +}; + +static inline +struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj) +{ + return container_of(obj, struct etnaviv_gem_object, base); +} + +struct etnaviv_gem_ops { + int (*get_pages)(struct etnaviv_gem_object *); + void (*release)(struct etnaviv_gem_object *); +}; + +static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj) +{ + return atomic_read(&etnaviv_obj->gpu_active) != 0; +} + +#define MAX_CMDS 4 + +/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, + * associated with the cmdstream submission for synchronization (and + * make it easier to unwind when things go wrong, etc). This only + * lasts for the duration of the submit-ioctl. + */ +struct etnaviv_gem_submit { + struct drm_device *dev; + struct etnaviv_gpu *gpu; + struct ww_acquire_ctx ticket; + u32 fence; + unsigned int nr_bos; + struct { + u32 flags; + struct etnaviv_gem_object *obj; + u32 iova; + } bos[0]; +}; + +int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, + struct timespec *timeout); +int etnaviv_gem_new_private(struct drm_device *dev, size_t size, u32 flags, + struct reservation_object *robj, const struct etnaviv_gem_ops *ops, + struct etnaviv_gem_object **res); +int etnaviv_gem_obj_add(struct drm_device *dev, struct drm_gem_object *obj); +struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj); +void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj); + +#endif /* __ETNAVIV_GEM_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c new file mode 100644 index 000000000000..e94db4f95770 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/dma-buf.h> +#include "etnaviv_drv.h" +#include "etnaviv_gem.h" + + +struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + BUG_ON(!etnaviv_obj->sgt); /* should have already pinned! */ + + return etnaviv_obj->sgt; +} + +void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj) +{ + return etnaviv_gem_vaddr(obj); +} + +void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) +{ + /* TODO msm_gem_vunmap() */ +} + +int etnaviv_gem_prime_pin(struct drm_gem_object *obj) +{ + if (!obj->import_attach) { + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + mutex_lock(&etnaviv_obj->lock); + etnaviv_gem_get_pages(etnaviv_obj); + mutex_unlock(&etnaviv_obj->lock); + } + return 0; +} + +void etnaviv_gem_prime_unpin(struct drm_gem_object *obj) +{ + if (!obj->import_attach) { + struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); + + mutex_lock(&etnaviv_obj->lock); + etnaviv_gem_put_pages(to_etnaviv_bo(obj)); + mutex_unlock(&etnaviv_obj->lock); + } +} + +static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj) +{ + if (etnaviv_obj->vaddr) + dma_buf_vunmap(etnaviv_obj->base.import_attach->dmabuf, + etnaviv_obj->vaddr); + + /* Don't drop the pages for imported dmabuf, as they are not + * ours, just free the array we allocated: + */ + if (etnaviv_obj->pages) + drm_free_large(etnaviv_obj->pages); + + drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt); +} + +static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = { + /* .get_pages should never be called */ + .release = etnaviv_gem_prime_release, +}; + +struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, struct sg_table *sgt) +{ + struct etnaviv_gem_object *etnaviv_obj; + size_t size = PAGE_ALIGN(attach->dmabuf->size); + int ret, npages; + + ret = etnaviv_gem_new_private(dev, size, ETNA_BO_WC, + attach->dmabuf->resv, + &etnaviv_gem_prime_ops, &etnaviv_obj); + if (ret < 0) + return ERR_PTR(ret); + + npages = size / PAGE_SIZE; + + etnaviv_obj->sgt = sgt; + etnaviv_obj->pages = drm_malloc_ab(npages, sizeof(struct page *)); + if (!etnaviv_obj->pages) { + ret = -ENOMEM; + goto fail; + } + + ret = drm_prime_sg_to_page_addr_arrays(sgt, etnaviv_obj->pages, + NULL, npages); + if (ret) + goto fail; + + ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base); + if (ret) + goto fail; + + return &etnaviv_obj->base; + +fail: + drm_gem_object_unreference_unlocked(&etnaviv_obj->base); + + return ERR_PTR(ret); +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c new file mode 100644 index 000000000000..1aba01a999df --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -0,0 +1,443 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/reservation.h> +#include "etnaviv_drv.h" +#include "etnaviv_gpu.h" +#include "etnaviv_gem.h" + +/* + * Cmdstream submission: + */ + +#define BO_INVALID_FLAGS ~(ETNA_SUBMIT_BO_READ | ETNA_SUBMIT_BO_WRITE) +/* make sure these don't conflict w/ ETNAVIV_SUBMIT_BO_x */ +#define BO_LOCKED 0x4000 +#define BO_PINNED 0x2000 + +static inline void __user *to_user_ptr(u64 address) +{ + return (void __user *)(uintptr_t)address; +} + +static struct etnaviv_gem_submit *submit_create(struct drm_device *dev, + struct etnaviv_gpu *gpu, size_t nr) +{ + struct etnaviv_gem_submit *submit; + size_t sz = size_vstruct(nr, sizeof(submit->bos[0]), sizeof(*submit)); + + submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); + if (submit) { + submit->dev = dev; + submit->gpu = gpu; + + /* initially, until copy_from_user() and bo lookup succeeds: */ + submit->nr_bos = 0; + + ww_acquire_init(&submit->ticket, &reservation_ww_class); + } + + return submit; +} + +static int submit_lookup_objects(struct etnaviv_gem_submit *submit, + struct drm_file *file, struct drm_etnaviv_gem_submit_bo *submit_bos, + unsigned nr_bos) +{ + struct drm_etnaviv_gem_submit_bo *bo; + unsigned i; + int ret = 0; + + spin_lock(&file->table_lock); + + for (i = 0, bo = submit_bos; i < nr_bos; i++, bo++) { + struct drm_gem_object *obj; + + if (bo->flags & BO_INVALID_FLAGS) { + DRM_ERROR("invalid flags: %x\n", bo->flags); + ret = -EINVAL; + goto out_unlock; + } + + submit->bos[i].flags = bo->flags; + + /* normally use drm_gem_object_lookup(), but for bulk lookup + * all under single table_lock just hit object_idr directly: + */ + obj = idr_find(&file->object_idr, bo->handle); + if (!obj) { + DRM_ERROR("invalid handle %u at index %u\n", + bo->handle, i); + ret = -EINVAL; + goto out_unlock; + } + + /* + * Take a refcount on the object. The file table lock + * prevents the object_idr's refcount on this being dropped. 
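+		 *
+		 * Holding table_lock across the whole loop also means the
+		 * handle cannot be torn down between idr_find() and the
+		 * reference taken here, so this cannot race with a final
+		 * unreference.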
+ */ + drm_gem_object_reference(obj); + + submit->bos[i].obj = to_etnaviv_bo(obj); + } + +out_unlock: + submit->nr_bos = i; + spin_unlock(&file->table_lock); + + return ret; +} + +static void submit_unlock_object(struct etnaviv_gem_submit *submit, int i) +{ + if (submit->bos[i].flags & BO_LOCKED) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + ww_mutex_unlock(&etnaviv_obj->resv->lock); + submit->bos[i].flags &= ~BO_LOCKED; + } +} + +static int submit_lock_objects(struct etnaviv_gem_submit *submit) +{ + int contended, slow_locked = -1, i, ret = 0; + +retry: + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + if (slow_locked == i) + slow_locked = -1; + + contended = i; + + if (!(submit->bos[i].flags & BO_LOCKED)) { + ret = ww_mutex_lock_interruptible(&etnaviv_obj->resv->lock, + &submit->ticket); + if (ret == -EALREADY) + DRM_ERROR("BO at index %u already on submit list\n", + i); + if (ret) + goto fail; + submit->bos[i].flags |= BO_LOCKED; + } + } + + ww_acquire_done(&submit->ticket); + + return 0; + +fail: + for (; i >= 0; i--) + submit_unlock_object(submit, i); + + if (slow_locked > 0) + submit_unlock_object(submit, slow_locked); + + if (ret == -EDEADLK) { + struct etnaviv_gem_object *etnaviv_obj; + + etnaviv_obj = submit->bos[contended].obj; + + /* we lost out in a seqno race, lock and retry.. */ + ret = ww_mutex_lock_slow_interruptible(&etnaviv_obj->resv->lock, + &submit->ticket); + if (!ret) { + submit->bos[contended].flags |= BO_LOCKED; + slow_locked = contended; + goto retry; + } + } + + return ret; +} + +static int submit_fence_sync(const struct etnaviv_gem_submit *submit) +{ + unsigned int context = submit->gpu->fence_context; + int i, ret = 0; + + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE; + + ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write); + if (ret) + break; + } + + return ret; +} + +static void submit_unpin_objects(struct etnaviv_gem_submit *submit) +{ + int i; + + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + if (submit->bos[i].flags & BO_PINNED) + etnaviv_gem_put_iova(submit->gpu, &etnaviv_obj->base); + + submit->bos[i].iova = 0; + submit->bos[i].flags &= ~BO_PINNED; + } +} + +static int submit_pin_objects(struct etnaviv_gem_submit *submit) +{ + int i, ret = 0; + + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + u32 iova; + + ret = etnaviv_gem_get_iova(submit->gpu, &etnaviv_obj->base, + &iova); + if (ret) + break; + + submit->bos[i].flags |= BO_PINNED; + submit->bos[i].iova = iova; + } + + return ret; +} + +static int submit_bo(struct etnaviv_gem_submit *submit, u32 idx, + struct etnaviv_gem_object **obj, u32 *iova) +{ + if (idx >= submit->nr_bos) { + DRM_ERROR("invalid buffer index: %u (out of %u)\n", + idx, submit->nr_bos); + return -EINVAL; + } + + if (obj) + *obj = submit->bos[idx].obj; + if (iova) + *iova = submit->bos[idx].iova; + + return 0; +} + +/* process the reloc's and patch up the cmdstream as needed: */ +static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream, + u32 size, const struct drm_etnaviv_gem_submit_reloc *relocs, + u32 nr_relocs) +{ + u32 i, last_offset = 0; + u32 *ptr = stream; + int ret; + + for (i = 0; i < nr_relocs; i++) { + const struct drm_etnaviv_gem_submit_reloc *r = relocs + i; + struct etnaviv_gem_object 
*bobj; + u32 iova, off; + + if (unlikely(r->flags)) { + DRM_ERROR("invalid reloc flags\n"); + return -EINVAL; + } + + if (r->submit_offset % 4) { + DRM_ERROR("non-aligned reloc offset: %u\n", + r->submit_offset); + return -EINVAL; + } + + /* offset in dwords: */ + off = r->submit_offset / 4; + + if ((off >= size ) || + (off < last_offset)) { + DRM_ERROR("invalid offset %u at reloc %u\n", off, i); + return -EINVAL; + } + + ret = submit_bo(submit, r->reloc_idx, &bobj, &iova); + if (ret) + return ret; + + if (r->reloc_offset >= + bobj->base.size - sizeof(*ptr)) { + DRM_ERROR("relocation %u outside object", i); + return -EINVAL; + } + + ptr[off] = iova + r->reloc_offset; + + last_offset = off; + } + + return 0; +} + +static void submit_cleanup(struct etnaviv_gem_submit *submit) +{ + unsigned i; + + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + + submit_unlock_object(submit, i); + drm_gem_object_unreference_unlocked(&etnaviv_obj->base); + } + + ww_acquire_fini(&submit->ticket); + kfree(submit); +} + +int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct etnaviv_drm_private *priv = dev->dev_private; + struct drm_etnaviv_gem_submit *args = data; + struct drm_etnaviv_gem_submit_reloc *relocs; + struct drm_etnaviv_gem_submit_bo *bos; + struct etnaviv_gem_submit *submit; + struct etnaviv_cmdbuf *cmdbuf; + struct etnaviv_gpu *gpu; + void *stream; + int ret; + + if (args->pipe >= ETNA_MAX_PIPES) + return -EINVAL; + + gpu = priv->gpu[args->pipe]; + if (!gpu) + return -ENXIO; + + if (args->stream_size % 4) { + DRM_ERROR("non-aligned cmdstream buffer size: %u\n", + args->stream_size); + return -EINVAL; + } + + if (args->exec_state != ETNA_PIPE_3D && + args->exec_state != ETNA_PIPE_2D && + args->exec_state != ETNA_PIPE_VG) { + DRM_ERROR("invalid exec_state: 0x%x\n", args->exec_state); + return -EINVAL; + } + + /* + * Copy the command submission and bo array to kernel space in + * one go, and do this outside of any locks. 
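+	 *
+	 * Working on kernel-private copies also means userspace cannot
+	 * modify the stream between the validation and relocation steps
+	 * below and its execution by the GPU.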
+ */ + bos = drm_malloc_ab(args->nr_bos, sizeof(*bos)); + relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs)); + stream = drm_malloc_ab(1, args->stream_size); + cmdbuf = etnaviv_gpu_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8, + args->nr_bos); + if (!bos || !relocs || !stream || !cmdbuf) { + ret = -ENOMEM; + goto err_submit_cmds; + } + + cmdbuf->exec_state = args->exec_state; + cmdbuf->ctx = file->driver_priv; + + ret = copy_from_user(bos, to_user_ptr(args->bos), + args->nr_bos * sizeof(*bos)); + if (ret) { + ret = -EFAULT; + goto err_submit_cmds; + } + + ret = copy_from_user(relocs, to_user_ptr(args->relocs), + args->nr_relocs * sizeof(*relocs)); + if (ret) { + ret = -EFAULT; + goto err_submit_cmds; + } + + ret = copy_from_user(stream, to_user_ptr(args->stream), + args->stream_size); + if (ret) { + ret = -EFAULT; + goto err_submit_cmds; + } + + submit = submit_create(dev, gpu, args->nr_bos); + if (!submit) { + ret = -ENOMEM; + goto err_submit_cmds; + } + + ret = submit_lookup_objects(submit, file, bos, args->nr_bos); + if (ret) + goto err_submit_objects; + + ret = submit_lock_objects(submit); + if (ret) + goto err_submit_objects; + + if (!etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4, + relocs, args->nr_relocs)) { + ret = -EINVAL; + goto err_submit_objects; + } + + ret = submit_fence_sync(submit); + if (ret) + goto err_submit_objects; + + ret = submit_pin_objects(submit); + if (ret) + goto out; + + ret = submit_reloc(submit, stream, args->stream_size / 4, + relocs, args->nr_relocs); + if (ret) + goto out; + + memcpy(cmdbuf->vaddr, stream, args->stream_size); + cmdbuf->user_size = ALIGN(args->stream_size, 8); + + ret = etnaviv_gpu_submit(gpu, submit, cmdbuf); + if (ret == 0) + cmdbuf = NULL; + + args->fence = submit->fence; + +out: + submit_unpin_objects(submit); + + /* + * If we're returning -EAGAIN, it may be due to the userptr code + * wanting to run its workqueue outside of any locks. Flush our + * workqueue to ensure that it is run in a timely manner. + */ + if (ret == -EAGAIN) + flush_workqueue(priv->wq); + +err_submit_objects: + submit_cleanup(submit); + +err_submit_cmds: + /* if we still own the cmdbuf */ + if (cmdbuf) + etnaviv_gpu_cmdbuf_free(cmdbuf); + if (stream) + drm_free_large(stream); + if (bos) + drm_free_large(bos); + if (relocs) + drm_free_large(relocs); + + return ret; +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c new file mode 100644 index 000000000000..d39093dc37e6 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -0,0 +1,1644 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <linux/component.h> +#include <linux/fence.h> +#include <linux/moduleparam.h> +#include <linux/of_device.h> +#include "etnaviv_dump.h" +#include "etnaviv_gpu.h" +#include "etnaviv_gem.h" +#include "etnaviv_mmu.h" +#include "etnaviv_iommu.h" +#include "etnaviv_iommu_v2.h" +#include "common.xml.h" +#include "state.xml.h" +#include "state_hi.xml.h" +#include "cmdstream.xml.h" + +static const struct platform_device_id gpu_ids[] = { + { .name = "etnaviv-gpu,2d" }, + { }, +}; + +static bool etnaviv_dump_core = true; +module_param_named(dump_core, etnaviv_dump_core, bool, 0600); + +/* + * Driver functions: + */ + +int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value) +{ + switch (param) { + case ETNAVIV_PARAM_GPU_MODEL: + *value = gpu->identity.model; + break; + + case ETNAVIV_PARAM_GPU_REVISION: + *value = gpu->identity.revision; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_0: + *value = gpu->identity.features; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_1: + *value = gpu->identity.minor_features0; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_2: + *value = gpu->identity.minor_features1; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_3: + *value = gpu->identity.minor_features2; + break; + + case ETNAVIV_PARAM_GPU_FEATURES_4: + *value = gpu->identity.minor_features3; + break; + + case ETNAVIV_PARAM_GPU_STREAM_COUNT: + *value = gpu->identity.stream_count; + break; + + case ETNAVIV_PARAM_GPU_REGISTER_MAX: + *value = gpu->identity.register_max; + break; + + case ETNAVIV_PARAM_GPU_THREAD_COUNT: + *value = gpu->identity.thread_count; + break; + + case ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE: + *value = gpu->identity.vertex_cache_size; + break; + + case ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT: + *value = gpu->identity.shader_core_count; + break; + + case ETNAVIV_PARAM_GPU_PIXEL_PIPES: + *value = gpu->identity.pixel_pipes; + break; + + case ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE: + *value = gpu->identity.vertex_output_buffer_size; + break; + + case ETNAVIV_PARAM_GPU_BUFFER_SIZE: + *value = gpu->identity.buffer_size; + break; + + case ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT: + *value = gpu->identity.instruction_count; + break; + + case ETNAVIV_PARAM_GPU_NUM_CONSTANTS: + *value = gpu->identity.num_constants; + break; + + default: + DBG("%s: invalid param: %u", dev_name(gpu->dev), param); + return -EINVAL; + } + + return 0; +} + +static void etnaviv_hw_specs(struct etnaviv_gpu *gpu) +{ + if (gpu->identity.minor_features0 & + chipMinorFeatures0_MORE_MINOR_FEATURES) { + u32 specs[2]; + + specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS); + specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2); + + gpu->identity.stream_count = + (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK) + >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT; + gpu->identity.register_max = + (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK) + >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT; + gpu->identity.thread_count = + (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK) + >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT; + gpu->identity.vertex_cache_size = + (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK) + >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT; + gpu->identity.shader_core_count = + (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK) + >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT; + gpu->identity.pixel_pipes = + (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK) + >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT; + gpu->identity.vertex_output_buffer_size = + (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK) + 
>> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
+
+		gpu->identity.buffer_size =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
+		gpu->identity.instruction_count =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
+		gpu->identity.num_constants =
+			(specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
+				>> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+	}
+
+	/* Fill in the stream count if not specified */
+	if (gpu->identity.stream_count == 0) {
+		if (gpu->identity.model >= 0x1000)
+			gpu->identity.stream_count = 4;
+		else
+			gpu->identity.stream_count = 1;
+	}
+
+	/* Convert the register max value */
+	if (gpu->identity.register_max)
+		gpu->identity.register_max = 1 << gpu->identity.register_max;
+	else if (gpu->identity.model == 0x0400)
+		gpu->identity.register_max = 32;
+	else
+		gpu->identity.register_max = 64;
+
+	/* Convert thread count */
+	if (gpu->identity.thread_count)
+		gpu->identity.thread_count = 1 << gpu->identity.thread_count;
+	else if (gpu->identity.model == 0x0400)
+		gpu->identity.thread_count = 64;
+	else if (gpu->identity.model == 0x0500 ||
+		 gpu->identity.model == 0x0530)
+		gpu->identity.thread_count = 128;
+	else
+		gpu->identity.thread_count = 256;
+
+	if (gpu->identity.vertex_cache_size == 0)
+		gpu->identity.vertex_cache_size = 8;
+
+	if (gpu->identity.shader_core_count == 0) {
+		if (gpu->identity.model >= 0x1000)
+			gpu->identity.shader_core_count = 2;
+		else
+			gpu->identity.shader_core_count = 1;
+	}
+
+	if (gpu->identity.pixel_pipes == 0)
+		gpu->identity.pixel_pipes = 1;
+
+	/* Convert vertex buffer size */
+	if (gpu->identity.vertex_output_buffer_size) {
+		gpu->identity.vertex_output_buffer_size =
+			1 << gpu->identity.vertex_output_buffer_size;
+	} else if (gpu->identity.model == 0x0400) {
+		if (gpu->identity.revision < 0x4000)
+			gpu->identity.vertex_output_buffer_size = 512;
+		else if (gpu->identity.revision < 0x4200)
+			gpu->identity.vertex_output_buffer_size = 256;
+		else
+			gpu->identity.vertex_output_buffer_size = 128;
+	} else {
+		gpu->identity.vertex_output_buffer_size = 512;
+	}
+
+	switch (gpu->identity.instruction_count) {
+	case 0:
+		if ((gpu->identity.model == 0x2000 &&
+		     gpu->identity.revision == 0x5108) ||
+		    gpu->identity.model == 0x880)
+			gpu->identity.instruction_count = 512;
+		else
+			gpu->identity.instruction_count = 256;
+		break;
+
+	case 1:
+		gpu->identity.instruction_count = 1024;
+		break;
+
+	case 2:
+		gpu->identity.instruction_count = 2048;
+		break;
+
+	default:
+		gpu->identity.instruction_count = 256;
+		break;
+	}
+
+	if (gpu->identity.num_constants == 0)
+		gpu->identity.num_constants = 168;
+}
+
+static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
+{
+	u32 chipIdentity;
+
+	chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
+
+	/* Special case for older graphics cores. */
+	if (VIVS_HI_CHIP_IDENTITY_FAMILY(chipIdentity) == 0x01) {
+		gpu->identity.model = 0x500; /* gc500 */
+		gpu->identity.revision = VIVS_HI_CHIP_IDENTITY_REVISION(chipIdentity);
+	} else {
+
+		gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
+		gpu->identity.revision = gpu_read(gpu, VIVS_HI_CHIP_REV);
+
+		/*
+		 * !!!! HACK ALERT !!!!
+		 * Because people change device IDs without letting software
+		 * know about it - here is the hack to make it all look the
+		 * same. Only for the GC400 family.
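+		 *
+		 * For example, a core reporting model 0x0428 would be
+		 * normalized to 0x0400 by the masking below; only the
+		 * GC420 keeps its real ID.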
+		 */
+		if ((gpu->identity.model & 0xff00) == 0x0400 &&
+		    gpu->identity.model != 0x0420) {
+			gpu->identity.model = gpu->identity.model & 0x0400;
+		}
+
+		/* Another special case */
+		if (gpu->identity.model == 0x300 &&
+		    gpu->identity.revision == 0x2201) {
+			u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
+			u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
+
+			if (chipDate == 0x20080814 && chipTime == 0x12051100) {
+				/*
+				 * This IP has an ECO; put the correct
+				 * revision in it.
+				 */
+				gpu->identity.revision = 0x1051;
+			}
+		}
+	}
+
+	dev_info(gpu->dev, "model: GC%x, revision: %x\n",
+		 gpu->identity.model, gpu->identity.revision);
+
+	gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
+
+	/* Disable fast clear on GC700. */
+	if (gpu->identity.model == 0x700)
+		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
+
+	if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
+	    (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+
+		/*
+		 * GC500 rev 1.x and GC300 rev < 2.0 don't have these
+		 * registers.
+		 */
+		gpu->identity.minor_features0 = 0;
+		gpu->identity.minor_features1 = 0;
+		gpu->identity.minor_features2 = 0;
+		gpu->identity.minor_features3 = 0;
+	} else
+		gpu->identity.minor_features0 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
+
+	if (gpu->identity.minor_features0 &
+	    chipMinorFeatures0_MORE_MINOR_FEATURES) {
+		gpu->identity.minor_features1 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_1);
+		gpu->identity.minor_features2 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
+		gpu->identity.minor_features3 =
+				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+	}
+
+	/* GC600 idle register reports zero bits where modules aren't present */
+	if (gpu->identity.model == chipModel_GC600) {
+		gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
+				 VIVS_HI_IDLE_STATE_RA |
+				 VIVS_HI_IDLE_STATE_SE |
+				 VIVS_HI_IDLE_STATE_PA |
+				 VIVS_HI_IDLE_STATE_SH |
+				 VIVS_HI_IDLE_STATE_PE |
+				 VIVS_HI_IDLE_STATE_DE |
+				 VIVS_HI_IDLE_STATE_FE;
+	} else {
+		gpu->idle_mask = ~VIVS_HI_IDLE_STATE_AXI_LP;
+	}
+
+	etnaviv_hw_specs(gpu);
+}
+
+static void etnaviv_gpu_load_clock(struct etnaviv_gpu *gpu, u32 clock)
+{
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock |
+		  VIVS_HI_CLOCK_CONTROL_FSCALE_CMD_LOAD);
+	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock);
+}
+
+static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
+{
+	u32 control, idle;
+	unsigned long timeout;
+	bool failed = true;
+
+	/* TODO
+	 *
+	 * - clock gating
+	 * - pulse eater
+	 * - what about VG?
+	 */
+
+	/* We hope that the GPU resets in under one second */
+	timeout = jiffies + msecs_to_jiffies(1000);
+
+	while (time_is_after_jiffies(timeout)) {
+		control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
+			  VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
+
+		/* enable clock */
+		etnaviv_gpu_load_clock(gpu, control);
+
+		/* Wait for stable clock. Vivante's code waited for 1ms */
+		usleep_range(1000, 10000);
+
+		/* isolate the GPU. */
+		control |= VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* set soft reset. */
+		control |= VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* wait for reset. */
+		msleep(1);
+
+		/* reset soft reset bit. */
+		control &= ~VIVS_HI_CLOCK_CONTROL_SOFT_RESET;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* reset GPU isolation. */
+		control &= ~VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU;
+		gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, control);
+
+		/* read idle register. 
*/ + idle = gpu_read(gpu, VIVS_HI_IDLE_STATE); + + /* try resetting again if the FE is not idle */ + if ((idle & VIVS_HI_IDLE_STATE_FE) == 0) { + dev_dbg(gpu->dev, "FE is not idle\n"); + continue; + } + + /* read reset register. */ + control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + + /* is the GPU idle? */ + if (((control & VIVS_HI_CLOCK_CONTROL_IDLE_3D) == 0) || + ((control & VIVS_HI_CLOCK_CONTROL_IDLE_2D) == 0)) { + dev_dbg(gpu->dev, "GPU is not idle\n"); + continue; + } + + failed = false; + break; + } + + if (failed) { + idle = gpu_read(gpu, VIVS_HI_IDLE_STATE); + control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL); + + dev_err(gpu->dev, "GPU failed to reset: FE %sidle, 3D %sidle, 2D %sidle\n", + idle & VIVS_HI_IDLE_STATE_FE ? "" : "not ", + control & VIVS_HI_CLOCK_CONTROL_IDLE_3D ? "" : "not ", + control & VIVS_HI_CLOCK_CONTROL_IDLE_2D ? "" : "not "); + + return -EBUSY; + } + + /* We rely on the GPU running, so program the clock */ + control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS | + VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40); + + /* enable clock */ + etnaviv_gpu_load_clock(gpu, control); + + return 0; +} + +static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu) +{ + u16 prefetch; + + if (gpu->identity.model == chipModel_GC320 && + gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 && + (gpu->identity.revision == 0x5007 || + gpu->identity.revision == 0x5220)) { + u32 mc_memory_debug; + + mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff; + + if (gpu->identity.revision == 0x5007) + mc_memory_debug |= 0x0c; + else + mc_memory_debug |= 0x08; + + gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug); + } + + /* + * Update GPU AXI cache attribute to "cacheable, no allocate". + * This is necessary to prevent the iMX6 SoC locking up. + */ + gpu_write(gpu, VIVS_HI_AXI_CONFIG, + VIVS_HI_AXI_CONFIG_AWCACHE(2) | + VIVS_HI_AXI_CONFIG_ARCACHE(2)); + + /* GC2000 rev 5108 needs a special bus config */ + if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) { + u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG); + bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK | + VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK); + bus_config |= VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(1) | + VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(0); + gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config); + } + + /* set base addresses */ + gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base); + gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base); + gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base); + gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base); + gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base); + + /* setup the MMU page table pointers */ + etnaviv_iommu_domain_restore(gpu, gpu->mmu->domain); + + /* Start command processor */ + prefetch = etnaviv_buffer_init(gpu); + + gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U); + gpu_write(gpu, VIVS_FE_COMMAND_ADDRESS, + gpu->buffer->paddr - gpu->memory_base); + gpu_write(gpu, VIVS_FE_COMMAND_CONTROL, + VIVS_FE_COMMAND_CONTROL_ENABLE | + VIVS_FE_COMMAND_CONTROL_PREFETCH(prefetch)); +} + +int etnaviv_gpu_init(struct etnaviv_gpu *gpu) +{ + int ret, i; + struct iommu_domain *iommu; + enum etnaviv_iommu_version version; + bool mmuv2; + + ret = pm_runtime_get_sync(gpu->dev); + if (ret < 0) + return ret; + + etnaviv_hw_identify(gpu); + + if (gpu->identity.model == 0) { + dev_err(gpu->dev, "Unknown GPU model\n"); + pm_runtime_put_autosuspend(gpu->dev); + return -ENXIO; + } + + ret = etnaviv_hw_reset(gpu); + if (ret) + goto fail; + + /* Setup
IOMMU. Eventually we will (I think) do this once per context + * and have separate page tables per context. For now, to keep things + * simple and to get something working, just use a single address space: + */ + mmuv2 = gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION; + dev_dbg(gpu->dev, "mmuv2: %d\n", mmuv2); + + if (!mmuv2) { + iommu = etnaviv_iommu_domain_alloc(gpu); + version = ETNAVIV_IOMMU_V1; + } else { + iommu = etnaviv_iommu_v2_domain_alloc(gpu); + version = ETNAVIV_IOMMU_V2; + } + + if (!iommu) { + ret = -ENOMEM; + goto fail; + } + + /* TODO: we will leak memory here - fix it! */ + + gpu->mmu = etnaviv_iommu_new(gpu, iommu, version); + if (!gpu->mmu) { + ret = -ENOMEM; + goto fail; + } + + /* Create buffer: */ + gpu->buffer = etnaviv_gpu_cmdbuf_new(gpu, PAGE_SIZE, 0); + if (!gpu->buffer) { + ret = -ENOMEM; + dev_err(gpu->dev, "could not create command buffer\n"); + goto fail; + } + if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) { + ret = -EINVAL; + dev_err(gpu->dev, + "command buffer outside valid memory window\n"); + goto free_buffer; + } + + /* Setup event management */ + spin_lock_init(&gpu->event_spinlock); + init_completion(&gpu->event_free); + for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { + gpu->event[i].used = false; + complete(&gpu->event_free); + } + + /* Now program the hardware */ + mutex_lock(&gpu->lock); + etnaviv_gpu_hw_init(gpu); + mutex_unlock(&gpu->lock); + + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); + + return 0; + +free_buffer: + etnaviv_gpu_cmdbuf_free(gpu->buffer); + gpu->buffer = NULL; +fail: + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); + + return ret; +} + +#ifdef CONFIG_DEBUG_FS +struct dma_debug { + u32 address[2]; + u32 state[2]; +}; + +static void verify_dma(struct etnaviv_gpu *gpu, struct dma_debug *debug) +{ + u32 i; + + debug->address[0] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); + debug->state[0] = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE); + + for (i = 0; i < 500; i++) { + debug->address[1] = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); + debug->state[1] = gpu_read(gpu, VIVS_FE_DMA_DEBUG_STATE); + + if (debug->address[0] != debug->address[1]) + break; + + if (debug->state[0] != debug->state[1]) + break; + } +} + +int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m) +{ + struct dma_debug debug; + u32 dma_lo, dma_hi, axi, idle; + int ret; + + seq_printf(m, "%s Status:\n", dev_name(gpu->dev)); + + ret = pm_runtime_get_sync(gpu->dev); + if (ret < 0) + return ret; + + dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW); + dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH); + axi = gpu_read(gpu, VIVS_HI_AXI_STATUS); + idle = gpu_read(gpu, VIVS_HI_IDLE_STATE); + + verify_dma(gpu, &debug); + + seq_puts(m, "\tfeatures\n"); + seq_printf(m, "\t minor_features0: 0x%08x\n", + gpu->identity.minor_features0); + seq_printf(m, "\t minor_features1: 0x%08x\n", + gpu->identity.minor_features1); + seq_printf(m, "\t minor_features2: 0x%08x\n", + gpu->identity.minor_features2); + seq_printf(m, "\t minor_features3: 0x%08x\n", + gpu->identity.minor_features3); + + seq_puts(m, "\tspecs\n"); + seq_printf(m, "\t stream_count: %d\n", + gpu->identity.stream_count); + seq_printf(m, "\t register_max: %d\n", + gpu->identity.register_max); + seq_printf(m, "\t thread_count: %d\n", + gpu->identity.thread_count); + seq_printf(m, "\t vertex_cache_size: %d\n", + gpu->identity.vertex_cache_size); + seq_printf(m, "\t shader_core_count: %d\n", + gpu->identity.shader_core_count); + seq_printf(m, "\t pixel_pipes: %d\n",
+ gpu->identity.pixel_pipes); + seq_printf(m, "\t vertex_output_buffer_size: %d\n", + gpu->identity.vertex_output_buffer_size); + seq_printf(m, "\t buffer_size: %d\n", + gpu->identity.buffer_size); + seq_printf(m, "\t instruction_count: %d\n", + gpu->identity.instruction_count); + seq_printf(m, "\t num_constants: %d\n", + gpu->identity.num_constants); + + seq_printf(m, "\taxi: 0x%08x\n", axi); + seq_printf(m, "\tidle: 0x%08x\n", idle); + idle |= ~gpu->idle_mask & ~VIVS_HI_IDLE_STATE_AXI_LP; + if ((idle & VIVS_HI_IDLE_STATE_FE) == 0) + seq_puts(m, "\t FE is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_DE) == 0) + seq_puts(m, "\t DE is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_PE) == 0) + seq_puts(m, "\t PE is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_SH) == 0) + seq_puts(m, "\t SH is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_PA) == 0) + seq_puts(m, "\t PA is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_SE) == 0) + seq_puts(m, "\t SE is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_RA) == 0) + seq_puts(m, "\t RA is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_TX) == 0) + seq_puts(m, "\t TX is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_VG) == 0) + seq_puts(m, "\t VG is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_IM) == 0) + seq_puts(m, "\t IM is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_FP) == 0) + seq_puts(m, "\t FP is not idle\n"); + if ((idle & VIVS_HI_IDLE_STATE_TS) == 0) + seq_puts(m, "\t TS is not idle\n"); + if (idle & VIVS_HI_IDLE_STATE_AXI_LP) + seq_puts(m, "\t AXI low power mode\n"); + + if (gpu->identity.features & chipFeatures_DEBUG_MODE) { + u32 read0 = gpu_read(gpu, VIVS_MC_DEBUG_READ0); + u32 read1 = gpu_read(gpu, VIVS_MC_DEBUG_READ1); + u32 write = gpu_read(gpu, VIVS_MC_DEBUG_WRITE); + + seq_puts(m, "\tMC\n"); + seq_printf(m, "\t read0: 0x%08x\n", read0); + seq_printf(m, "\t read1: 0x%08x\n", read1); + seq_printf(m, "\t write: 0x%08x\n", write); + } + + seq_puts(m, "\tDMA "); + + if (debug.address[0] == debug.address[1] && + debug.state[0] == debug.state[1]) { + seq_puts(m, "seems to be stuck\n"); + } else if (debug.address[0] == debug.address[1]) { + seq_puts(m, "address is constant\n"); + } else { + seq_puts(m, "is running\n"); + } + + seq_printf(m, "\t address 0: 0x%08x\n", debug.address[0]); + seq_printf(m, "\t address 1: 0x%08x\n", debug.address[1]); + seq_printf(m, "\t state 0: 0x%08x\n", debug.state[0]); + seq_printf(m, "\t state 1: 0x%08x\n", debug.state[1]); + seq_printf(m, "\t last fetch 64 bit word: 0x%08x 0x%08x\n", + dma_lo, dma_hi); + + ret = 0; + + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); + + return ret; +} +#endif + +/* + * Power Management: + */ +static int enable_clk(struct etnaviv_gpu *gpu) +{ + if (gpu->clk_core) + clk_prepare_enable(gpu->clk_core); + if (gpu->clk_shader) + clk_prepare_enable(gpu->clk_shader); + + return 0; +} + +static int disable_clk(struct etnaviv_gpu *gpu) +{ + if (gpu->clk_core) + clk_disable_unprepare(gpu->clk_core); + if (gpu->clk_shader) + clk_disable_unprepare(gpu->clk_shader); + + return 0; +} + +static int enable_axi(struct etnaviv_gpu *gpu) +{ + if (gpu->clk_bus) + clk_prepare_enable(gpu->clk_bus); + + return 0; +} + +static int disable_axi(struct etnaviv_gpu *gpu) +{ + if (gpu->clk_bus) + clk_disable_unprepare(gpu->clk_bus); + + return 0; +} + +/* + * Hangcheck detection for a locked-up GPU: + */ +static void recover_worker(struct work_struct *work) +{ + struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, + recover_work); + unsigned long flags; +
unsigned int i; + + dev_err(gpu->dev, "hangcheck recover!\n"); + + if (pm_runtime_get_sync(gpu->dev) < 0) + return; + + mutex_lock(&gpu->lock); + + /* Only catch the first event, or when manually re-armed */ + if (etnaviv_dump_core) { + etnaviv_core_dump(gpu); + etnaviv_dump_core = false; + } + + etnaviv_hw_reset(gpu); + + /* complete all events, the GPU won't do it after the reset */ + spin_lock_irqsave(&gpu->event_spinlock, flags); + for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { + if (!gpu->event[i].used) + continue; + fence_signal(gpu->event[i].fence); + gpu->event[i].fence = NULL; + gpu->event[i].used = false; + complete(&gpu->event_free); + /* + * Decrement the PM count for each stuck event. This is safe + * even in atomic context as we use ASYNC RPM here. + */ + pm_runtime_put_autosuspend(gpu->dev); + } + spin_unlock_irqrestore(&gpu->event_spinlock, flags); + gpu->completed_fence = gpu->active_fence; + + etnaviv_gpu_hw_init(gpu); + gpu->switch_context = true; + + mutex_unlock(&gpu->lock); + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); + + /* Retire the buffer objects in a work */ + etnaviv_queue_work(gpu->drm, &gpu->retire_work); +} + +static void hangcheck_timer_reset(struct etnaviv_gpu *gpu) +{ + DBG("%s", dev_name(gpu->dev)); + mod_timer(&gpu->hangcheck_timer, + round_jiffies_up(jiffies + DRM_ETNAVIV_HANGCHECK_JIFFIES)); +} + +static void hangcheck_handler(unsigned long data) +{ + struct etnaviv_gpu *gpu = (struct etnaviv_gpu *)data; + u32 fence = gpu->completed_fence; + bool progress = false; + + if (fence != gpu->hangcheck_fence) { + gpu->hangcheck_fence = fence; + progress = true; + } + + if (!progress) { + u32 dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); + int change = dma_addr - gpu->hangcheck_dma_addr; + + if (change < 0 || change > 16) { + gpu->hangcheck_dma_addr = dma_addr; + progress = true; + } + } + + if (!progress && fence_after(gpu->active_fence, fence)) { + dev_err(gpu->dev, "hangcheck detected gpu lockup!\n"); + dev_err(gpu->dev, " completed fence: %u\n", fence); + dev_err(gpu->dev, " active fence: %u\n", + gpu->active_fence); + etnaviv_queue_work(gpu->drm, &gpu->recover_work); + } + + /* if still more pending work, reset the hangcheck timer: */ + if (fence_after(gpu->active_fence, gpu->hangcheck_fence)) + hangcheck_timer_reset(gpu); +} + +static void hangcheck_disable(struct etnaviv_gpu *gpu) +{ + del_timer_sync(&gpu->hangcheck_timer); + cancel_work_sync(&gpu->recover_work); +} + +/* fence object management */ +struct etnaviv_fence { + struct etnaviv_gpu *gpu; + struct fence base; +}; + +static inline struct etnaviv_fence *to_etnaviv_fence(struct fence *fence) +{ + return container_of(fence, struct etnaviv_fence, base); +} + +static const char *etnaviv_fence_get_driver_name(struct fence *fence) +{ + return "etnaviv"; +} + +static const char *etnaviv_fence_get_timeline_name(struct fence *fence) +{ + struct etnaviv_fence *f = to_etnaviv_fence(fence); + + return dev_name(f->gpu->dev); +} + +static bool etnaviv_fence_enable_signaling(struct fence *fence) +{ + return true; +} + +static bool etnaviv_fence_signaled(struct fence *fence) +{ + struct etnaviv_fence *f = to_etnaviv_fence(fence); + + return fence_completed(f->gpu, f->base.seqno); +} + +static void etnaviv_fence_release(struct fence *fence) +{ + struct etnaviv_fence *f = to_etnaviv_fence(fence); + + kfree_rcu(f, base.rcu); +} + +static const struct fence_ops etnaviv_fence_ops = { + .get_driver_name = etnaviv_fence_get_driver_name, + .get_timeline_name = 
etnaviv_fence_get_timeline_name, + .enable_signaling = etnaviv_fence_enable_signaling, + .signaled = etnaviv_fence_signaled, + .wait = fence_default_wait, + .release = etnaviv_fence_release, +}; + +static struct fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu) +{ + struct etnaviv_fence *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->gpu = gpu; + + fence_init(&f->base, &etnaviv_fence_ops, &gpu->fence_spinlock, + gpu->fence_context, ++gpu->next_fence); + + return &f->base; +} + +int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, + unsigned int context, bool exclusive) +{ + struct reservation_object *robj = etnaviv_obj->resv; + struct reservation_object_list *fobj; + struct fence *fence; + int i, ret; + + if (!exclusive) { + ret = reservation_object_reserve_shared(robj); + if (ret) + return ret; + } + + /* + * If we have any shared fences, then the exclusive fence + * should be ignored as it will already have been signalled. + */ + fobj = reservation_object_get_list(robj); + if (!fobj || fobj->shared_count == 0) { + /* Wait on any existing exclusive fence which isn't our own */ + fence = reservation_object_get_excl(robj); + if (fence && fence->context != context) { + ret = fence_wait(fence, true); + if (ret) + return ret; + } + } + + if (!exclusive || !fobj) + return 0; + + for (i = 0; i < fobj->shared_count; i++) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(robj)); + if (fence->context != context) { + ret = fence_wait(fence, true); + if (ret) + return ret; + } + } + + return 0; +} + +/* + * event management: + */ + +static unsigned int event_alloc(struct etnaviv_gpu *gpu) +{ + unsigned long ret, flags; + unsigned int i, event = ~0U; + + ret = wait_for_completion_timeout(&gpu->event_free, + msecs_to_jiffies(10 * 10000)); + if (!ret) + dev_err(gpu->dev, "wait_for_completion_timeout failed"); + + spin_lock_irqsave(&gpu->event_spinlock, flags); + + /* find first free event */ + for (i = 0; i < ARRAY_SIZE(gpu->event); i++) { + if (gpu->event[i].used == false) { + gpu->event[i].used = true; + event = i; + break; + } + } + + spin_unlock_irqrestore(&gpu->event_spinlock, flags); + + return event; +} + +static void event_free(struct etnaviv_gpu *gpu, unsigned int event) +{ + unsigned long flags; + + spin_lock_irqsave(&gpu->event_spinlock, flags); + + if (gpu->event[event].used == false) { + dev_warn(gpu->dev, "event %u is already marked as free", + event); + spin_unlock_irqrestore(&gpu->event_spinlock, flags); + } else { + gpu->event[event].used = false; + spin_unlock_irqrestore(&gpu->event_spinlock, flags); + + complete(&gpu->event_free); + } +} + +/* + * Cmdstream submission/retirement: + */ + +struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size, + size_t nr_bos) +{ + struct etnaviv_cmdbuf *cmdbuf; + size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo[0]), + sizeof(*cmdbuf)); + + cmdbuf = kzalloc(sz, GFP_KERNEL); + if (!cmdbuf) + return NULL; + + cmdbuf->vaddr = dma_alloc_writecombine(gpu->dev, size, &cmdbuf->paddr, + GFP_KERNEL); + if (!cmdbuf->vaddr) { + kfree(cmdbuf); + return NULL; + } + + cmdbuf->gpu = gpu; + cmdbuf->size = size; + + return cmdbuf; +} + +void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf) +{ + dma_free_writecombine(cmdbuf->gpu->dev, cmdbuf->size, + cmdbuf->vaddr, cmdbuf->paddr); + kfree(cmdbuf); +} + +static void retire_worker(struct work_struct *work) +{ + struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu, + retire_work); + u32 fence = 
gpu->completed_fence; + struct etnaviv_cmdbuf *cmdbuf, *tmp; + unsigned int i; + + mutex_lock(&gpu->lock); + list_for_each_entry_safe(cmdbuf, tmp, &gpu->active_cmd_list, node) { + if (!fence_is_signaled(cmdbuf->fence)) + break; + + list_del(&cmdbuf->node); + fence_put(cmdbuf->fence); + + for (i = 0; i < cmdbuf->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = cmdbuf->bo[i]; + + atomic_dec(&etnaviv_obj->gpu_active); + /* drop the refcount taken in etnaviv_gpu_submit */ + etnaviv_gem_put_iova(gpu, &etnaviv_obj->base); + } + + etnaviv_gpu_cmdbuf_free(cmdbuf); + } + + gpu->retired_fence = fence; + + mutex_unlock(&gpu->lock); + + wake_up_all(&gpu->fence_event); +} + +int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, + u32 fence, struct timespec *timeout) +{ + int ret; + + if (fence_after(fence, gpu->next_fence)) { + DRM_ERROR("waiting on invalid fence: %u (of %u)\n", + fence, gpu->next_fence); + return -EINVAL; + } + + if (!timeout) { + /* No timeout was requested: just test for completion */ + ret = fence_completed(gpu, fence) ? 0 : -EBUSY; + } else { + unsigned long remaining = etnaviv_timeout_to_jiffies(timeout); + + ret = wait_event_interruptible_timeout(gpu->fence_event, + fence_completed(gpu, fence), + remaining); + if (ret == 0) { + DBG("timeout waiting for fence: %u (retired: %u completed: %u)", + fence, gpu->retired_fence, + gpu->completed_fence); + ret = -ETIMEDOUT; + } else if (ret != -ERESTARTSYS) { + ret = 0; + } + } + + return ret; +} + +/* + * Wait for an object to become inactive. This, on its own, is not race + * free: the object is moved by the retire worker off the active list, and + * then the iova is put. Moreover, the object could be re-submitted just + * after we notice that it's become inactive. + * + * Although the retirement happens under the gpu lock, we don't want to hold + * that lock in this function while waiting. + */ +int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, + struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout) +{ + unsigned long remaining; + long ret; + + if (!timeout) + return !is_active(etnaviv_obj) ?
0 : -EBUSY; + + remaining = etnaviv_timeout_to_jiffies(timeout); + + ret = wait_event_interruptible_timeout(gpu->fence_event, + !is_active(etnaviv_obj), + remaining); + if (ret > 0) { + struct etnaviv_drm_private *priv = gpu->drm->dev_private; + + /* Synchronise with the retire worker */ + flush_workqueue(priv->wq); + return 0; + } else if (ret == -ERESTARTSYS) { + return -ERESTARTSYS; + } else { + return -ETIMEDOUT; + } +} + +int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu) +{ + return pm_runtime_get_sync(gpu->dev); +} + +void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu) +{ + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); +} + +/* add bo's to gpu's ring, and kick gpu: */ +int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, + struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf) +{ + struct fence *fence; + unsigned int event, i; + int ret; + + ret = etnaviv_gpu_pm_get_sync(gpu); + if (ret < 0) + return ret; + + mutex_lock(&gpu->lock); + + /* + * TODO + * + * - flush + * - data endian + * - prefetch + * + */ + + event = event_alloc(gpu); + if (unlikely(event == ~0U)) { + DRM_ERROR("no free event\n"); + ret = -EBUSY; + goto out_unlock; + } + + fence = etnaviv_gpu_fence_alloc(gpu); + if (!fence) { + event_free(gpu, event); + ret = -ENOMEM; + goto out_unlock; + } + + gpu->event[event].fence = fence; + submit->fence = fence->seqno; + gpu->active_fence = submit->fence; + + if (gpu->lastctx != cmdbuf->ctx) { + gpu->mmu->need_flush = true; + gpu->switch_context = true; + gpu->lastctx = cmdbuf->ctx; + } + + etnaviv_buffer_queue(gpu, event, cmdbuf); + + cmdbuf->fence = fence; + list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); + + /* We're committed to adding this command buffer, hold a PM reference */ + pm_runtime_get_noresume(gpu->dev); + + for (i = 0; i < submit->nr_bos; i++) { + struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj; + u32 iova; + + /* Each cmdbuf takes a refcount on the iova */ + etnaviv_gem_get_iova(gpu, &etnaviv_obj->base, &iova); + cmdbuf->bo[i] = etnaviv_obj; + atomic_inc(&etnaviv_obj->gpu_active); + + if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) + reservation_object_add_excl_fence(etnaviv_obj->resv, + fence); + else + reservation_object_add_shared_fence(etnaviv_obj->resv, + fence); + } + cmdbuf->nr_bos = submit->nr_bos; + hangcheck_timer_reset(gpu); + ret = 0; + +out_unlock: + mutex_unlock(&gpu->lock); + + etnaviv_gpu_pm_put(gpu); + + return ret; +} + +/* + * Init/Cleanup: + */ +static irqreturn_t irq_handler(int irq, void *data) +{ + struct etnaviv_gpu *gpu = data; + irqreturn_t ret = IRQ_NONE; + + u32 intr = gpu_read(gpu, VIVS_HI_INTR_ACKNOWLEDGE); + + if (intr != 0) { + int event; + + pm_runtime_mark_last_busy(gpu->dev); + + dev_dbg(gpu->dev, "intr 0x%08x\n", intr); + + if (intr & VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR) { + dev_err(gpu->dev, "AXI bus error\n"); + intr &= ~VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR; + } + + while ((event = ffs(intr)) != 0) { + struct fence *fence; + + event -= 1; + + intr &= ~(1 << event); + + dev_dbg(gpu->dev, "event %u\n", event); + + fence = gpu->event[event].fence; + gpu->event[event].fence = NULL; + fence_signal(fence); + + /* + * Events can be processed out of order. Eg, + * - allocate and queue event 0 + * - allocate event 1 + * - event 0 completes, we process it + * - allocate and queue event 0 + * - event 1 and event 0 complete + * we can end up processing event 0 first, then 1. 
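+ * To cope with this, completed_fence is only ever advanced below when
+ * the seqno is genuinely newer (the fence_after() check), never
+ * rewound by a late event.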
+ */ + if (fence_after(fence->seqno, gpu->completed_fence)) + gpu->completed_fence = fence->seqno; + + event_free(gpu, event); + + /* + * We need to balance the runtime PM count caused by + * each submission. Upon submission, we increment + * the runtime PM counter, and allocate one event. + * So here, we put the runtime PM count for each + * completed event. + */ + pm_runtime_put_autosuspend(gpu->dev); + } + + /* Retire the buffer objects in a work */ + etnaviv_queue_work(gpu->drm, &gpu->retire_work); + + ret = IRQ_HANDLED; + } + + return ret; +} + +static int etnaviv_gpu_clk_enable(struct etnaviv_gpu *gpu) +{ + int ret; + + ret = enable_clk(gpu); + if (ret) + return ret; + + ret = enable_axi(gpu); + if (ret) { + disable_clk(gpu); + return ret; + } + + return 0; +} + +static int etnaviv_gpu_clk_disable(struct etnaviv_gpu *gpu) +{ + int ret; + + ret = disable_axi(gpu); + if (ret) + return ret; + + ret = disable_clk(gpu); + if (ret) + return ret; + + return 0; +} + +static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu) +{ + if (gpu->buffer) { + unsigned long timeout; + + /* Replace the last WAIT with END */ + etnaviv_buffer_end(gpu); + + /* + * We know that only the FE is busy here, this should + * happen quickly (as the WAIT is only 200 cycles). If + * we fail, just warn and continue. + */ + timeout = jiffies + msecs_to_jiffies(100); + do { + u32 idle = gpu_read(gpu, VIVS_HI_IDLE_STATE); + + if ((idle & gpu->idle_mask) == gpu->idle_mask) + break; + + if (time_is_before_jiffies(timeout)) { + dev_warn(gpu->dev, + "timed out waiting for idle: idle=0x%x\n", + idle); + break; + } + + udelay(5); + } while (1); + } + + return etnaviv_gpu_clk_disable(gpu); +} + +#ifdef CONFIG_PM +static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu) +{ + u32 clock; + int ret; + + ret = mutex_lock_killable(&gpu->lock); + if (ret) + return ret; + + clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS | + VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40); + + etnaviv_gpu_load_clock(gpu, clock); + etnaviv_gpu_hw_init(gpu); + + gpu->switch_context = true; + + mutex_unlock(&gpu->lock); + + return 0; +} +#endif + +static int etnaviv_gpu_bind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = data; + struct etnaviv_drm_private *priv = drm->dev_private; + struct etnaviv_gpu *gpu = dev_get_drvdata(dev); + int ret; + +#ifdef CONFIG_PM + ret = pm_runtime_get_sync(gpu->dev); +#else + ret = etnaviv_gpu_clk_enable(gpu); +#endif + if (ret < 0) + return ret; + + gpu->drm = drm; + gpu->fence_context = fence_context_alloc(1); + spin_lock_init(&gpu->fence_spinlock); + + INIT_LIST_HEAD(&gpu->active_cmd_list); + INIT_WORK(&gpu->retire_work, retire_worker); + INIT_WORK(&gpu->recover_work, recover_worker); + init_waitqueue_head(&gpu->fence_event); + + setup_timer(&gpu->hangcheck_timer, hangcheck_handler, + (unsigned long)gpu); + + priv->gpu[priv->num_gpus++] = gpu; + + pm_runtime_mark_last_busy(gpu->dev); + pm_runtime_put_autosuspend(gpu->dev); + + return 0; +} + +static void etnaviv_gpu_unbind(struct device *dev, struct device *master, + void *data) +{ + struct etnaviv_gpu *gpu = dev_get_drvdata(dev); + + DBG("%s", dev_name(gpu->dev)); + + hangcheck_disable(gpu); + +#ifdef CONFIG_PM + pm_runtime_get_sync(gpu->dev); + pm_runtime_put_sync_suspend(gpu->dev); +#else + etnaviv_gpu_hw_suspend(gpu); +#endif + + if (gpu->buffer) { + etnaviv_gpu_cmdbuf_free(gpu->buffer); + gpu->buffer = NULL; + } + + if (gpu->mmu) { + etnaviv_iommu_destroy(gpu->mmu); + gpu->mmu = NULL; + } + + gpu->drm = NULL; +} + +static 
const struct component_ops gpu_ops = { + .bind = etnaviv_gpu_bind, + .unbind = etnaviv_gpu_unbind, +}; + +static const struct of_device_id etnaviv_gpu_match[] = { + { + .compatible = "vivante,gc" + }, + { /* sentinel */ } +}; + +static int etnaviv_gpu_platform_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct etnaviv_gpu *gpu; + int err = 0; + + gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL); + if (!gpu) + return -ENOMEM; + + gpu->dev = &pdev->dev; + mutex_init(&gpu->lock); + + /* + * Set the GPU base address to the start of physical memory. This + * ensures that if we have up to 2GB, the v1 MMU can address the + * highest memory. This is important as command buffers may be + * allocated outside of this limit. + */ + gpu->memory_base = PHYS_OFFSET; + + /* Map registers: */ + gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev)); + if (IS_ERR(gpu->mmio)) + return PTR_ERR(gpu->mmio); + + /* Get Interrupt: */ + gpu->irq = platform_get_irq(pdev, 0); + if (gpu->irq < 0) { + err = gpu->irq; + dev_err(dev, "failed to get irq: %d\n", err); + goto fail; + } + + err = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, 0, + dev_name(gpu->dev), gpu); + if (err) { + dev_err(dev, "failed to request IRQ%u: %d\n", gpu->irq, err); + goto fail; + } + + /* Get Clocks: */ + gpu->clk_bus = devm_clk_get(&pdev->dev, "bus"); + DBG("clk_bus: %p", gpu->clk_bus); + if (IS_ERR(gpu->clk_bus)) + gpu->clk_bus = NULL; + + gpu->clk_core = devm_clk_get(&pdev->dev, "core"); + DBG("clk_core: %p", gpu->clk_core); + if (IS_ERR(gpu->clk_core)) + gpu->clk_core = NULL; + + gpu->clk_shader = devm_clk_get(&pdev->dev, "shader"); + DBG("clk_shader: %p", gpu->clk_shader); + if (IS_ERR(gpu->clk_shader)) + gpu->clk_shader = NULL; + + /* TODO: figure out max mapped size */ + dev_set_drvdata(dev, gpu); + + /* + * We treat the device as initially suspended. The runtime PM + * autosuspend delay is rather arbitrary: no measurements have + * yet been performed to determine an appropriate value.
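+ * The 200ms used below is assumed to be long enough to bridge
+ * back-to-back submissions without keeping a genuinely idle GPU
+ * powered up.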
+ */ + pm_runtime_use_autosuspend(gpu->dev); + pm_runtime_set_autosuspend_delay(gpu->dev, 200); + pm_runtime_enable(gpu->dev); + + err = component_add(&pdev->dev, &gpu_ops); + if (err < 0) { + dev_err(&pdev->dev, "failed to register component: %d\n", err); + goto fail; + } + + return 0; + +fail: + return err; +} + +static int etnaviv_gpu_platform_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &gpu_ops); + pm_runtime_disable(&pdev->dev); + return 0; +} + +#ifdef CONFIG_PM +static int etnaviv_gpu_rpm_suspend(struct device *dev) +{ + struct etnaviv_gpu *gpu = dev_get_drvdata(dev); + u32 idle, mask; + + /* If we have outstanding fences, we're not idle */ + if (gpu->completed_fence != gpu->active_fence) + return -EBUSY; + + /* Check whether the hardware (except FE) is idle */ + mask = gpu->idle_mask & ~VIVS_HI_IDLE_STATE_FE; + idle = gpu_read(gpu, VIVS_HI_IDLE_STATE) & mask; + if (idle != mask) + return -EBUSY; + + return etnaviv_gpu_hw_suspend(gpu); +} + +static int etnaviv_gpu_rpm_resume(struct device *dev) +{ + struct etnaviv_gpu *gpu = dev_get_drvdata(dev); + int ret; + + ret = etnaviv_gpu_clk_enable(gpu); + if (ret) + return ret; + + /* Re-initialise the basic hardware state */ + if (gpu->drm && gpu->buffer) { + ret = etnaviv_gpu_hw_resume(gpu); + if (ret) { + etnaviv_gpu_clk_disable(gpu); + return ret; + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops etnaviv_gpu_pm_ops = { + SET_RUNTIME_PM_OPS(etnaviv_gpu_rpm_suspend, etnaviv_gpu_rpm_resume, + NULL) +}; + +struct platform_driver etnaviv_gpu_driver = { + .driver = { + .name = "etnaviv-gpu", + .owner = THIS_MODULE, + .pm = &etnaviv_gpu_pm_ops, + .of_match_table = etnaviv_gpu_match, + }, + .probe = etnaviv_gpu_platform_probe, + .remove = etnaviv_gpu_platform_remove, + .id_table = gpu_ids, +}; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h new file mode 100644 index 000000000000..c75d50359ab0 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_GPU_H__ +#define __ETNAVIV_GPU_H__ + +#include <linux/clk.h> +#include <linux/regulator/consumer.h> + +#include "etnaviv_drv.h" + +struct etnaviv_gem_submit; + +struct etnaviv_chip_identity { + /* Chip model. */ + u32 model; + + /* Revision value.*/ + u32 revision; + + /* Supported feature fields. */ + u32 features; + + /* Supported minor feature fields. */ + u32 minor_features0; + + /* Supported minor feature 1 fields. */ + u32 minor_features1; + + /* Supported minor feature 2 fields. */ + u32 minor_features2; + + /* Supported minor feature 3 fields. */ + u32 minor_features3; + + /* Number of streams supported. */ + u32 stream_count; + + /* Total number of temporary registers per thread. */ + u32 register_max; + + /* Maximum number of threads. */ + u32 thread_count; + + /* Number of shader cores. 
*/ + u32 shader_core_count; + + /* Size of the vertex cache. */ + u32 vertex_cache_size; + + /* Number of entries in the vertex output buffer. */ + u32 vertex_output_buffer_size; + + /* Number of pixel pipes. */ + u32 pixel_pipes; + + /* Number of instructions. */ + u32 instruction_count; + + /* Number of constants. */ + u32 num_constants; + + /* Buffer size */ + u32 buffer_size; +}; + +struct etnaviv_event { + bool used; + struct fence *fence; +}; + +struct etnaviv_cmdbuf; + +struct etnaviv_gpu { + struct drm_device *drm; + struct device *dev; + struct mutex lock; + struct etnaviv_chip_identity identity; + struct etnaviv_file_private *lastctx; + bool switch_context; + + /* 'ring'-buffer: */ + struct etnaviv_cmdbuf *buffer; + + /* bus base address of memory */ + u32 memory_base; + + /* event management: */ + struct etnaviv_event event[30]; + struct completion event_free; + spinlock_t event_spinlock; + + /* list of currently in-flight command buffers */ + struct list_head active_cmd_list; + + u32 idle_mask; + + /* Fencing support */ + u32 next_fence; + u32 active_fence; + u32 completed_fence; + u32 retired_fence; + wait_queue_head_t fence_event; + unsigned int fence_context; + spinlock_t fence_spinlock; + + /* worker for handling active-list retiring: */ + struct work_struct retire_work; + + void __iomem *mmio; + int irq; + + struct etnaviv_iommu *mmu; + + /* Power Control: */ + struct clk *clk_bus; + struct clk *clk_core; + struct clk *clk_shader; + + /* Hang Detection: */ +#define DRM_ETNAVIV_HANGCHECK_PERIOD 500 /* in ms */ +#define DRM_ETNAVIV_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_ETNAVIV_HANGCHECK_PERIOD) + struct timer_list hangcheck_timer; + u32 hangcheck_fence; + u32 hangcheck_dma_addr; + struct work_struct recover_work; +}; + +struct etnaviv_cmdbuf { + /* device this cmdbuf is allocated for */ + struct etnaviv_gpu *gpu; + /* user context key, must be unique between all active users */ + struct etnaviv_file_private *ctx; + /* cmdbuf properties */ + void *vaddr; + dma_addr_t paddr; + u32 size; + u32 user_size; + /* fence after which this buffer is to be disposed */ + struct fence *fence; + /* target exec state */ + u32 exec_state; + /* per GPU in-flight list */ + struct list_head node; + /* BOs attached to this command buffer */ + unsigned int nr_bos; + struct etnaviv_gem_object *bo[0]; +}; + +static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data) +{ + etnaviv_writel(data, gpu->mmio + reg); +} + +static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg) +{ + return etnaviv_readl(gpu->mmio + reg); +} + +static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence) +{ + return fence_after_eq(gpu->completed_fence, fence); +} + +static inline bool fence_retired(struct etnaviv_gpu *gpu, u32 fence) +{ + return fence_after_eq(gpu->retired_fence, fence); +} + +int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value); + +int etnaviv_gpu_init(struct etnaviv_gpu *gpu); + +#ifdef CONFIG_DEBUG_FS +int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m); +#endif + +int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj, + unsigned int context, bool exclusive); + +void etnaviv_gpu_retire(struct etnaviv_gpu *gpu); +int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu, + u32 fence, struct timespec *timeout); +int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu, + struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout); +int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, + struct etnaviv_gem_submit
*submit, struct etnaviv_cmdbuf *cmdbuf); +struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, + u32 size, size_t nr_bos); +void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf); +int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu); +void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu); + +extern struct platform_driver etnaviv_gpu_driver; + +#endif /* __ETNAVIV_GPU_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c new file mode 100644 index 000000000000..522cfd447892 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c @@ -0,0 +1,240 @@ +/* + * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/dma-mapping.h> +#include <linux/bitops.h> + +#include "etnaviv_gpu.h" +#include "etnaviv_mmu.h" +#include "etnaviv_iommu.h" +#include "state_hi.xml.h" + +#define PT_SIZE SZ_2M +#define PT_ENTRIES (PT_SIZE / sizeof(u32)) + +#define GPU_MEM_START 0x80000000 + +struct etnaviv_iommu_domain_pgtable { + u32 *pgtable; + dma_addr_t paddr; +}; + +struct etnaviv_iommu_domain { + struct iommu_domain domain; + struct device *dev; + void *bad_page_cpu; + dma_addr_t bad_page_dma; + struct etnaviv_iommu_domain_pgtable pgtable; + spinlock_t map_lock; +}; + +static struct etnaviv_iommu_domain *to_etnaviv_domain(struct iommu_domain *domain) +{ + return container_of(domain, struct etnaviv_iommu_domain, domain); +} + +static int pgtable_alloc(struct etnaviv_iommu_domain_pgtable *pgtable, + size_t size) +{ + pgtable->pgtable = dma_alloc_coherent(NULL, size, &pgtable->paddr, GFP_KERNEL); + if (!pgtable->pgtable) + return -ENOMEM; + + return 0; +} + +static void pgtable_free(struct etnaviv_iommu_domain_pgtable *pgtable, + size_t size) +{ + dma_free_coherent(NULL, size, pgtable->pgtable, pgtable->paddr); +} + +static u32 pgtable_read(struct etnaviv_iommu_domain_pgtable *pgtable, + unsigned long iova) +{ + /* calculate index into page table */ + unsigned int index = (iova - GPU_MEM_START) / SZ_4K; + phys_addr_t paddr; + + paddr = pgtable->pgtable[index]; + + return paddr; +} + +static void pgtable_write(struct etnaviv_iommu_domain_pgtable *pgtable, + unsigned long iova, phys_addr_t paddr) +{ + /* calculate index into page table */ + unsigned int index = (iova - GPU_MEM_START) / SZ_4K; + + pgtable->pgtable[index] = paddr; +} + +static int __etnaviv_iommu_init(struct etnaviv_iommu_domain *etnaviv_domain) +{ + u32 *p; + int ret, i; + + etnaviv_domain->bad_page_cpu = dma_alloc_coherent(etnaviv_domain->dev, + SZ_4K, + &etnaviv_domain->bad_page_dma, + GFP_KERNEL); + if (!etnaviv_domain->bad_page_cpu) + return -ENOMEM; + + p = etnaviv_domain->bad_page_cpu; + for (i = 0; i < SZ_4K / 4; i++) + *p++ = 0xdead55aa; + + ret = pgtable_alloc(&etnaviv_domain->pgtable, PT_SIZE); + if (ret < 0) { +
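/* page table allocation failed: free the bad page scratch buffer */ +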
dma_free_coherent(etnaviv_domain->dev, SZ_4K, + etnaviv_domain->bad_page_cpu, + etnaviv_domain->bad_page_dma); + return ret; + } + + for (i = 0; i < PT_ENTRIES; i++) + etnaviv_domain->pgtable.pgtable[i] = + etnaviv_domain->bad_page_dma; + + spin_lock_init(&etnaviv_domain->map_lock); + + return 0; +} + +static void etnaviv_domain_free(struct iommu_domain *domain) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + + pgtable_free(&etnaviv_domain->pgtable, PT_SIZE); + + dma_free_coherent(etnaviv_domain->dev, SZ_4K, + etnaviv_domain->bad_page_cpu, + etnaviv_domain->bad_page_dma); + + kfree(etnaviv_domain); +} + +static int etnaviv_iommuv1_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + + if (size != SZ_4K) + return -EINVAL; + + spin_lock(&etnaviv_domain->map_lock); + pgtable_write(&etnaviv_domain->pgtable, iova, paddr); + spin_unlock(&etnaviv_domain->map_lock); + + return 0; +} + +static size_t etnaviv_iommuv1_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + + if (size != SZ_4K) + return -EINVAL; + + spin_lock(&etnaviv_domain->map_lock); + pgtable_write(&etnaviv_domain->pgtable, iova, + etnaviv_domain->bad_page_dma); + spin_unlock(&etnaviv_domain->map_lock); + + return SZ_4K; +} + +static phys_addr_t etnaviv_iommu_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + + return pgtable_read(&etnaviv_domain->pgtable, iova); +} + +static size_t etnaviv_iommuv1_dump_size(struct iommu_domain *domain) +{ + return PT_SIZE; +} + +static void etnaviv_iommuv1_dump(struct iommu_domain *domain, void *buf) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + + memcpy(buf, etnaviv_domain->pgtable.pgtable, PT_SIZE); +} + +static struct etnaviv_iommu_ops etnaviv_iommu_ops = { + .ops = { + .domain_free = etnaviv_domain_free, + .map = etnaviv_iommuv1_map, + .unmap = etnaviv_iommuv1_unmap, + .iova_to_phys = etnaviv_iommu_iova_to_phys, + .pgsize_bitmap = SZ_4K, + }, + .dump_size = etnaviv_iommuv1_dump_size, + .dump = etnaviv_iommuv1_dump, +}; + +void etnaviv_iommu_domain_restore(struct etnaviv_gpu *gpu, + struct iommu_domain *domain) +{ + struct etnaviv_iommu_domain *etnaviv_domain = to_etnaviv_domain(domain); + u32 pgtable; + + /* set page table address in MC */ + pgtable = (u32)etnaviv_domain->pgtable.paddr; + + gpu_write(gpu, VIVS_MC_MMU_FE_PAGE_TABLE, pgtable); + gpu_write(gpu, VIVS_MC_MMU_TX_PAGE_TABLE, pgtable); + gpu_write(gpu, VIVS_MC_MMU_PE_PAGE_TABLE, pgtable); + gpu_write(gpu, VIVS_MC_MMU_PEZ_PAGE_TABLE, pgtable); + gpu_write(gpu, VIVS_MC_MMU_RA_PAGE_TABLE, pgtable); +} + +struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu) +{ + struct etnaviv_iommu_domain *etnaviv_domain; + int ret; + + etnaviv_domain = kzalloc(sizeof(*etnaviv_domain), GFP_KERNEL); + if (!etnaviv_domain) + return NULL; + + etnaviv_domain->dev = gpu->dev; + + etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING; + etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops; + etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START; + etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1; + + ret = __etnaviv_iommu_init(etnaviv_domain); + if (ret) + goto out_free; + + return &etnaviv_domain->domain; + +out_free: + 
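/* __etnaviv_iommu_init() failed: release the domain allocation */ +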
kfree(etnaviv_domain); + return NULL; +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h new file mode 100644 index 000000000000..cf45503f6b6f --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_IOMMU_H__ +#define __ETNAVIV_IOMMU_H__ + +#include <linux/iommu.h> +struct etnaviv_gpu; + +struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu); +void etnaviv_iommu_domain_restore(struct etnaviv_gpu *gpu, + struct iommu_domain *domain); +struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu); + +#endif /* __ETNAVIV_IOMMU_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c new file mode 100644 index 000000000000..fbb4aed3dc80 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/iommu.h> +#include <linux/platform_device.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/dma-mapping.h> +#include <linux/bitops.h> + +#include "etnaviv_gpu.h" +#include "etnaviv_iommu.h" +#include "state_hi.xml.h" + + +struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu) +{ + /* TODO */ + return NULL; +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h new file mode 100644 index 000000000000..603ea41c5389 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __ETNAVIV_IOMMU_V2_H__ +#define __ETNAVIV_IOMMU_V2_H__ + +#include <linux/iommu.h> +struct etnaviv_gpu; + +struct iommu_domain *etnaviv_iommu_v2_domain_alloc(struct etnaviv_gpu *gpu); + +#endif /* __ETNAVIV_IOMMU_V2_H__ */ diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c new file mode 100644 index 000000000000..6743bc648dc8 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "etnaviv_drv.h" +#include "etnaviv_gem.h" +#include "etnaviv_gpu.h" +#include "etnaviv_mmu.h" + +static int etnaviv_fault_handler(struct iommu_domain *iommu, struct device *dev, + unsigned long iova, int flags, void *arg) +{ + DBG("*** fault: iova=%08lx, flags=%d", iova, flags); + return 0; +} + +int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova, + struct sg_table *sgt, unsigned len, int prot) +{ + struct iommu_domain *domain = iommu->domain; + struct scatterlist *sg; + unsigned int da = iova; + unsigned int i, j; + int ret; + + if (!domain || !sgt) + return -EINVAL; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + u32 pa = sg_dma_address(sg) - sg->offset; + size_t bytes = sg_dma_len(sg) + sg->offset; + + VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes); + + ret = iommu_map(domain, da, pa, bytes, prot); + if (ret) + goto fail; + + da += bytes; + } + + return 0; + +fail: + da = iova; + + for_each_sg(sgt->sgl, sg, i, j) { + size_t bytes = sg_dma_len(sg) + sg->offset; + + iommu_unmap(domain, da, bytes); + da += bytes; + } + return ret; +} + +int etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova, + struct sg_table *sgt, unsigned len) +{ + struct iommu_domain *domain = iommu->domain; + struct scatterlist *sg; + unsigned int da = iova; + int i; + + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + size_t bytes = sg_dma_len(sg) + sg->offset; + size_t unmapped; + + unmapped = iommu_unmap(domain, da, bytes); + if (unmapped < bytes) + return unmapped; + + VERB("unmap[%d]: %08x(%zx)", i, iova, bytes); + + BUG_ON(!PAGE_ALIGNED(bytes)); + + da += bytes; + } + + return 0; +} + +static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu *mmu, + struct etnaviv_vram_mapping *mapping) +{ + struct etnaviv_gem_object *etnaviv_obj = mapping->object; + + etnaviv_iommu_unmap(mmu, mapping->vram_node.start, + etnaviv_obj->sgt, etnaviv_obj->base.size); + drm_mm_remove_node(&mapping->vram_node); +} + +int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, + struct etnaviv_gem_object *etnaviv_obj, u32 memory_base, + struct etnaviv_vram_mapping *mapping) +{ + struct etnaviv_vram_mapping *free = NULL; + struct sg_table *sgt = etnaviv_obj->sgt; + struct drm_mm_node *node; + int ret; + + lockdep_assert_held(&etnaviv_obj->lock); + + mutex_lock(&mmu->lock); + + /* v1 MMU can optimize single entry (contiguous) scatterlists */ + if (sgt->nents == 1 && !(etnaviv_obj->flags & ETNA_BO_FORCE_MMU)) { + u32 iova; + + iova 
= sg_dma_address(sgt->sgl) - memory_base; + if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) { + mapping->iova = iova; + list_add_tail(&mapping->mmu_node, &mmu->mappings); + mutex_unlock(&mmu->lock); + return 0; + } + } + + node = &mapping->vram_node; + while (1) { + struct etnaviv_vram_mapping *m, *n; + struct list_head list; + bool found; + + ret = drm_mm_insert_node_in_range(&mmu->mm, node, + etnaviv_obj->base.size, 0, mmu->last_iova, ~0UL, + DRM_MM_SEARCH_DEFAULT); + + if (ret != -ENOSPC) + break; + + /* + * If we did not search from the start of the MMU region, + * try again in case there are free slots. + */ + if (mmu->last_iova) { + mmu->last_iova = 0; + mmu->need_flush = true; + continue; + } + + /* Try to retire some entries */ + drm_mm_init_scan(&mmu->mm, etnaviv_obj->base.size, 0, 0); + + found = 0; + INIT_LIST_HEAD(&list); + list_for_each_entry(free, &mmu->mappings, mmu_node) { + /* If this vram node has not been used, skip this. */ + if (!free->vram_node.mm) + continue; + + /* + * If the iova is pinned, then it's in-use, + * so we must keep its mapping. + */ + if (free->use) + continue; + + list_add(&free->scan_node, &list); + if (drm_mm_scan_add_block(&free->vram_node)) { + found = true; + break; + } + } + + if (!found) { + /* Nothing found, clean up and fail */ + list_for_each_entry_safe(m, n, &list, scan_node) + BUG_ON(drm_mm_scan_remove_block(&m->vram_node)); + break; + } + + /* + * drm_mm does not allow any other operations while + * scanning, so we have to remove all blocks first. + * If drm_mm_scan_remove_block() returns false, we + * can leave the block pinned. + */ + list_for_each_entry_safe(m, n, &list, scan_node) + if (!drm_mm_scan_remove_block(&m->vram_node)) + list_del_init(&m->scan_node); + + /* + * Unmap the blocks which need to be reaped from the MMU. + * Clear the mmu pointer to prevent the get_iova finding + * this mapping. + */ + list_for_each_entry_safe(m, n, &list, scan_node) { + etnaviv_iommu_remove_mapping(mmu, m); + m->mmu = NULL; + list_del_init(&m->mmu_node); + list_del_init(&m->scan_node); + } + + /* + * We removed enough mappings so that the new allocation will + * succeed. Ensure that the MMU will be flushed before the + * associated commit requesting this mapping, and retry the + * allocation one more time. 
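+ * (Without the flush the GPU could still act on stale translations
+ * for the mappings we just reaped.)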
+ */ + mmu->need_flush = true; + } + + if (ret < 0) { + mutex_unlock(&mmu->lock); + return ret; + } + + mmu->last_iova = node->start + etnaviv_obj->base.size; + mapping->iova = node->start; + ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size, + IOMMU_READ | IOMMU_WRITE); + + if (ret < 0) { + drm_mm_remove_node(node); + mutex_unlock(&mmu->lock); + return ret; + } + + list_add_tail(&mapping->mmu_node, &mmu->mappings); + mutex_unlock(&mmu->lock); + + return ret; +} + +void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu, + struct etnaviv_vram_mapping *mapping) +{ + WARN_ON(mapping->use); + + mutex_lock(&mmu->lock); + + /* If the vram node is on the mm, unmap and remove the node */ + if (mapping->vram_node.mm == &mmu->mm) + etnaviv_iommu_remove_mapping(mmu, mapping); + + list_del(&mapping->mmu_node); + mutex_unlock(&mmu->lock); +} + +void etnaviv_iommu_destroy(struct etnaviv_iommu *mmu) +{ + drm_mm_takedown(&mmu->mm); + iommu_domain_free(mmu->domain); + kfree(mmu); +} + +struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu, + struct iommu_domain *domain, enum etnaviv_iommu_version version) +{ + struct etnaviv_iommu *mmu; + + mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); + if (!mmu) + return ERR_PTR(-ENOMEM); + + mmu->domain = domain; + mmu->gpu = gpu; + mmu->version = version; + mutex_init(&mmu->lock); + INIT_LIST_HEAD(&mmu->mappings); + + drm_mm_init(&mmu->mm, domain->geometry.aperture_start, + domain->geometry.aperture_end - + domain->geometry.aperture_start + 1); + + iommu_set_fault_handler(domain, etnaviv_fault_handler, gpu->dev); + + return mmu; +} + +size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu) +{ + struct etnaviv_iommu_ops *ops; + + ops = container_of(iommu->domain->ops, struct etnaviv_iommu_ops, ops); + + return ops->dump_size(iommu->domain); +} + +void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf) +{ + struct etnaviv_iommu_ops *ops; + + ops = container_of(iommu->domain->ops, struct etnaviv_iommu_ops, ops); + + ops->dump(iommu->domain, buf); +} diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h new file mode 100644 index 000000000000..fff215a47630 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __ETNAVIV_MMU_H__ +#define __ETNAVIV_MMU_H__ + +#include <linux/iommu.h> + +enum etnaviv_iommu_version { + ETNAVIV_IOMMU_V1 = 0, + ETNAVIV_IOMMU_V2, +}; + +struct etnaviv_gpu; +struct etnaviv_vram_mapping; + +struct etnaviv_iommu_ops { + struct iommu_ops ops; + size_t (*dump_size)(struct iommu_domain *); + void (*dump)(struct iommu_domain *, void *); +}; + +struct etnaviv_iommu { + struct etnaviv_gpu *gpu; + struct iommu_domain *domain; + + enum etnaviv_iommu_version version; + + /* memory manager for GPU address area */ + struct mutex lock; + struct list_head mappings; + struct drm_mm mm; + u32 last_iova; + bool need_flush; +}; + +struct etnaviv_gem_object; + +int etnaviv_iommu_attach(struct etnaviv_iommu *iommu, const char **names, + int cnt); +int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova, + struct sg_table *sgt, unsigned len, int prot); +int etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova, + struct sg_table *sgt, unsigned len); +int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu, + struct etnaviv_gem_object *etnaviv_obj, u32 memory_base, + struct etnaviv_vram_mapping *mapping); +void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu, + struct etnaviv_vram_mapping *mapping); +void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu); + +size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu); +void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf); + +struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu, + struct iommu_domain *domain, enum etnaviv_iommu_version version); + +#endif /* __ETNAVIV_MMU_H__ */ diff --git a/drivers/gpu/drm/etnaviv/state.xml.h b/drivers/gpu/drm/etnaviv/state.xml.h new file mode 100644 index 000000000000..368218304566 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/state.xml.h @@ -0,0 +1,351 @@ +#ifndef STATE_XML +#define STATE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- state.xml ( 18882 bytes, from 2015-03-25 11:42:32) +- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) +- state_hi.xml ( 23420 bytes, from 2015-03-25 11:47:21) +- state_2d.xml ( 51549 bytes, from 2015-03-25 11:25:06) +- state_3d.xml ( 54600 bytes, from 2015-03-25 11:25:19) +- state_vg.xml ( 5973 bytes, from 2015-03-25 11:26:01) + +Copyright (C) 2015 +*/ + + +#define VARYING_COMPONENT_USE_UNUSED 0x00000000 +#define VARYING_COMPONENT_USE_USED 0x00000001 +#define VARYING_COMPONENT_USE_POINTCOORD_X 0x00000002 +#define VARYING_COMPONENT_USE_POINTCOORD_Y 0x00000003 +#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__MASK 0x000000ff +#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__SHIFT 0 +#define FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(x) (((x) << FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__SHIFT) & FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE__MASK) +#define VIVS_FE 0x00000000 + +#define VIVS_FE_VERTEX_ELEMENT_CONFIG(i0) (0x00000600 + 0x4*(i0)) +#define VIVS_FE_VERTEX_ELEMENT_CONFIG__ESIZE 0x00000004 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG__LEN 0x00000010 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE__MASK 0x0000000f +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE__SHIFT 0 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_BYTE 0x00000000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_BYTE 0x00000001 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_SHORT 0x00000002 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_SHORT 0x00000003 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT 0x00000004 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT 0x00000005 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FLOAT 0x00000008 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_HALF_FLOAT 0x00000009 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_FIXED 0x0000000b +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_INT_10_10_10_2 0x0000000c +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_UNSIGNED_INT_10_10_10_2 0x0000000d +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__MASK 0x00000030 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT 4 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(x) (((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN__MASK) +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE 0x00000080 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__MASK 0x00000700 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__SHIFT 8 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(x) (((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM__MASK) +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__MASK 0x00003000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__SHIFT 12 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(x) (((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM__MASK) +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE__MASK 0x0000c000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE__SHIFT 14 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_OFF 0x00000000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_NORMALIZE_ON 0x00008000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START__MASK 0x00ff0000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START__SHIFT 16 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_START(x) (((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_START__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_START__MASK) +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END__MASK 
0xff000000 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END__SHIFT 24 +#define VIVS_FE_VERTEX_ELEMENT_CONFIG_END(x) (((x) << VIVS_FE_VERTEX_ELEMENT_CONFIG_END__SHIFT) & VIVS_FE_VERTEX_ELEMENT_CONFIG_END__MASK) + +#define VIVS_FE_CMD_STREAM_BASE_ADDR 0x00000640 + +#define VIVS_FE_INDEX_STREAM_BASE_ADDR 0x00000644 + +#define VIVS_FE_INDEX_STREAM_CONTROL 0x00000648 +#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE__MASK 0x00000003 +#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE__SHIFT 0 +#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_CHAR 0x00000000 +#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_SHORT 0x00000001 +#define VIVS_FE_INDEX_STREAM_CONTROL_TYPE_UNSIGNED_INT 0x00000002 + +#define VIVS_FE_VERTEX_STREAM_BASE_ADDR 0x0000064c + +#define VIVS_FE_VERTEX_STREAM_CONTROL 0x00000650 + +#define VIVS_FE_COMMAND_ADDRESS 0x00000654 + +#define VIVS_FE_COMMAND_CONTROL 0x00000658 +#define VIVS_FE_COMMAND_CONTROL_PREFETCH__MASK 0x0000ffff +#define VIVS_FE_COMMAND_CONTROL_PREFETCH__SHIFT 0 +#define VIVS_FE_COMMAND_CONTROL_PREFETCH(x) (((x) << VIVS_FE_COMMAND_CONTROL_PREFETCH__SHIFT) & VIVS_FE_COMMAND_CONTROL_PREFETCH__MASK) +#define VIVS_FE_COMMAND_CONTROL_ENABLE 0x00010000 + +#define VIVS_FE_DMA_STATUS 0x0000065c + +#define VIVS_FE_DMA_DEBUG_STATE 0x00000660 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE__MASK 0x0000001f +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE__SHIFT 0 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DEC 0x00000001 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_ADR0 0x00000002 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LOAD0 0x00000003 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_ADR1 0x00000004 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LOAD1 0x00000005 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DADR 0x00000006 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DCMD 0x00000007 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DCNTL 0x00000008 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_3DIDXCNTL 0x00000009 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_INITREQDMA 0x0000000a +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DRAWIDX 0x0000000b +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_DRAW 0x0000000c +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DRECT0 0x0000000d +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DRECT1 0x0000000e +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DDATA0 0x0000000f +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_2DDATA1 0x00000010 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_WAITFIFO 0x00000011 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_WAIT 0x00000012 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_LINK 0x00000013 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_END 0x00000014 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_STATE_STALL 0x00000015 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE__MASK 0x00000300 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE__SHIFT 8 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_START 0x00000100 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_REQ 0x00000200 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_DMA_STATE_END 0x00000300 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE__MASK 0x00000c00 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE__SHIFT 10 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_RAMVALID 0x00000400 +#define VIVS_FE_DMA_DEBUG_STATE_CMD_FETCH_STATE_VALID 0x00000800 +#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE__MASK 0x00003000 +#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE__SHIFT 12 +#define 
VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_WAITIDX 0x00001000 +#define VIVS_FE_DMA_DEBUG_STATE_REQ_DMA_STATE_CAL 0x00002000 +#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE__MASK 0x0000c000 +#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE__SHIFT 14 +#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_LDADR 0x00004000 +#define VIVS_FE_DMA_DEBUG_STATE_CAL_STATE_IDXCALC 0x00008000 +#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE__MASK 0x00030000 +#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE__SHIFT 16 +#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_IDLE 0x00000000 +#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_CKCACHE 0x00010000 +#define VIVS_FE_DMA_DEBUG_STATE_VE_REQ_STATE_MISS 0x00020000 + +#define VIVS_FE_DMA_ADDRESS 0x00000664 + +#define VIVS_FE_DMA_LOW 0x00000668 + +#define VIVS_FE_DMA_HIGH 0x0000066c + +#define VIVS_FE_AUTO_FLUSH 0x00000670 + +#define VIVS_FE_UNK00678 0x00000678 + +#define VIVS_FE_UNK0067C 0x0000067c + +#define VIVS_FE_VERTEX_STREAMS(i0) (0x00000000 + 0x4*(i0)) +#define VIVS_FE_VERTEX_STREAMS__ESIZE 0x00000004 +#define VIVS_FE_VERTEX_STREAMS__LEN 0x00000008 + +#define VIVS_FE_VERTEX_STREAMS_BASE_ADDR(i0) (0x00000680 + 0x4*(i0)) + +#define VIVS_FE_VERTEX_STREAMS_CONTROL(i0) (0x000006a0 + 0x4*(i0)) + +#define VIVS_FE_UNK00700(i0) (0x00000700 + 0x4*(i0)) +#define VIVS_FE_UNK00700__ESIZE 0x00000004 +#define VIVS_FE_UNK00700__LEN 0x00000010 + +#define VIVS_FE_UNK00740(i0) (0x00000740 + 0x4*(i0)) +#define VIVS_FE_UNK00740__ESIZE 0x00000004 +#define VIVS_FE_UNK00740__LEN 0x00000010 + +#define VIVS_FE_UNK00780(i0) (0x00000780 + 0x4*(i0)) +#define VIVS_FE_UNK00780__ESIZE 0x00000004 +#define VIVS_FE_UNK00780__LEN 0x00000010 + +#define VIVS_GL 0x00000000 + +#define VIVS_GL_PIPE_SELECT 0x00003800 +#define VIVS_GL_PIPE_SELECT_PIPE__MASK 0x00000001 +#define VIVS_GL_PIPE_SELECT_PIPE__SHIFT 0 +#define VIVS_GL_PIPE_SELECT_PIPE(x) (((x) << VIVS_GL_PIPE_SELECT_PIPE__SHIFT) & VIVS_GL_PIPE_SELECT_PIPE__MASK) + +#define VIVS_GL_EVENT 0x00003804 +#define VIVS_GL_EVENT_EVENT_ID__MASK 0x0000001f +#define VIVS_GL_EVENT_EVENT_ID__SHIFT 0 +#define VIVS_GL_EVENT_EVENT_ID(x) (((x) << VIVS_GL_EVENT_EVENT_ID__SHIFT) & VIVS_GL_EVENT_EVENT_ID__MASK) +#define VIVS_GL_EVENT_FROM_FE 0x00000020 +#define VIVS_GL_EVENT_FROM_PE 0x00000040 +#define VIVS_GL_EVENT_SOURCE__MASK 0x00001f00 +#define VIVS_GL_EVENT_SOURCE__SHIFT 8 +#define VIVS_GL_EVENT_SOURCE(x) (((x) << VIVS_GL_EVENT_SOURCE__SHIFT) & VIVS_GL_EVENT_SOURCE__MASK) + +#define VIVS_GL_SEMAPHORE_TOKEN 0x00003808 +#define VIVS_GL_SEMAPHORE_TOKEN_FROM__MASK 0x0000001f +#define VIVS_GL_SEMAPHORE_TOKEN_FROM__SHIFT 0 +#define VIVS_GL_SEMAPHORE_TOKEN_FROM(x) (((x) << VIVS_GL_SEMAPHORE_TOKEN_FROM__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_FROM__MASK) +#define VIVS_GL_SEMAPHORE_TOKEN_TO__MASK 0x00001f00 +#define VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT 8 +#define VIVS_GL_SEMAPHORE_TOKEN_TO(x) (((x) << VIVS_GL_SEMAPHORE_TOKEN_TO__SHIFT) & VIVS_GL_SEMAPHORE_TOKEN_TO__MASK) + +#define VIVS_GL_FLUSH_CACHE 0x0000380c +#define VIVS_GL_FLUSH_CACHE_DEPTH 0x00000001 +#define VIVS_GL_FLUSH_CACHE_COLOR 0x00000002 +#define VIVS_GL_FLUSH_CACHE_TEXTURE 0x00000004 +#define VIVS_GL_FLUSH_CACHE_PE2D 0x00000008 +#define VIVS_GL_FLUSH_CACHE_TEXTUREVS 0x00000010 +#define VIVS_GL_FLUSH_CACHE_SHADER_L1 0x00000020 +#define VIVS_GL_FLUSH_CACHE_SHADER_L2 0x00000040 + +#define VIVS_GL_FLUSH_MMU 0x00003810 +#define VIVS_GL_FLUSH_MMU_FLUSH_FEMMU 0x00000001 +#define VIVS_GL_FLUSH_MMU_FLUSH_UNK1 0x00000002 +#define 
VIVS_GL_FLUSH_MMU_FLUSH_UNK2 0x00000004 +#define VIVS_GL_FLUSH_MMU_FLUSH_PEMMU 0x00000008 +#define VIVS_GL_FLUSH_MMU_FLUSH_UNK4 0x00000010 + +#define VIVS_GL_VERTEX_ELEMENT_CONFIG 0x00003814 + +#define VIVS_GL_MULTI_SAMPLE_CONFIG 0x00003818 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK 0x00000003 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__SHIFT 0 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE 0x00000000 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X 0x00000001 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X 0x00000002 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_MASK 0x00000008 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__MASK 0x000000f0 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__SHIFT 4 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(x) (((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES__MASK) +#define VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES_MASK 0x00000100 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__MASK 0x00007000 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__SHIFT 12 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12(x) (((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12__MASK) +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK12_MASK 0x00008000 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__MASK 0x00030000 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__SHIFT 16 +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16(x) (((x) << VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__SHIFT) & VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16__MASK) +#define VIVS_GL_MULTI_SAMPLE_CONFIG_UNK16_MASK 0x00080000 + +#define VIVS_GL_VARYING_TOTAL_COMPONENTS 0x0000381c +#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__MASK 0x000000ff +#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__SHIFT 0 +#define VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM(x) (((x) << VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__SHIFT) & VIVS_GL_VARYING_TOTAL_COMPONENTS_NUM__MASK) + +#define VIVS_GL_VARYING_NUM_COMPONENTS 0x00003820 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__MASK 0x00000007 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__SHIFT 0 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR0(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR0__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__MASK 0x00000070 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__SHIFT 4 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR1(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR1__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__MASK 0x00000700 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__SHIFT 8 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR2(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR2__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__MASK 0x00007000 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__SHIFT 12 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR3(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR3__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__MASK 0x00070000 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__SHIFT 16 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR4(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR4__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__MASK 0x00700000 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__SHIFT 20 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR5(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR5__MASK) 
+#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__MASK 0x07000000 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__SHIFT 24 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR6(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR6__MASK) +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__MASK 0x70000000 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT 28 +#define VIVS_GL_VARYING_NUM_COMPONENTS_VAR7(x) (((x) << VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__SHIFT) & VIVS_GL_VARYING_NUM_COMPONENTS_VAR7__MASK) + +#define VIVS_GL_VARYING_COMPONENT_USE(i0) (0x00003828 + 0x4*(i0)) +#define VIVS_GL_VARYING_COMPONENT_USE__ESIZE 0x00000004 +#define VIVS_GL_VARYING_COMPONENT_USE__LEN 0x00000002 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP0__MASK 0x00000003 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP0__SHIFT 0 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP0(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP0__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP0__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP1__MASK 0x0000000c +#define VIVS_GL_VARYING_COMPONENT_USE_COMP1__SHIFT 2 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP1(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP1__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP1__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP2__MASK 0x00000030 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP2__SHIFT 4 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP2(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP2__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP2__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP3__MASK 0x000000c0 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP3__SHIFT 6 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP3(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP3__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP3__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP4__MASK 0x00000300 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP4__SHIFT 8 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP4(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP4__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP4__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP5__MASK 0x00000c00 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP5__SHIFT 10 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP5(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP5__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP5__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP6__MASK 0x00003000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP6__SHIFT 12 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP6(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP6__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP6__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP7__MASK 0x0000c000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP7__SHIFT 14 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP7(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP7__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP7__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP8__MASK 0x00030000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP8__SHIFT 16 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP8(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP8__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP8__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP9__MASK 0x000c0000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP9__SHIFT 18 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP9(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP9__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP9__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP10__MASK 0x00300000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP10__SHIFT 20 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP10(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP10__SHIFT) 
& VIVS_GL_VARYING_COMPONENT_USE_COMP10__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP11__MASK 0x00c00000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP11__SHIFT 22 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP11(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP11__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP11__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP12__MASK 0x03000000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP12__SHIFT 24 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP12(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP12__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP12__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP13__MASK 0x0c000000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP13__SHIFT 26 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP13(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP13__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP13__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP14__MASK 0x30000000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP14__SHIFT 28 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP14(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP14__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP14__MASK) +#define VIVS_GL_VARYING_COMPONENT_USE_COMP15__MASK 0xc0000000 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT 30 +#define VIVS_GL_VARYING_COMPONENT_USE_COMP15(x) (((x) << VIVS_GL_VARYING_COMPONENT_USE_COMP15__SHIFT) & VIVS_GL_VARYING_COMPONENT_USE_COMP15__MASK) + +#define VIVS_GL_UNK03834 0x00003834 + +#define VIVS_GL_UNK03838 0x00003838 + +#define VIVS_GL_API_MODE 0x0000384c +#define VIVS_GL_API_MODE_OPENGL 0x00000000 +#define VIVS_GL_API_MODE_OPENVG 0x00000001 +#define VIVS_GL_API_MODE_OPENCL 0x00000002 + +#define VIVS_GL_CONTEXT_POINTER 0x00003850 + +#define VIVS_GL_UNK03A00 0x00003a00 + +#define VIVS_GL_STALL_TOKEN 0x00003c00 +#define VIVS_GL_STALL_TOKEN_FROM__MASK 0x0000001f +#define VIVS_GL_STALL_TOKEN_FROM__SHIFT 0 +#define VIVS_GL_STALL_TOKEN_FROM(x) (((x) << VIVS_GL_STALL_TOKEN_FROM__SHIFT) & VIVS_GL_STALL_TOKEN_FROM__MASK) +#define VIVS_GL_STALL_TOKEN_TO__MASK 0x00001f00 +#define VIVS_GL_STALL_TOKEN_TO__SHIFT 8 +#define VIVS_GL_STALL_TOKEN_TO(x) (((x) << VIVS_GL_STALL_TOKEN_TO__SHIFT) & VIVS_GL_STALL_TOKEN_TO__MASK) +#define VIVS_GL_STALL_TOKEN_FLIP0 0x40000000 +#define VIVS_GL_STALL_TOKEN_FLIP1 0x80000000 + +#define VIVS_DUMMY 0x00000000 + +#define VIVS_DUMMY_DUMMY 0x0003fffc + + +#endif /* STATE_XML */ diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h new file mode 100644 index 000000000000..0064f2640396 --- /dev/null +++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h @@ -0,0 +1,407 @@ +#ifndef STATE_HI_XML +#define STATE_HI_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- state_hi.xml ( 23420 bytes, from 2015-03-25 11:47:21) +- common.xml ( 18437 bytes, from 2015-03-25 11:27:41) + +Copyright (C) 2015 +*/ + + +#define MMU_EXCEPTION_SLAVE_NOT_PRESENT 0x00000001 +#define MMU_EXCEPTION_PAGE_NOT_PRESENT 0x00000002 +#define MMU_EXCEPTION_WRITE_VIOLATION 0x00000003 +#define VIVS_HI 0x00000000 + +#define VIVS_HI_CLOCK_CONTROL 0x00000000 +#define VIVS_HI_CLOCK_CONTROL_CLK3D_DIS 0x00000001 +#define VIVS_HI_CLOCK_CONTROL_CLK2D_DIS 0x00000002 +#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK 0x000001fc +#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__SHIFT 2 +#define VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(x) (((x) << VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__SHIFT) & VIVS_HI_CLOCK_CONTROL_FSCALE_VAL__MASK) +#define VIVS_HI_CLOCK_CONTROL_FSCALE_CMD_LOAD 0x00000200 +#define VIVS_HI_CLOCK_CONTROL_DISABLE_RAM_CLK_GATING 0x00000400 +#define VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS 0x00000800 +#define VIVS_HI_CLOCK_CONTROL_SOFT_RESET 0x00001000 +#define VIVS_HI_CLOCK_CONTROL_IDLE_3D 0x00010000 +#define VIVS_HI_CLOCK_CONTROL_IDLE_2D 0x00020000 +#define VIVS_HI_CLOCK_CONTROL_IDLE_VG 0x00040000 +#define VIVS_HI_CLOCK_CONTROL_ISOLATE_GPU 0x00080000 +#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__MASK 0x00f00000 +#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__SHIFT 20 +#define VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE(x) (((x) << VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__SHIFT) & VIVS_HI_CLOCK_CONTROL_DEBUG_PIXEL_PIPE__MASK) + +#define VIVS_HI_IDLE_STATE 0x00000004 +#define VIVS_HI_IDLE_STATE_FE 0x00000001 +#define VIVS_HI_IDLE_STATE_DE 0x00000002 +#define VIVS_HI_IDLE_STATE_PE 0x00000004 +#define VIVS_HI_IDLE_STATE_SH 0x00000008 +#define VIVS_HI_IDLE_STATE_PA 0x00000010 +#define VIVS_HI_IDLE_STATE_SE 0x00000020 +#define VIVS_HI_IDLE_STATE_RA 0x00000040 +#define VIVS_HI_IDLE_STATE_TX 0x00000080 +#define VIVS_HI_IDLE_STATE_VG 0x00000100 +#define VIVS_HI_IDLE_STATE_IM 0x00000200 +#define VIVS_HI_IDLE_STATE_FP 0x00000400 +#define VIVS_HI_IDLE_STATE_TS 0x00000800 +#define VIVS_HI_IDLE_STATE_AXI_LP 0x80000000 + +#define VIVS_HI_AXI_CONFIG 0x00000008 +#define VIVS_HI_AXI_CONFIG_AWID__MASK 0x0000000f +#define VIVS_HI_AXI_CONFIG_AWID__SHIFT 0 +#define VIVS_HI_AXI_CONFIG_AWID(x) (((x) << VIVS_HI_AXI_CONFIG_AWID__SHIFT) & VIVS_HI_AXI_CONFIG_AWID__MASK) +#define VIVS_HI_AXI_CONFIG_ARID__MASK 0x000000f0 +#define VIVS_HI_AXI_CONFIG_ARID__SHIFT 4 +#define VIVS_HI_AXI_CONFIG_ARID(x) (((x) << VIVS_HI_AXI_CONFIG_ARID__SHIFT) & VIVS_HI_AXI_CONFIG_ARID__MASK) +#define VIVS_HI_AXI_CONFIG_AWCACHE__MASK 0x00000f00 +#define VIVS_HI_AXI_CONFIG_AWCACHE__SHIFT 8 +#define VIVS_HI_AXI_CONFIG_AWCACHE(x) (((x) << VIVS_HI_AXI_CONFIG_AWCACHE__SHIFT) & VIVS_HI_AXI_CONFIG_AWCACHE__MASK) +#define VIVS_HI_AXI_CONFIG_ARCACHE__MASK 0x0000f000 +#define VIVS_HI_AXI_CONFIG_ARCACHE__SHIFT 12 +#define VIVS_HI_AXI_CONFIG_ARCACHE(x) (((x) << VIVS_HI_AXI_CONFIG_ARCACHE__SHIFT) & VIVS_HI_AXI_CONFIG_ARCACHE__MASK) + +#define VIVS_HI_AXI_STATUS 0x0000000c +#define VIVS_HI_AXI_STATUS_WR_ERR_ID__MASK 0x0000000f +#define VIVS_HI_AXI_STATUS_WR_ERR_ID__SHIFT 0 +#define VIVS_HI_AXI_STATUS_WR_ERR_ID(x) (((x) << VIVS_HI_AXI_STATUS_WR_ERR_ID__SHIFT) & VIVS_HI_AXI_STATUS_WR_ERR_ID__MASK) +#define VIVS_HI_AXI_STATUS_RD_ERR_ID__MASK 0x000000f0 +#define VIVS_HI_AXI_STATUS_RD_ERR_ID__SHIFT 4 +#define 
VIVS_HI_AXI_STATUS_RD_ERR_ID(x) (((x) << VIVS_HI_AXI_STATUS_RD_ERR_ID__SHIFT) & VIVS_HI_AXI_STATUS_RD_ERR_ID__MASK) +#define VIVS_HI_AXI_STATUS_DET_WR_ERR 0x00000100 +#define VIVS_HI_AXI_STATUS_DET_RD_ERR 0x00000200 + +#define VIVS_HI_INTR_ACKNOWLEDGE 0x00000010 +#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__MASK 0x7fffffff +#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__SHIFT 0 +#define VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC(x) (((x) << VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__SHIFT) & VIVS_HI_INTR_ACKNOWLEDGE_INTR_VEC__MASK) +#define VIVS_HI_INTR_ACKNOWLEDGE_AXI_BUS_ERROR 0x80000000 + +#define VIVS_HI_INTR_ENBL 0x00000014 +#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__MASK 0xffffffff +#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__SHIFT 0 +#define VIVS_HI_INTR_ENBL_INTR_ENBL_VEC(x) (((x) << VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__SHIFT) & VIVS_HI_INTR_ENBL_INTR_ENBL_VEC__MASK) + +#define VIVS_HI_CHIP_IDENTITY 0x00000018 +#define VIVS_HI_CHIP_IDENTITY_FAMILY__MASK 0xff000000 +#define VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT 24 +#define VIVS_HI_CHIP_IDENTITY_FAMILY(x) (((x) << VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK) +#define VIVS_HI_CHIP_IDENTITY_PRODUCT__MASK 0x00ff0000 +#define VIVS_HI_CHIP_IDENTITY_PRODUCT__SHIFT 16 +#define VIVS_HI_CHIP_IDENTITY_PRODUCT(x) (((x) << VIVS_HI_CHIP_IDENTITY_PRODUCT__SHIFT) & VIVS_HI_CHIP_IDENTITY_PRODUCT__MASK) +#define VIVS_HI_CHIP_IDENTITY_REVISION__MASK 0x0000f000 +#define VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT 12 +#define VIVS_HI_CHIP_IDENTITY_REVISION(x) (((x) << VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT) & VIVS_HI_CHIP_IDENTITY_REVISION__MASK) + +#define VIVS_HI_CHIP_FEATURE 0x0000001c + +#define VIVS_HI_CHIP_MODEL 0x00000020 + +#define VIVS_HI_CHIP_REV 0x00000024 + +#define VIVS_HI_CHIP_DATE 0x00000028 + +#define VIVS_HI_CHIP_TIME 0x0000002c + +#define VIVS_HI_CHIP_MINOR_FEATURE_0 0x00000034 + +#define VIVS_HI_CACHE_CONTROL 0x00000038 + +#define VIVS_HI_MEMORY_COUNTER_RESET 0x0000003c + +#define VIVS_HI_PROFILE_READ_BYTES8 0x00000040 + +#define VIVS_HI_PROFILE_WRITE_BYTES8 0x00000044 + +#define VIVS_HI_CHIP_SPECS 0x00000048 +#define VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK 0x0000000f +#define VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT 0 +#define VIVS_HI_CHIP_SPECS_STREAM_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK) +#define VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK 0x000000f0 +#define VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT 4 +#define VIVS_HI_CHIP_SPECS_REGISTER_MAX(x) (((x) << VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT) & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK) +#define VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK 0x00000f00 +#define VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT 8 +#define VIVS_HI_CHIP_SPECS_THREAD_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK) +#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK 0x0001f000 +#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT 12 +#define VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE(x) (((x) << VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK) +#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK 0x01f00000 +#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT 20 +#define VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK) +#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK 0x0e000000 +#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT 25 +#define VIVS_HI_CHIP_SPECS_PIXEL_PIPES(x) (((x) << VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT) & 
VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK) +#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK 0xf0000000 +#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT 28 +#define VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE(x) (((x) << VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK) + +#define VIVS_HI_PROFILE_WRITE_BURSTS 0x0000004c + +#define VIVS_HI_PROFILE_WRITE_REQUESTS 0x00000050 + +#define VIVS_HI_PROFILE_READ_BURSTS 0x00000058 + +#define VIVS_HI_PROFILE_READ_REQUESTS 0x0000005c + +#define VIVS_HI_PROFILE_READ_LASTS 0x00000060 + +#define VIVS_HI_GP_OUT0 0x00000064 + +#define VIVS_HI_GP_OUT1 0x00000068 + +#define VIVS_HI_GP_OUT2 0x0000006c + +#define VIVS_HI_AXI_CONTROL 0x00000070 +#define VIVS_HI_AXI_CONTROL_WR_FULL_BURST_MODE 0x00000001 + +#define VIVS_HI_CHIP_MINOR_FEATURE_1 0x00000074 + +#define VIVS_HI_PROFILE_TOTAL_CYCLES 0x00000078 + +#define VIVS_HI_PROFILE_IDLE_CYCLES 0x0000007c + +#define VIVS_HI_CHIP_SPECS_2 0x00000080 +#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK 0x000000ff +#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT 0 +#define VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE(x) (((x) << VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT) & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK) +#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK 0x0000ff00 +#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT 8 +#define VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK) +#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK 0xffff0000 +#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT 16 +#define VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS(x) (((x) << VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT) & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK) + +#define VIVS_HI_CHIP_MINOR_FEATURE_2 0x00000084 + +#define VIVS_HI_CHIP_MINOR_FEATURE_3 0x00000088 + +#define VIVS_HI_CHIP_MINOR_FEATURE_4 0x00000094 + +#define VIVS_PM 0x00000000 + +#define VIVS_PM_POWER_CONTROLS 0x00000100 +#define VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING 0x00000001 +#define VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING 0x00000002 +#define VIVS_PM_POWER_CONTROLS_DISABLE_STARVE_MODULE_CLOCK_GATING 0x00000004 +#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__MASK 0x000000f0 +#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__SHIFT 4 +#define VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER(x) (((x) << VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__SHIFT) & VIVS_PM_POWER_CONTROLS_TURN_ON_COUNTER__MASK) +#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__MASK 0xffff0000 +#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__SHIFT 16 +#define VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER(x) (((x) << VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__SHIFT) & VIVS_PM_POWER_CONTROLS_TURN_OFF_COUNTER__MASK) + +#define VIVS_PM_MODULE_CONTROLS 0x00000104 +#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_FE 0x00000001 +#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_DE 0x00000002 +#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE 0x00000004 + +#define VIVS_PM_MODULE_STATUS 0x00000108 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE 0x00000001 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE 0x00000002 +#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE 0x00000004 + +#define VIVS_PM_PULSE_EATER 0x0000010c + +#define VIVS_MMUv2 0x00000000 + +#define VIVS_MMUv2_SAFE_ADDRESS 0x00000180 + +#define VIVS_MMUv2_CONFIGURATION 0x00000184 +#define VIVS_MMUv2_CONFIGURATION_MODE__MASK 0x00000001 +#define 
VIVS_MMUv2_CONFIGURATION_MODE__SHIFT 0 +#define VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K 0x00000000 +#define VIVS_MMUv2_CONFIGURATION_MODE_MODE1_K 0x00000001 +#define VIVS_MMUv2_CONFIGURATION_MODE_MASK 0x00000008 +#define VIVS_MMUv2_CONFIGURATION_FLUSH__MASK 0x00000010 +#define VIVS_MMUv2_CONFIGURATION_FLUSH__SHIFT 4 +#define VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH 0x00000010 +#define VIVS_MMUv2_CONFIGURATION_FLUSH_MASK 0x00000080 +#define VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK 0x00000100 +#define VIVS_MMUv2_CONFIGURATION_ADDRESS__MASK 0xfffffc00 +#define VIVS_MMUv2_CONFIGURATION_ADDRESS__SHIFT 10 +#define VIVS_MMUv2_CONFIGURATION_ADDRESS(x) (((x) << VIVS_MMUv2_CONFIGURATION_ADDRESS__SHIFT) & VIVS_MMUv2_CONFIGURATION_ADDRESS__MASK) + +#define VIVS_MMUv2_STATUS 0x00000188 +#define VIVS_MMUv2_STATUS_EXCEPTION0__MASK 0x00000003 +#define VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT 0 +#define VIVS_MMUv2_STATUS_EXCEPTION0(x) (((x) << VIVS_MMUv2_STATUS_EXCEPTION0__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION0__MASK) +#define VIVS_MMUv2_STATUS_EXCEPTION1__MASK 0x00000030 +#define VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT 4 +#define VIVS_MMUv2_STATUS_EXCEPTION1(x) (((x) << VIVS_MMUv2_STATUS_EXCEPTION1__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION1__MASK) +#define VIVS_MMUv2_STATUS_EXCEPTION2__MASK 0x00000300 +#define VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT 8 +#define VIVS_MMUv2_STATUS_EXCEPTION2(x) (((x) << VIVS_MMUv2_STATUS_EXCEPTION2__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION2__MASK) +#define VIVS_MMUv2_STATUS_EXCEPTION3__MASK 0x00003000 +#define VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT 12 +#define VIVS_MMUv2_STATUS_EXCEPTION3(x) (((x) << VIVS_MMUv2_STATUS_EXCEPTION3__SHIFT) & VIVS_MMUv2_STATUS_EXCEPTION3__MASK) + +#define VIVS_MMUv2_CONTROL 0x0000018c +#define VIVS_MMUv2_CONTROL_ENABLE 0x00000001 + +#define VIVS_MMUv2_EXCEPTION_ADDR(i0) (0x00000190 + 0x4*(i0)) +#define VIVS_MMUv2_EXCEPTION_ADDR__ESIZE 0x00000004 +#define VIVS_MMUv2_EXCEPTION_ADDR__LEN 0x00000004 + +#define VIVS_MC 0x00000000 + +#define VIVS_MC_MMU_FE_PAGE_TABLE 0x00000400 + +#define VIVS_MC_MMU_TX_PAGE_TABLE 0x00000404 + +#define VIVS_MC_MMU_PE_PAGE_TABLE 0x00000408 + +#define VIVS_MC_MMU_PEZ_PAGE_TABLE 0x0000040c + +#define VIVS_MC_MMU_RA_PAGE_TABLE 0x00000410 + +#define VIVS_MC_DEBUG_MEMORY 0x00000414 +#define VIVS_MC_DEBUG_MEMORY_SPECIAL_PATCH_GC320 0x00000008 +#define VIVS_MC_DEBUG_MEMORY_FAST_CLEAR_BYPASS 0x00100000 +#define VIVS_MC_DEBUG_MEMORY_COMPRESSION_BYPASS 0x00200000 + +#define VIVS_MC_MEMORY_BASE_ADDR_RA 0x00000418 + +#define VIVS_MC_MEMORY_BASE_ADDR_FE 0x0000041c + +#define VIVS_MC_MEMORY_BASE_ADDR_TX 0x00000420 + +#define VIVS_MC_MEMORY_BASE_ADDR_PEZ 0x00000424 + +#define VIVS_MC_MEMORY_BASE_ADDR_PE 0x00000428 + +#define VIVS_MC_MEMORY_TIMING_CONTROL 0x0000042c + +#define VIVS_MC_MEMORY_FLUSH 0x00000430 + +#define VIVS_MC_PROFILE_CYCLE_COUNTER 0x00000438 + +#define VIVS_MC_DEBUG_READ0 0x0000043c + +#define VIVS_MC_DEBUG_READ1 0x00000440 + +#define VIVS_MC_DEBUG_WRITE 0x00000444 + +#define VIVS_MC_PROFILE_RA_READ 0x00000448 + +#define VIVS_MC_PROFILE_TX_READ 0x0000044c + +#define VIVS_MC_PROFILE_FE_READ 0x00000450 + +#define VIVS_MC_PROFILE_PE_READ 0x00000454 + +#define VIVS_MC_PROFILE_DE_READ 0x00000458 + +#define VIVS_MC_PROFILE_SH_READ 0x0000045c + +#define VIVS_MC_PROFILE_PA_READ 0x00000460 + +#define VIVS_MC_PROFILE_SE_READ 0x00000464 + +#define VIVS_MC_PROFILE_MC_READ 0x00000468 + +#define VIVS_MC_PROFILE_HI_READ 0x0000046c + +#define VIVS_MC_PROFILE_CONFIG0 0x00000470 +#define VIVS_MC_PROFILE_CONFIG0_FE__MASK 0x0000000f +#define 
VIVS_MC_PROFILE_CONFIG0_FE__SHIFT 0 +#define VIVS_MC_PROFILE_CONFIG0_FE_RESET 0x0000000f +#define VIVS_MC_PROFILE_CONFIG0_DE__MASK 0x00000f00 +#define VIVS_MC_PROFILE_CONFIG0_DE__SHIFT 8 +#define VIVS_MC_PROFILE_CONFIG0_DE_RESET 0x00000f00 +#define VIVS_MC_PROFILE_CONFIG0_PE__MASK 0x000f0000 +#define VIVS_MC_PROFILE_CONFIG0_PE__SHIFT 16 +#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_COLOR_PIPE 0x00000000 +#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_KILLED_BY_DEPTH_PIPE 0x00010000 +#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_COLOR_PIPE 0x00020000 +#define VIVS_MC_PROFILE_CONFIG0_PE_PIXEL_COUNT_DRAWN_BY_DEPTH_PIPE 0x00030000 +#define VIVS_MC_PROFILE_CONFIG0_PE_PIXELS_RENDERED_2D 0x000b0000 +#define VIVS_MC_PROFILE_CONFIG0_PE_RESET 0x000f0000 +#define VIVS_MC_PROFILE_CONFIG0_SH__MASK 0x0f000000 +#define VIVS_MC_PROFILE_CONFIG0_SH__SHIFT 24 +#define VIVS_MC_PROFILE_CONFIG0_SH_SHADER_CYCLES 0x04000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_PS_INST_COUNTER 0x07000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_PIXEL_COUNTER 0x08000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_VS_INST_COUNTER 0x09000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_RENDERED_VERTICE_COUNTER 0x0a000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_VTX_BRANCH_INST_COUNTER 0x0b000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_VTX_TEXLD_INST_COUNTER 0x0c000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_PXL_BRANCH_INST_COUNTER 0x0d000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_PXL_TEXLD_INST_COUNTER 0x0e000000 +#define VIVS_MC_PROFILE_CONFIG0_SH_RESET 0x0f000000 + +#define VIVS_MC_PROFILE_CONFIG1 0x00000474 +#define VIVS_MC_PROFILE_CONFIG1_PA__MASK 0x0000000f +#define VIVS_MC_PROFILE_CONFIG1_PA__SHIFT 0 +#define VIVS_MC_PROFILE_CONFIG1_PA_INPUT_VTX_COUNTER 0x00000003 +#define VIVS_MC_PROFILE_CONFIG1_PA_INPUT_PRIM_COUNTER 0x00000004 +#define VIVS_MC_PROFILE_CONFIG1_PA_OUTPUT_PRIM_COUNTER 0x00000005 +#define VIVS_MC_PROFILE_CONFIG1_PA_DEPTH_CLIPPED_COUNTER 0x00000006 +#define VIVS_MC_PROFILE_CONFIG1_PA_TRIVIAL_REJECTED_COUNTER 0x00000007 +#define VIVS_MC_PROFILE_CONFIG1_PA_CULLED_COUNTER 0x00000008 +#define VIVS_MC_PROFILE_CONFIG1_PA_RESET 0x0000000f +#define VIVS_MC_PROFILE_CONFIG1_SE__MASK 0x00000f00 +#define VIVS_MC_PROFILE_CONFIG1_SE__SHIFT 8 +#define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_TRIANGLE_COUNT 0x00000000 +#define VIVS_MC_PROFILE_CONFIG1_SE_CULLED_LINES_COUNT 0x00000100 +#define VIVS_MC_PROFILE_CONFIG1_SE_RESET 0x00000f00 +#define VIVS_MC_PROFILE_CONFIG1_RA__MASK 0x000f0000 +#define VIVS_MC_PROFILE_CONFIG1_RA__SHIFT 16 +#define VIVS_MC_PROFILE_CONFIG1_RA_VALID_PIXEL_COUNT 0x00000000 +#define VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_QUAD_COUNT 0x00010000 +#define VIVS_MC_PROFILE_CONFIG1_RA_VALID_QUAD_COUNT_AFTER_EARLY_Z 0x00020000 +#define VIVS_MC_PROFILE_CONFIG1_RA_TOTAL_PRIMITIVE_COUNT 0x00030000 +#define VIVS_MC_PROFILE_CONFIG1_RA_PIPE_CACHE_MISS_COUNTER 0x00090000 +#define VIVS_MC_PROFILE_CONFIG1_RA_PREFETCH_CACHE_MISS_COUNTER 0x000a0000 +#define VIVS_MC_PROFILE_CONFIG1_RA_CULLED_QUAD_COUNT 0x000b0000 +#define VIVS_MC_PROFILE_CONFIG1_RA_RESET 0x000f0000 +#define VIVS_MC_PROFILE_CONFIG1_TX__MASK 0x0f000000 +#define VIVS_MC_PROFILE_CONFIG1_TX__SHIFT 24 +#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_BILINEAR_REQUESTS 0x00000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TRILINEAR_REQUESTS 0x01000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_DISCARDED_TEXTURE_REQUESTS 0x02000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_TOTAL_TEXTURE_REQUESTS 0x03000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_UNKNOWN 0x04000000 +#define 
VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_COUNT 0x05000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_MEM_READ_IN_8B_COUNT 0x06000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_COUNT 0x07000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_HIT_TEXEL_COUNT 0x08000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_CACHE_MISS_TEXEL_COUNT 0x09000000 +#define VIVS_MC_PROFILE_CONFIG1_TX_RESET 0x0f000000 + +#define VIVS_MC_PROFILE_CONFIG2 0x00000478 +#define VIVS_MC_PROFILE_CONFIG2_MC__MASK 0x0000000f +#define VIVS_MC_PROFILE_CONFIG2_MC__SHIFT 0 +#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_PIPELINE 0x00000001 +#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_READ_REQ_8B_FROM_IP 0x00000002 +#define VIVS_MC_PROFILE_CONFIG2_MC_TOTAL_WRITE_REQ_8B_FROM_PIPELINE 0x00000003 +#define VIVS_MC_PROFILE_CONFIG2_MC_RESET 0x0000000f +#define VIVS_MC_PROFILE_CONFIG2_HI__MASK 0x00000f00 +#define VIVS_MC_PROFILE_CONFIG2_HI__SHIFT 8 +#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_READ_REQUEST_STALLED 0x00000000 +#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_REQUEST_STALLED 0x00000100 +#define VIVS_MC_PROFILE_CONFIG2_HI_AXI_CYCLES_WRITE_DATA_STALLED 0x00000200 +#define VIVS_MC_PROFILE_CONFIG2_HI_RESET 0x00000f00 + +#define VIVS_MC_PROFILE_CONFIG3 0x0000047c + +#define VIVS_MC_BUS_CONFIG 0x00000480 +#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK 0x0000000f +#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__SHIFT 0 +#define VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(x) (((x) << VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__SHIFT) & VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK) +#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK 0x000000f0 +#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__SHIFT 4 +#define VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(x) (((x) << VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__SHIFT) & VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK) + +#define VIVS_MC_START_COMPOSITION 0x00000554 + +#define VIVS_MC_128B_MERGE 0x00000558 + + +#endif /* STATE_HI_XML */ diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 96e86cf4455b..83efca941388 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -118,7 +118,7 @@ config DRM_EXYNOS_ROTATOR config DRM_EXYNOS_GSC bool "GScaler" - depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !ARCH_MULTIPLATFORM + depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !VIDEO_SAMSUNG_EXYNOS_GSC help Choose this option if you want to use Exynos GSC for DRM. 
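The generated rules-ng-ng headers above all share one field convention: a multi-bit field FOO is described by FOO__MASK and FOO__SHIFT together with a FOO(x) helper that shifts a value into position and masks off any bits that fall outside the field, while single-bit flags are bare defines. A minimal C sketch of that convention follows; it is illustrative only and not part of the patch. It assumes an ioremap()ed register base called mmio, uses a hypothetical function name, and relies only on macros from state_hi.xml.h above plus the standard readl()/writel() accessors.

#include <linux/io.h>
#include <linux/printk.h>
#include "state_hi.xml.h"

/* Hypothetical sketch; 'mmio' is an assumed ioremap()ed GPU register base. */
static void example_reg_field_usage(void __iomem *mmio)
{
	u32 val, specs;

	/* Compose a register word: each FIELD(x) helper shifts the value
	 * into position and masks away bits that do not fit the field,
	 * so independent fields can simply be OR'ed together. */
	val = VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG(1) |
	      VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG(2);
	writel(val, mmio + VIVS_MC_BUS_CONFIG);

	/* Decode a field: apply the mask first, then shift down. */
	specs = readl(mmio + VIVS_HI_CHIP_SPECS);
	pr_info("pixel pipes: %u\n",
		(specs & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK) >>
		VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT);
}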
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index fbe1b3174f75..c7362b99ce28 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -21,11 +21,11 @@ #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" +#include "exynos_drm_fb.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" #define WINDOWS_NR 3 -#define CURSOR_WIN 2 #define MIN_FB_WIDTH_FOR_16WORD_BURST 128 static const char * const decon_clks_name[] = { @@ -56,6 +56,7 @@ struct decon_context { struct drm_device *drm_dev; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; + struct exynos_drm_plane_config configs[WINDOWS_NR]; void __iomem *addr; struct clk *clks[ARRAY_SIZE(decon_clks_name)]; int pipe; @@ -71,6 +72,12 @@ static const uint32_t decon_formats[] = { DRM_FORMAT_ARGB8888, }; +static const enum drm_plane_type decon_win_types[WINDOWS_NR] = { + DRM_PLANE_TYPE_PRIMARY, + DRM_PLANE_TYPE_OVERLAY, + DRM_PLANE_TYPE_CURSOR, +}; + static inline void decon_set_bits(struct decon_context *ctx, u32 reg, u32 mask, u32 val) { @@ -259,21 +266,24 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc, static void decon_update_plane(struct exynos_drm_crtc *crtc, struct exynos_drm_plane *plane) { + struct exynos_drm_plane_state *state = + to_exynos_plane_state(plane->base.state); struct decon_context *ctx = crtc->ctx; - struct drm_plane_state *state = plane->base.state; + struct drm_framebuffer *fb = state->base.fb; unsigned int win = plane->zpos; - unsigned int bpp = state->fb->bits_per_pixel >> 3; - unsigned int pitch = state->fb->pitches[0]; + unsigned int bpp = fb->bits_per_pixel >> 3; + unsigned int pitch = fb->pitches[0]; + dma_addr_t dma_addr = exynos_drm_fb_dma_addr(fb, 0); u32 val; if (test_bit(BIT_SUSPENDED, &ctx->flags)) return; - val = COORDINATE_X(plane->crtc_x) | COORDINATE_Y(plane->crtc_y); + val = COORDINATE_X(state->crtc.x) | COORDINATE_Y(state->crtc.y); writel(val, ctx->addr + DECON_VIDOSDxA(win)); - val = COORDINATE_X(plane->crtc_x + plane->crtc_w - 1) | - COORDINATE_Y(plane->crtc_y + plane->crtc_h - 1); + val = COORDINATE_X(state->crtc.x + state->crtc.w - 1) | + COORDINATE_Y(state->crtc.y + state->crtc.h - 1); writel(val, ctx->addr + DECON_VIDOSDxB(win)); val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | @@ -284,20 +294,20 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, VIDOSD_Wx_ALPHA_B_F(0x0); writel(val, ctx->addr + DECON_VIDOSDxD(win)); - writel(plane->dma_addr[0], ctx->addr + DECON_VIDW0xADD0B0(win)); + writel(dma_addr, ctx->addr + DECON_VIDW0xADD0B0(win)); - val = plane->dma_addr[0] + pitch * plane->crtc_h; + val = dma_addr + pitch * state->src.h; writel(val, ctx->addr + DECON_VIDW0xADD1B0(win)); if (ctx->out_type != IFTYPE_HDMI) - val = BIT_VAL(pitch - plane->crtc_w * bpp, 27, 14) - | BIT_VAL(plane->crtc_w * bpp, 13, 0); + val = BIT_VAL(pitch - state->crtc.w * bpp, 27, 14) + | BIT_VAL(state->crtc.w * bpp, 13, 0); else - val = BIT_VAL(pitch - plane->crtc_w * bpp, 29, 15) - | BIT_VAL(plane->crtc_w * bpp, 14, 0); + val = BIT_VAL(pitch - state->crtc.w * bpp, 29, 15) + | BIT_VAL(state->crtc.w * bpp, 14, 0); writel(val, ctx->addr + DECON_VIDW0xADD2(win)); - decon_win_set_pixfmt(ctx, win, state->fb); + decon_win_set_pixfmt(ctx, win, fb); /* window enable */ decon_set_bits(ctx, DECON_WINCONx(win), WINCONx_ENWIN_F, ~0); @@ -377,20 +387,12 @@ static void decon_swreset(struct decon_context *ctx) static void decon_enable(struct 
exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; - int ret; - int i; if (!test_and_clear_bit(BIT_SUSPENDED, &ctx->flags)) return; pm_runtime_get_sync(ctx->dev); - for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) { - ret = clk_prepare_enable(ctx->clks[i]); - if (ret < 0) - goto err; - } - set_bit(BIT_CLKS_ENABLED, &ctx->flags); /* if vblank was enabled status, enable it again. */ @@ -399,11 +401,6 @@ static void decon_enable(struct exynos_drm_crtc *crtc) decon_commit(ctx->crtc); - return; -err: - while (--i >= 0) - clk_disable_unprepare(ctx->clks[i]); - set_bit(BIT_SUSPENDED, &ctx->flags); } @@ -425,9 +422,6 @@ static void decon_disable(struct exynos_drm_crtc *crtc) decon_swreset(ctx); - for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) - clk_disable_unprepare(ctx->clks[i]); - clear_bit(BIT_CLKS_ENABLED, &ctx->flags); pm_runtime_put_sync(ctx->dev); @@ -478,7 +472,6 @@ err: static struct exynos_drm_crtc_ops decon_crtc_ops = { .enable = decon_enable, .disable = decon_disable, - .commit = decon_commit, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, .atomic_begin = decon_atomic_begin, @@ -495,7 +488,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data) struct exynos_drm_private *priv = drm_dev->dev_private; struct exynos_drm_plane *exynos_plane; enum exynos_drm_output_type out_type; - enum drm_plane_type type; unsigned int win; int ret; @@ -505,10 +497,13 @@ static int decon_bind(struct device *dev, struct device *master, void *data) for (win = ctx->first_win; win < WINDOWS_NR; win++) { int tmp = (win == ctx->first_win) ? 0 : win; - type = exynos_plane_get_type(tmp, CURSOR_WIN); + ctx->configs[win].pixel_formats = decon_formats; + ctx->configs[win].num_pixel_formats = ARRAY_SIZE(decon_formats); + ctx->configs[win].zpos = win; + ctx->configs[win].type = decon_win_types[tmp]; + ret = exynos_plane_init(drm_dev, &ctx->planes[win], - 1 << ctx->pipe, type, decon_formats, - ARRAY_SIZE(decon_formats), win); + 1 << ctx->pipe, &ctx->configs[win]); if (ret) return ret; } @@ -581,6 +576,44 @@ out: return IRQ_HANDLED; } +#ifdef CONFIG_PM +static int exynos5433_decon_suspend(struct device *dev) +{ + struct decon_context *ctx = dev_get_drvdata(dev); + int i; + + for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) + clk_disable_unprepare(ctx->clks[i]); + + return 0; +} + +static int exynos5433_decon_resume(struct device *dev) +{ + struct decon_context *ctx = dev_get_drvdata(dev); + int i, ret; + + for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) { + ret = clk_prepare_enable(ctx->clks[i]); + if (ret < 0) + goto err; + } + + return 0; + +err: + while (--i >= 0) + clk_disable_unprepare(ctx->clks[i]); + + return ret; +} +#endif + +static const struct dev_pm_ops exynos5433_decon_pm_ops = { + SET_RUNTIME_PM_OPS(exynos5433_decon_suspend, exynos5433_decon_resume, + NULL) +}; + static const struct of_device_id exynos5433_decon_driver_dt_match[] = { { .compatible = "samsung,exynos5433-decon", @@ -684,6 +717,7 @@ struct platform_driver exynos5433_decon_driver = { .remove = exynos5433_decon_remove, .driver = { .name = "exynos5433-decon", + .pm = &exynos5433_decon_pm_ops, .of_match_table = exynos5433_decon_driver_dt_match, }, }; diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index ead2b16e237d..c47f9af8170b 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -30,6 +30,7 @@ #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" #include 
"exynos_drm_drv.h" +#include "exynos_drm_fb.h" #include "exynos_drm_fbdev.h" #include "exynos_drm_iommu.h" @@ -40,13 +41,13 @@ #define MIN_FB_WIDTH_FOR_16WORD_BURST 128 #define WINDOWS_NR 2 -#define CURSOR_WIN 1 struct decon_context { struct device *dev; struct drm_device *drm_dev; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; + struct exynos_drm_plane_config configs[WINDOWS_NR]; struct clk *pclk; struct clk *aclk; struct clk *eclk; @@ -81,6 +82,11 @@ static const uint32_t decon_formats[] = { DRM_FORMAT_BGRA8888, }; +static const enum drm_plane_type decon_win_types[WINDOWS_NR] = { + DRM_PLANE_TYPE_PRIMARY, + DRM_PLANE_TYPE_CURSOR, +}; + static void decon_wait_for_vblank(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; @@ -119,13 +125,8 @@ static void decon_clear_channels(struct exynos_drm_crtc *crtc) } /* Wait for vsync, as disable channel takes effect at next vsync */ - if (ch_enabled) { - unsigned int state = ctx->suspended; - - ctx->suspended = 0; + if (ch_enabled) decon_wait_for_vblank(ctx->crtc); - ctx->suspended = state; - } } static int decon_ctx_initialize(struct decon_context *ctx, @@ -398,16 +399,17 @@ static void decon_atomic_begin(struct exynos_drm_crtc *crtc, static void decon_update_plane(struct exynos_drm_crtc *crtc, struct exynos_drm_plane *plane) { + struct exynos_drm_plane_state *state = + to_exynos_plane_state(plane->base.state); struct decon_context *ctx = crtc->ctx; - struct drm_display_mode *mode = &crtc->base.state->adjusted_mode; - struct drm_plane_state *state = plane->base.state; + struct drm_framebuffer *fb = state->base.fb; int padding; unsigned long val, alpha; unsigned int last_x; unsigned int last_y; unsigned int win = plane->zpos; - unsigned int bpp = state->fb->bits_per_pixel >> 3; - unsigned int pitch = state->fb->pitches[0]; + unsigned int bpp = fb->bits_per_pixel >> 3; + unsigned int pitch = fb->pitches[0]; if (ctx->suspended) return; @@ -423,41 +425,32 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, */ /* buffer start address */ - val = (unsigned long)plane->dma_addr[0]; + val = (unsigned long)exynos_drm_fb_dma_addr(fb, 0); writel(val, ctx->regs + VIDW_BUF_START(win)); - padding = (pitch / bpp) - state->fb->width; + padding = (pitch / bpp) - fb->width; /* buffer size */ - writel(state->fb->width + padding, ctx->regs + VIDW_WHOLE_X(win)); - writel(state->fb->height, ctx->regs + VIDW_WHOLE_Y(win)); + writel(fb->width + padding, ctx->regs + VIDW_WHOLE_X(win)); + writel(fb->height, ctx->regs + VIDW_WHOLE_Y(win)); /* offset from the start of the buffer to read */ - writel(plane->src_x, ctx->regs + VIDW_OFFSET_X(win)); - writel(plane->src_y, ctx->regs + VIDW_OFFSET_Y(win)); + writel(state->src.x, ctx->regs + VIDW_OFFSET_X(win)); + writel(state->src.y, ctx->regs + VIDW_OFFSET_Y(win)); DRM_DEBUG_KMS("start addr = 0x%lx\n", (unsigned long)val); DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n", - plane->crtc_w, plane->crtc_h); + state->crtc.w, state->crtc.h); - /* - * OSD position. - * In case the window layout goes of LCD layout, DECON fails. 
- */ - if ((plane->crtc_x + plane->crtc_w) > mode->hdisplay) - plane->crtc_x = mode->hdisplay - plane->crtc_w; - if ((plane->crtc_y + plane->crtc_h) > mode->vdisplay) - plane->crtc_y = mode->vdisplay - plane->crtc_h; - - val = VIDOSDxA_TOPLEFT_X(plane->crtc_x) | - VIDOSDxA_TOPLEFT_Y(plane->crtc_y); + val = VIDOSDxA_TOPLEFT_X(state->crtc.x) | + VIDOSDxA_TOPLEFT_Y(state->crtc.y); writel(val, ctx->regs + VIDOSD_A(win)); - last_x = plane->crtc_x + plane->crtc_w; + last_x = state->crtc.x + state->crtc.w; if (last_x) last_x--; - last_y = plane->crtc_y + plane->crtc_h; + last_y = state->crtc.y + state->crtc.h; if (last_y) last_y--; @@ -466,7 +459,7 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, writel(val, ctx->regs + VIDOSD_B(win)); DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n", - plane->crtc_x, plane->crtc_y, last_x, last_y); + state->crtc.x, state->crtc.y, last_x, last_y); /* OSD alpha */ alpha = VIDOSDxC_ALPHA0_R_F(0x0) | @@ -481,7 +474,7 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, writel(alpha, ctx->regs + VIDOSD_D(win)); - decon_win_set_pixfmt(ctx, win, state->fb); + decon_win_set_pixfmt(ctx, win, fb); /* hardware window 0 doesn't support color key. */ if (win != 0) @@ -555,39 +548,12 @@ static void decon_init(struct decon_context *ctx) static void decon_enable(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; - int ret; if (!ctx->suspended) return; - ctx->suspended = false; - pm_runtime_get_sync(ctx->dev); - ret = clk_prepare_enable(ctx->pclk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the pclk [%d]\n", ret); - return; - } - - ret = clk_prepare_enable(ctx->aclk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the aclk [%d]\n", ret); - return; - } - - ret = clk_prepare_enable(ctx->eclk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the eclk [%d]\n", ret); - return; - } - - ret = clk_prepare_enable(ctx->vclk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the vclk [%d]\n", ret); - return; - } - decon_init(ctx); /* if vblank was enabled status, enable it again. 
*/ @@ -595,6 +561,8 @@ static void decon_enable(struct exynos_drm_crtc *crtc) decon_enable_vblank(ctx->crtc); decon_commit(ctx->crtc); + + ctx->suspended = false; } static void decon_disable(struct exynos_drm_crtc *crtc) @@ -613,11 +581,6 @@ static void decon_disable(struct exynos_drm_crtc *crtc) for (i = 0; i < WINDOWS_NR; i++) decon_disable_plane(crtc, &ctx->planes[i]); - clk_disable_unprepare(ctx->vclk); - clk_disable_unprepare(ctx->eclk); - clk_disable_unprepare(ctx->aclk); - clk_disable_unprepare(ctx->pclk); - pm_runtime_put_sync(ctx->dev); ctx->suspended = true; @@ -679,8 +642,7 @@ static int decon_bind(struct device *dev, struct device *master, void *data) struct decon_context *ctx = dev_get_drvdata(dev); struct drm_device *drm_dev = data; struct exynos_drm_plane *exynos_plane; - enum drm_plane_type type; - unsigned int zpos; + unsigned int i; int ret; ret = decon_ctx_initialize(ctx, drm_dev); @@ -689,11 +651,14 @@ static int decon_bind(struct device *dev, struct device *master, void *data) return ret; } - for (zpos = 0; zpos < WINDOWS_NR; zpos++) { - type = exynos_plane_get_type(zpos, CURSOR_WIN); - ret = exynos_plane_init(drm_dev, &ctx->planes[zpos], - 1 << ctx->pipe, type, decon_formats, - ARRAY_SIZE(decon_formats), zpos); + for (i = 0; i < WINDOWS_NR; i++) { + ctx->configs[i].pixel_formats = decon_formats; + ctx->configs[i].num_pixel_formats = ARRAY_SIZE(decon_formats); + ctx->configs[i].zpos = i; + ctx->configs[i].type = decon_win_types[i]; + + ret = exynos_plane_init(drm_dev, &ctx->planes[i], + 1 << ctx->pipe, &ctx->configs[i]); if (ret) return ret; } @@ -843,11 +808,63 @@ static int decon_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int exynos7_decon_suspend(struct device *dev) +{ + struct decon_context *ctx = dev_get_drvdata(dev); + + clk_disable_unprepare(ctx->vclk); + clk_disable_unprepare(ctx->eclk); + clk_disable_unprepare(ctx->aclk); + clk_disable_unprepare(ctx->pclk); + + return 0; +} + +static int exynos7_decon_resume(struct device *dev) +{ + struct decon_context *ctx = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(ctx->pclk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the pclk [%d]\n", ret); + return ret; + } + + ret = clk_prepare_enable(ctx->aclk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the aclk [%d]\n", ret); + return ret; + } + + ret = clk_prepare_enable(ctx->eclk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the eclk [%d]\n", ret); + return ret; + } + + ret = clk_prepare_enable(ctx->vclk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the vclk [%d]\n", ret); + return ret; + } + + return 0; +} +#endif + +static const struct dev_pm_ops exynos7_decon_pm_ops = { + SET_RUNTIME_PM_OPS(exynos7_decon_suspend, exynos7_decon_resume, + NULL) +}; + struct platform_driver decon_driver = { .probe = decon_probe, .remove = decon_remove, .driver = { .name = "exynos-decon", + .pm = &exynos7_decon_pm_ops, .of_match_table = decon_driver_dt_match, }, }; diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index 124fb9a56f02..b79c316c2ad2 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -953,7 +953,7 @@ static void exynos_dp_connector_destroy(struct drm_connector *connector) drm_connector_cleanup(connector); } -static struct drm_connector_funcs exynos_dp_connector_funcs = { +static const struct drm_connector_funcs exynos_dp_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .fill_modes = 
drm_helper_probe_single_connector_modes, .detect = exynos_dp_detect, @@ -998,7 +998,7 @@ static struct drm_encoder *exynos_dp_best_encoder( return &dp->encoder; } -static struct drm_connector_helper_funcs exynos_dp_connector_helper_funcs = { +static const struct drm_connector_helper_funcs exynos_dp_connector_helper_funcs = { .get_modes = exynos_dp_get_modes, .best_encoder = exynos_dp_best_encoder, }; @@ -1009,9 +1009,9 @@ static int exynos_drm_attach_lcd_bridge(struct exynos_dp_device *dp, { int ret; - encoder->bridge = dp->bridge; - dp->bridge->encoder = encoder; - ret = drm_bridge_attach(encoder->dev, dp->bridge); + encoder->bridge->next = dp->ptn_bridge; + dp->ptn_bridge->encoder = encoder; + ret = drm_bridge_attach(encoder->dev, dp->ptn_bridge); if (ret) { DRM_ERROR("Failed to attach bridge to drm\n"); return ret; @@ -1020,14 +1020,15 @@ static int exynos_drm_attach_lcd_bridge(struct exynos_dp_device *dp, return 0; } -static int exynos_dp_create_connector(struct drm_encoder *encoder) +static int exynos_dp_bridge_attach(struct drm_bridge *bridge) { - struct exynos_dp_device *dp = encoder_to_dp(encoder); + struct exynos_dp_device *dp = bridge->driver_private; + struct drm_encoder *encoder = &dp->encoder; struct drm_connector *connector = &dp->connector; int ret; /* Pre-empt DP connector creation if there's a bridge */ - if (dp->bridge) { + if (dp->ptn_bridge) { ret = exynos_drm_attach_lcd_bridge(dp, encoder); if (!ret) return 0; @@ -1052,27 +1053,16 @@ static int exynos_dp_create_connector(struct drm_encoder *encoder) return ret; } -static bool exynos_dp_mode_fixup(struct drm_encoder *encoder, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - return true; -} - -static void exynos_dp_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ -} - -static void exynos_dp_enable(struct drm_encoder *encoder) +static void exynos_dp_bridge_enable(struct drm_bridge *bridge) { - struct exynos_dp_device *dp = encoder_to_dp(encoder); + struct exynos_dp_device *dp = bridge->driver_private; struct exynos_drm_crtc *crtc = dp_to_crtc(dp); if (dp->dpms_mode == DRM_MODE_DPMS_ON) return; + pm_runtime_get_sync(dp->dev); + if (dp->panel) { if (drm_panel_prepare(dp->panel)) { DRM_ERROR("failed to setup the panel\n"); @@ -1083,7 +1073,6 @@ static void exynos_dp_enable(struct drm_encoder *encoder) if (crtc->ops->clock_enable) crtc->ops->clock_enable(dp_to_crtc(dp), true); - clk_prepare_enable(dp->clock); phy_power_on(dp->phy); exynos_dp_init_dp(dp); enable_irq(dp->irq); @@ -1092,9 +1081,9 @@ static void exynos_dp_enable(struct drm_encoder *encoder) dp->dpms_mode = DRM_MODE_DPMS_ON; } -static void exynos_dp_disable(struct drm_encoder *encoder) +static void exynos_dp_bridge_disable(struct drm_bridge *bridge) { - struct exynos_dp_device *dp = encoder_to_dp(encoder); + struct exynos_dp_device *dp = bridge->driver_private; struct exynos_drm_crtc *crtc = dp_to_crtc(dp); if (dp->dpms_mode != DRM_MODE_DPMS_ON) @@ -1110,7 +1099,6 @@ static void exynos_dp_disable(struct drm_encoder *encoder) disable_irq(dp->irq); flush_work(&dp->hotplug_work); phy_power_off(dp->phy); - clk_disable_unprepare(dp->clock); if (crtc->ops->clock_enable) crtc->ops->clock_enable(dp_to_crtc(dp), false); @@ -1120,17 +1108,82 @@ static void exynos_dp_disable(struct drm_encoder *encoder) DRM_ERROR("failed to turnoff the panel\n"); } + pm_runtime_put_sync(dp->dev); + dp->dpms_mode = DRM_MODE_DPMS_OFF; } -static struct drm_encoder_helper_funcs 
exynos_dp_encoder_helper_funcs = { +static void exynos_dp_bridge_nop(struct drm_bridge *bridge) +{ + /* do nothing */ +} + +static const struct drm_bridge_funcs exynos_dp_bridge_funcs = { + .enable = exynos_dp_bridge_enable, + .disable = exynos_dp_bridge_disable, + .pre_enable = exynos_dp_bridge_nop, + .post_disable = exynos_dp_bridge_nop, + .attach = exynos_dp_bridge_attach, +}; + +static int exynos_dp_create_connector(struct drm_encoder *encoder) +{ + struct exynos_dp_device *dp = encoder_to_dp(encoder); + struct drm_device *drm_dev = dp->drm_dev; + struct drm_bridge *bridge; + int ret; + + bridge = devm_kzalloc(drm_dev->dev, sizeof(*bridge), GFP_KERNEL); + if (!bridge) { + DRM_ERROR("failed to allocate for drm bridge\n"); + return -ENOMEM; + } + + dp->bridge = bridge; + + encoder->bridge = bridge; + bridge->driver_private = dp; + bridge->encoder = encoder; + bridge->funcs = &exynos_dp_bridge_funcs; + + ret = drm_bridge_attach(drm_dev, bridge); + if (ret) { + DRM_ERROR("failed to attach drm bridge\n"); + return -EINVAL; + } + + return 0; +} + +static bool exynos_dp_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static void exynos_dp_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ +} + +static void exynos_dp_enable(struct drm_encoder *encoder) +{ +} + +static void exynos_dp_disable(struct drm_encoder *encoder) +{ +} + +static const struct drm_encoder_helper_funcs exynos_dp_encoder_helper_funcs = { .mode_fixup = exynos_dp_mode_fixup, .mode_set = exynos_dp_mode_set, .enable = exynos_dp_enable, .disable = exynos_dp_disable, }; -static struct drm_encoder_funcs exynos_dp_encoder_funcs = { +static const struct drm_encoder_funcs exynos_dp_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -1238,7 +1291,7 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) } } - if (!dp->panel && !dp->bridge) { + if (!dp->panel && !dp->ptn_bridge) { ret = exynos_dp_dt_parse_panel(dp); if (ret) return ret; @@ -1289,10 +1342,6 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) INIT_WORK(&dp->hotplug_work, exynos_dp_hotplug); - phy_power_on(dp->phy); - - exynos_dp_init_dp(dp); - ret = devm_request_irq(&pdev->dev, dp->irq, exynos_dp_irq_handler, irq_flags, "exynos-dp", dp); if (ret) { @@ -1313,7 +1362,7 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs); drm_encoder_init(drm_dev, encoder, &exynos_dp_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &exynos_dp_encoder_helper_funcs); @@ -1343,8 +1392,9 @@ static const struct component_ops exynos_dp_ops = { static int exynos_dp_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *panel_node, *bridge_node, *endpoint; + struct device_node *panel_node = NULL, *bridge_node, *endpoint = NULL; struct exynos_dp_device *dp; + int ret; dp = devm_kzalloc(&pdev->dev, sizeof(struct exynos_dp_device), GFP_KERNEL); @@ -1353,36 +1403,96 @@ static int exynos_dp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dp); + /* This is for backward compatibility.
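
The hunks above are the heart of the DP rework: instead of driving the panel from the encoder helpers, the driver now registers itself as a drm_bridge (the encoder callbacks become empty stubs), and an external PTN bridge, if present, is chained behind it via encoder->bridge->next. A minimal sketch of that registration pattern, with hypothetical my_* names standing in for the driver's own symbols and using the drm_bridge_attach(dev, bridge) signature of this kernel generation:

#include <drm/drmP.h>
#include <drm/drm_crtc.h>

static void my_bridge_nop(struct drm_bridge *bridge)
{
	/* nothing to do in this phase for this hardware */
}

static int my_bridge_attach(struct drm_bridge *bridge)
{
	/* create the connector here, as exynos_dp_bridge_attach() does */
	return 0;
}

static const struct drm_bridge_funcs my_bridge_funcs = {
	.enable = my_bridge_nop,
	.disable = my_bridge_nop,
	.pre_enable = my_bridge_nop,
	.post_disable = my_bridge_nop,
	.attach = my_bridge_attach,
};

static int my_register_bridge(struct drm_device *drm,
			      struct drm_encoder *encoder, void *priv)
{
	/* devm allocation: freed with the device, no explicit kfree() */
	struct drm_bridge *bridge = devm_kzalloc(drm->dev, sizeof(*bridge),
						 GFP_KERNEL);

	if (!bridge)
		return -ENOMEM;

	bridge->driver_private = priv;
	bridge->encoder = encoder;
	bridge->funcs = &my_bridge_funcs;
	encoder->bridge = bridge;

	/* calls back into my_bridge_funcs.attach */
	return drm_bridge_attach(drm, bridge);
}
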
*/ panel_node = of_parse_phandle(dev->of_node, "panel", 0); if (panel_node) { dp->panel = of_drm_find_panel(panel_node); of_node_put(panel_node); if (!dp->panel) return -EPROBE_DEFER; + } else { + endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + if (endpoint) { + panel_node = of_graph_get_remote_port_parent(endpoint); + if (panel_node) { + dp->panel = of_drm_find_panel(panel_node); + of_node_put(panel_node); + if (!dp->panel) + return -EPROBE_DEFER; + } else { + DRM_ERROR("no port node for panel device.\n"); + return -EINVAL; + } + } } + if (endpoint) + goto out; + endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); if (endpoint) { bridge_node = of_graph_get_remote_port_parent(endpoint); if (bridge_node) { - dp->bridge = of_drm_find_bridge(bridge_node); + dp->ptn_bridge = of_drm_find_bridge(bridge_node); of_node_put(bridge_node); - if (!dp->bridge) + if (!dp->ptn_bridge) return -EPROBE_DEFER; } else return -EPROBE_DEFER; } - return component_add(&pdev->dev, &exynos_dp_ops); +out: + pm_runtime_enable(dev); + + ret = component_add(&pdev->dev, &exynos_dp_ops); + if (ret) + goto err_disable_pm_runtime; + + return ret; + +err_disable_pm_runtime: + pm_runtime_disable(dev); + + return ret; } static int exynos_dp_remove(struct platform_device *pdev) { + pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &exynos_dp_ops); return 0; } +#ifdef CONFIG_PM +static int exynos_dp_suspend(struct device *dev) +{ + struct exynos_dp_device *dp = dev_get_drvdata(dev); + + clk_disable_unprepare(dp->clock); + + return 0; +} + +static int exynos_dp_resume(struct device *dev) +{ + struct exynos_dp_device *dp = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(dp->clock); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); + return ret; + } + + return 0; +} +#endif + +static const struct dev_pm_ops exynos_dp_pm_ops = { + SET_RUNTIME_PM_OPS(exynos_dp_suspend, exynos_dp_resume, NULL) +}; + static const struct of_device_id exynos_dp_match[] = { { .compatible = "samsung,exynos5-dp" }, {}, @@ -1395,6 +1505,7 @@ struct platform_driver dp_driver = { .driver = { .name = "exynos-dp", .owner = THIS_MODULE, + .pm = &exynos_dp_pm_ops, .of_match_table = exynos_dp_match, }, }; diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.h b/drivers/gpu/drm/exynos/exynos_dp_core.h index e413b6f7b0e7..66eec4b2d5c6 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.h +++ b/drivers/gpu/drm/exynos/exynos_dp_core.h @@ -153,6 +153,7 @@ struct exynos_dp_device { struct drm_connector connector; struct drm_panel *panel; struct drm_bridge *bridge; + struct drm_bridge *ptn_bridge; struct clk *clock; unsigned int irq; void __iomem *reg_base; diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index b3ba27fd9a6b..80f797414236 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -93,7 +93,7 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, } } -static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { +static const struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { .enable = exynos_drm_crtc_enable, .disable = exynos_drm_crtc_disable, .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, @@ -113,7 +113,7 @@ static void exynos_drm_crtc_destroy(struct drm_crtc *crtc) kfree(exynos_crtc); } -static struct drm_crtc_funcs exynos_crtc_funcs = { +static const struct drm_crtc_funcs exynos_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .page_flip = 
drm_atomic_helper_page_flip, .destroy = exynos_drm_crtc_destroy, @@ -150,7 +150,7 @@ struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev, private->crtc[pipe] = crtc; ret = drm_crtc_init_with_planes(drm_dev, crtc, plane, NULL, - &exynos_crtc_funcs); + &exynos_crtc_funcs, NULL); if (ret < 0) goto err_crtc; diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c index c748b8790de3..05350ae0785b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c @@ -57,7 +57,7 @@ static void exynos_dpi_connector_destroy(struct drm_connector *connector) drm_connector_cleanup(connector); } -static struct drm_connector_funcs exynos_dpi_connector_funcs = { +static const struct drm_connector_funcs exynos_dpi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = exynos_dpi_detect, .fill_modes = drm_helper_probe_single_connector_modes, @@ -100,7 +100,7 @@ exynos_dpi_best_encoder(struct drm_connector *connector) return &ctx->encoder; } -static struct drm_connector_helper_funcs exynos_dpi_connector_helper_funcs = { +static const struct drm_connector_helper_funcs exynos_dpi_connector_helper_funcs = { .get_modes = exynos_dpi_get_modes, .best_encoder = exynos_dpi_best_encoder, }; @@ -161,14 +161,14 @@ static void exynos_dpi_disable(struct drm_encoder *encoder) } } -static struct drm_encoder_helper_funcs exynos_dpi_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs exynos_dpi_encoder_helper_funcs = { .mode_fixup = exynos_dpi_mode_fixup, .mode_set = exynos_dpi_mode_set, .enable = exynos_dpi_enable, .disable = exynos_dpi_disable, }; -static struct drm_encoder_funcs exynos_dpi_encoder_funcs = { +static const struct drm_encoder_funcs exynos_dpi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -309,7 +309,7 @@ int exynos_dpi_bind(struct drm_device *dev, struct drm_encoder *encoder) DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs); drm_encoder_init(dev, encoder, &exynos_dpi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &exynos_dpi_encoder_helper_funcs); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 2c6019d6a205..9756797a15a5 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -304,45 +304,6 @@ int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, return 0; } -#ifdef CONFIG_PM_SLEEP -static int exynos_drm_suspend(struct drm_device *dev, pm_message_t state) -{ - struct drm_connector *connector; - - drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - int old_dpms = connector->dpms; - - if (connector->funcs->dpms) - connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF); - - /* Set the old mode back to the connector for resume */ - connector->dpms = old_dpms; - } - drm_modeset_unlock_all(dev); - - return 0; -} - -static int exynos_drm_resume(struct drm_device *dev) -{ - struct drm_connector *connector; - - drm_modeset_lock_all(dev); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->funcs->dpms) { - int dpms = connector->dpms; - - connector->dpms = DRM_MODE_DPMS_OFF; - connector->funcs->dpms(connector, dpms); - } - } - drm_modeset_unlock_all(dev); - - return 0; -} -#endif - static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { struct drm_exynos_file_private 
*file_priv; @@ -476,31 +437,54 @@ static struct drm_driver exynos_drm_driver = { }; #ifdef CONFIG_PM_SLEEP -static int exynos_drm_sys_suspend(struct device *dev) +static int exynos_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - pm_message_t message; + struct drm_connector *connector; if (pm_runtime_suspended(dev) || !drm_dev) return 0; - message.event = PM_EVENT_SUSPEND; - return exynos_drm_suspend(drm_dev, message); + drm_modeset_lock_all(drm_dev); + drm_for_each_connector(connector, drm_dev) { + int old_dpms = connector->dpms; + + if (connector->funcs->dpms) + connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF); + + /* Set the old mode back to the connector for resume */ + connector->dpms = old_dpms; + } + drm_modeset_unlock_all(drm_dev); + + return 0; } -static int exynos_drm_sys_resume(struct device *dev) +static int exynos_drm_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct drm_connector *connector; if (pm_runtime_suspended(dev) || !drm_dev) return 0; - return exynos_drm_resume(drm_dev); + drm_modeset_lock_all(drm_dev); + drm_for_each_connector(connector, drm_dev) { + if (connector->funcs->dpms) { + int dpms = connector->dpms; + + connector->dpms = DRM_MODE_DPMS_OFF; + connector->funcs->dpms(connector, dpms); + } + } + drm_modeset_unlock_all(drm_dev); + + return 0; } #endif static const struct dev_pm_ops exynos_drm_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(exynos_drm_sys_suspend, exynos_drm_sys_resume) + SET_SYSTEM_SLEEP_PM_OPS(exynos_drm_suspend, exynos_drm_resume) }; /* forward declaration */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index f1eda7fa4e3c..82bbd7f4b316 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -38,24 +38,44 @@ enum exynos_drm_output_type { EXYNOS_DISPLAY_TYPE_VIDI, }; +struct exynos_drm_rect { + unsigned int x, y; + unsigned int w, h; +}; + /* - * Exynos drm common overlay structure. + * Exynos drm plane state structure. * - * @base: plane object - * @src_x: offset x on a framebuffer to be displayed. - * - the unit is screen coordinates. - * @src_y: offset y on a framebuffer to be displayed. - * - the unit is screen coordinates. - * @src_w: width of a partial image to be displayed from framebuffer. - * @src_h: height of a partial image to be displayed from framebuffer. - * @crtc_x: offset x on hardware screen. - * @crtc_y: offset y on hardware screen. - * @crtc_w: window width to be displayed (hardware screen). - * @crtc_h: window height to be displayed (hardware screen). + * @base: plane_state object (contains drm_framebuffer pointer) + * @src: rectangle of the source image data to be displayed (clipped to + * visible part). + * @crtc: rectangle of the target image position on hardware screen + * (clipped to visible part). * @h_ratio: horizontal scaling ratio, 16.16 fixed point * @v_ratio: vertical scaling ratio, 16.16 fixed point - * @dma_addr: array of bus(accessed by dma) address to the memory region - * allocated for a overlay. + * + * this structure consists of plane state data that will be applied to hardware + * specific overlay info.
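
The new @h_ratio/@v_ratio fields are 16.16 fixed point, and later hunks compare them against 1 << 15 for the "exactly 2x upscaling" case. An illustrative helper (not part of the patch) showing the convention, valid for display-sized values where src << 16 fits in 32 bits:

/* 16.16 fixed point: ratio = (src << 16) / dst, so 1 << 16 is 1:1 and
 * 1 << 15 means the source is half the destination, i.e. 2x upscaling. */
static inline unsigned int my_scale_ratio(unsigned int src, unsigned int dst)
{
	return (src << 16) / dst;
}

/* my_scale_ratio(1920, 1920) == 1 << 16; my_scale_ratio(960, 1920) == 1 << 15 */
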
+ */ + +struct exynos_drm_plane_state { + struct drm_plane_state base; + struct exynos_drm_rect crtc; + struct exynos_drm_rect src; + unsigned int h_ratio; + unsigned int v_ratio; +}; + +static inline struct exynos_drm_plane_state * +to_exynos_plane_state(struct drm_plane_state *state) +{ + return container_of(state, struct exynos_drm_plane_state, base); +} + +/* + * Exynos drm common overlay structure. + * + * @base: plane object * @zpos: order of overlay layer(z position). * * this structure is common to exynos SoC and its contents would be copied @@ -64,21 +84,32 @@ enum exynos_drm_output_type { struct exynos_drm_plane { struct drm_plane base; - unsigned int src_x; - unsigned int src_y; - unsigned int src_w; - unsigned int src_h; - unsigned int crtc_x; - unsigned int crtc_y; - unsigned int crtc_w; - unsigned int crtc_h; - unsigned int h_ratio; - unsigned int v_ratio; - dma_addr_t dma_addr[MAX_FB_BUFFER]; + const struct exynos_drm_plane_config *config; unsigned int zpos; struct drm_framebuffer *pending_fb; }; +#define EXYNOS_DRM_PLANE_CAP_DOUBLE (1 << 0) +#define EXYNOS_DRM_PLANE_CAP_SCALE (1 << 1) + +/* + * Exynos DRM plane configuration structure. + * + * @zpos: z-position of the plane. + * @type: type of the plane (primary, cursor or overlay). + * @pixel_formats: supported pixel formats. + * @num_pixel_formats: number of elements in 'pixel_formats'. + * @capabilities: supported features (see EXYNOS_DRM_PLANE_CAP_*) + */ + +struct exynos_drm_plane_config { + unsigned int zpos; + enum drm_plane_type type; + const uint32_t *pixel_formats; + unsigned int num_pixel_formats; + unsigned int capabilities; +}; + /* * Exynos drm crtc ops * diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 12b03b364703..d84a498ef099 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1458,66 +1458,6 @@ static const struct mipi_dsi_host_ops exynos_dsi_ops = { .transfer = exynos_dsi_host_transfer, }; -static int exynos_dsi_poweron(struct exynos_dsi *dsi) -{ - struct exynos_dsi_driver_data *driver_data = dsi->driver_data; - int ret, i; - - ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); - if (ret < 0) { - dev_err(dsi->dev, "cannot enable regulators %d\n", ret); - return ret; - } - - for (i = 0; i < driver_data->num_clks; i++) { - ret = clk_prepare_enable(dsi->clks[i]); - if (ret < 0) - goto err_clk; - } - - ret = phy_power_on(dsi->phy); - if (ret < 0) { - dev_err(dsi->dev, "cannot enable phy %d\n", ret); - goto err_clk; - } - - return 0; - -err_clk: - while (--i > -1) - clk_disable_unprepare(dsi->clks[i]); - regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); - - return ret; -} - -static void exynos_dsi_poweroff(struct exynos_dsi *dsi) -{ - struct exynos_dsi_driver_data *driver_data = dsi->driver_data; - int ret, i; - - usleep_range(10000, 20000); - - if (dsi->state & DSIM_STATE_INITIALIZED) { - dsi->state &= ~DSIM_STATE_INITIALIZED; - - exynos_dsi_disable_clock(dsi); - - exynos_dsi_disable_irq(dsi); - } - - dsi->state &= ~DSIM_STATE_CMD_LPM; - - phy_power_off(dsi->phy); - - for (i = driver_data->num_clks - 1; i > -1; i--) - clk_disable_unprepare(dsi->clks[i]); - - ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); - if (ret < 0) - dev_err(dsi->dev, "cannot disable regulators %d\n", ret); -} - static void exynos_dsi_enable(struct drm_encoder *encoder) { struct exynos_dsi *dsi = encoder_to_dsi(encoder); @@ -1526,16 +1466,14 @@ static void 
exynos_dsi_enable(struct drm_encoder *encoder) if (dsi->state & DSIM_STATE_ENABLED) return; - ret = exynos_dsi_poweron(dsi); - if (ret < 0) - return; + pm_runtime_get_sync(dsi->dev); dsi->state |= DSIM_STATE_ENABLED; ret = drm_panel_prepare(dsi->panel); if (ret < 0) { dsi->state &= ~DSIM_STATE_ENABLED; - exynos_dsi_poweroff(dsi); + pm_runtime_put_sync(dsi->dev); return; } @@ -1547,7 +1485,7 @@ static void exynos_dsi_enable(struct drm_encoder *encoder) dsi->state &= ~DSIM_STATE_ENABLED; exynos_dsi_set_display_enable(dsi, false); drm_panel_unprepare(dsi->panel); - exynos_dsi_poweroff(dsi); + pm_runtime_put_sync(dsi->dev); return; } @@ -1569,7 +1507,7 @@ static void exynos_dsi_disable(struct drm_encoder *encoder) dsi->state &= ~DSIM_STATE_ENABLED; - exynos_dsi_poweroff(dsi); + pm_runtime_put_sync(dsi->dev); } static enum drm_connector_status @@ -1603,7 +1541,7 @@ static void exynos_dsi_connector_destroy(struct drm_connector *connector) connector->dev = NULL; } -static struct drm_connector_funcs exynos_dsi_connector_funcs = { +static const struct drm_connector_funcs exynos_dsi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .detect = exynos_dsi_detect, .fill_modes = drm_helper_probe_single_connector_modes, @@ -1631,7 +1569,7 @@ exynos_dsi_best_encoder(struct drm_connector *connector) return &dsi->encoder; } -static struct drm_connector_helper_funcs exynos_dsi_connector_helper_funcs = { +static const struct drm_connector_helper_funcs exynos_dsi_connector_helper_funcs = { .get_modes = exynos_dsi_get_modes, .best_encoder = exynos_dsi_best_encoder, }; @@ -1684,14 +1622,14 @@ static void exynos_dsi_mode_set(struct drm_encoder *encoder, vm->hsync_len = m->hsync_end - m->hsync_start; } -static struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { .mode_fixup = exynos_dsi_mode_fixup, .mode_set = exynos_dsi_mode_set, .enable = exynos_dsi_enable, .disable = exynos_dsi_disable, }; -static struct drm_encoder_funcs exynos_dsi_encoder_funcs = { +static const struct drm_encoder_funcs exynos_dsi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -1797,13 +1735,13 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi) ep = of_graph_get_next_endpoint(node, NULL); if (!ep) { - ret = -ENXIO; + ret = -EINVAL; goto end; } dsi->bridge_node = of_graph_get_remote_port_parent(ep); if (!dsi->bridge_node) { - ret = -ENXIO; + ret = -EINVAL; goto end; } end: @@ -1831,7 +1769,7 @@ static int exynos_dsi_bind(struct device *dev, struct device *master, DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs); drm_encoder_init(drm_dev, encoder, &exynos_dsi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &exynos_dsi_encoder_helper_funcs); @@ -1954,22 +1892,99 @@ static int exynos_dsi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, &dsi->encoder); + pm_runtime_enable(dev); + return component_add(dev, &exynos_dsi_component_ops); } static int exynos_dsi_remove(struct platform_device *pdev) { + pm_runtime_disable(&pdev->dev); + component_del(&pdev->dev, &exynos_dsi_component_ops); return 0; } +#ifdef CONFIG_PM +static int exynos_dsi_suspend(struct device *dev) +{ + struct drm_encoder *encoder = dev_get_drvdata(dev); + struct exynos_dsi *dsi = encoder_to_dsi(encoder); + struct exynos_dsi_driver_data *driver_data = dsi->driver_data; + int ret, i; + + usleep_range(10000, 20000); + + if (dsi->state & DSIM_STATE_INITIALIZED) { + dsi->state &= 
~DSIM_STATE_INITIALIZED; + + exynos_dsi_disable_clock(dsi); + + exynos_dsi_disable_irq(dsi); + } + + dsi->state &= ~DSIM_STATE_CMD_LPM; + + phy_power_off(dsi->phy); + + for (i = driver_data->num_clks - 1; i > -1; i--) + clk_disable_unprepare(dsi->clks[i]); + + ret = regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); + if (ret < 0) + dev_err(dsi->dev, "cannot disable regulators %d\n", ret); + + return 0; +} + +static int exynos_dsi_resume(struct device *dev) +{ + struct drm_encoder *encoder = dev_get_drvdata(dev); + struct exynos_dsi *dsi = encoder_to_dsi(encoder); + struct exynos_dsi_driver_data *driver_data = dsi->driver_data; + int ret, i; + + ret = regulator_bulk_enable(ARRAY_SIZE(dsi->supplies), dsi->supplies); + if (ret < 0) { + dev_err(dsi->dev, "cannot enable regulators %d\n", ret); + return ret; + } + + for (i = 0; i < driver_data->num_clks; i++) { + ret = clk_prepare_enable(dsi->clks[i]); + if (ret < 0) + goto err_clk; + } + + ret = phy_power_on(dsi->phy); + if (ret < 0) { + dev_err(dsi->dev, "cannot enable phy %d\n", ret); + goto err_clk; + } + + return 0; + +err_clk: + while (--i > -1) + clk_disable_unprepare(dsi->clks[i]); + regulator_bulk_disable(ARRAY_SIZE(dsi->supplies), dsi->supplies); + + return ret; +} +#endif + +static const struct dev_pm_ops exynos_dsi_pm_ops = { + SET_RUNTIME_PM_OPS(exynos_dsi_suspend, exynos_dsi_resume, NULL) +}; + struct platform_driver dsi_driver = { .probe = exynos_dsi_probe, .remove = exynos_dsi_remove, .driver = { .name = "exynos-dsi", .owner = THIS_MODULE, + .pm = &exynos_dsi_pm_ops, .of_match_table = exynos_dsi_of_match, }, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 49b9bc302e87..cbbb1a86e70a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -37,6 +37,7 @@ struct exynos_drm_fb { struct drm_framebuffer fb; struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; + dma_addr_t dma_addr[MAX_FB_BUFFER]; }; static int check_fb_gem_memory_type(struct drm_device *drm_dev, @@ -109,7 +110,7 @@ static int exynos_drm_fb_dirty(struct drm_framebuffer *fb, return 0; } -static struct drm_framebuffer_funcs exynos_drm_fb_funcs = { +static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { .destroy = exynos_drm_fb_destroy, .create_handle = exynos_drm_fb_create_handle, .dirty = exynos_drm_fb_dirty, @@ -135,6 +136,8 @@ exynos_drm_framebuffer_init(struct drm_device *dev, goto err; exynos_fb->exynos_gem[i] = exynos_gem[i]; + exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr + + mode_cmd->offsets[i]; } drm_helper_mode_fill_fb_struct(&exynos_fb->fb, mode_cmd); @@ -189,21 +192,14 @@ err: return ERR_PTR(ret); } -struct exynos_drm_gem *exynos_drm_fb_gem(struct drm_framebuffer *fb, int index) +dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) { struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - struct exynos_drm_gem *exynos_gem; if (index >= MAX_FB_BUFFER) - return NULL; + return DMA_ERROR_CODE; - exynos_gem = exynos_fb->exynos_gem[index]; - if (!exynos_gem) - return NULL; - - DRM_DEBUG_KMS("dma_addr: 0x%lx\n", (unsigned long)exynos_gem->dma_addr); - - return exynos_gem; + return exynos_fb->dma_addr[index]; } static void exynos_drm_output_poll_changed(struct drm_device *dev) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h index a8a75ac87e59..4aae9dd2b0d1 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h @@ -22,8 +22,7 @@ 
exynos_drm_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct exynos_drm_gem **exynos_gem, int count); -/* get gem object of a drm framebuffer */ -struct exynos_drm_gem *exynos_drm_fb_gem(struct drm_framebuffer *fb, int index); +dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index); void exynos_drm_mode_config_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index bd75c1531cac..2e2247126581 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -29,6 +29,7 @@ #include <drm/exynos_drm.h> #include "exynos_drm_drv.h" +#include "exynos_drm_fb.h" #include "exynos_drm_fbdev.h" #include "exynos_drm_crtc.h" #include "exynos_drm_plane.h" @@ -87,7 +88,6 @@ /* FIMD has totally five hardware windows. */ #define WINDOWS_NR 5 -#define CURSOR_WIN 4 struct fimd_driver_data { unsigned int timing_base; @@ -150,6 +150,7 @@ struct fimd_context { struct drm_device *drm_dev; struct exynos_drm_crtc *crtc; struct exynos_drm_plane planes[WINDOWS_NR]; + struct exynos_drm_plane_config configs[WINDOWS_NR]; struct clk *bus_clk; struct clk *lcd_clk; void __iomem *regs; @@ -187,6 +188,14 @@ static const struct of_device_id fimd_driver_dt_match[] = { }; MODULE_DEVICE_TABLE(of, fimd_driver_dt_match); +static const enum drm_plane_type fimd_win_types[WINDOWS_NR] = { + DRM_PLANE_TYPE_PRIMARY, + DRM_PLANE_TYPE_OVERLAY, + DRM_PLANE_TYPE_OVERLAY, + DRM_PLANE_TYPE_OVERLAY, + DRM_PLANE_TYPE_CURSOR, +}; + static const uint32_t fimd_formats[] = { DRM_FORMAT_C8, DRM_FORMAT_XRGB1555, @@ -478,7 +487,7 @@ static void fimd_commit(struct exynos_drm_crtc *crtc) static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, - struct drm_framebuffer *fb) + uint32_t pixel_format, int width) { unsigned long val; @@ -489,11 +498,11 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, * So the request format is ARGB8888 then change it to XRGB8888. */ if (ctx->driver_data->has_limited_fmt && !win) { - if (fb->pixel_format == DRM_FORMAT_ARGB8888) - fb->pixel_format = DRM_FORMAT_XRGB8888; + if (pixel_format == DRM_FORMAT_ARGB8888) + pixel_format = DRM_FORMAT_XRGB8888; } - switch (fb->pixel_format) { + switch (pixel_format) { case DRM_FORMAT_C8: val |= WINCON0_BPPMODE_8BPP_PALETTE; val |= WINCONx_BURSTLEN_8WORD; @@ -529,17 +538,15 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win, break; } - DRM_DEBUG_KMS("bpp = %d\n", fb->bits_per_pixel); - /* - * In case of exynos, setting dma-burst to 16Word causes permanent - * tearing for very small buffers, e.g. cursor buffer. Burst Mode - * switching which is based on plane size is not recommended as - * plane size varies alot towards the end of the screen and rapid - * movement causes unstable DMA which results into iommu crash/tear. + * Setting dma-burst to 16Word causes permanent tearing for very small + * buffers, e.g. cursor buffer. Burst Mode switching which is based on + * plane size is not recommended as plane size varies a lot towards the + * end of the screen and rapid movement causes unstable DMA, but it is + * still better to change dma-burst than displaying garbage.
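
Note the signature change above: fimd_win_set_pixfmt() now receives the pixel format and width by value rather than the whole framebuffer. A plausible motivation (an inference, not stated in the patch) is that the window-0 fallback used to write through fb->pixel_format and thereby mutated the shared framebuffer object; with a by-value parameter the override stays local, roughly:

#include <drm/drm_fourcc.h>

/* illustrative only: resolve the effective format without touching the fb */
static uint32_t my_effective_format(uint32_t pixel_format, bool limited_fmt,
				    unsigned int win)
{
	/* hardware with limited formats treats ARGB on window 0 as XRGB */
	if (limited_fmt && win == 0 && pixel_format == DRM_FORMAT_ARGB8888)
		return DRM_FORMAT_XRGB8888;

	return pixel_format;
}
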
*/ - if (fb->width < MIN_FB_WIDTH_FOR_16WORD_BURST) { + if (width < MIN_FB_WIDTH_FOR_16WORD_BURST) { val &= ~WINCONx_BURSTLEN_MASK; val |= WINCONx_BURSTLEN_4WORD; } @@ -640,39 +647,41 @@ static void fimd_atomic_flush(struct exynos_drm_crtc *crtc, static void fimd_update_plane(struct exynos_drm_crtc *crtc, struct exynos_drm_plane *plane) { + struct exynos_drm_plane_state *state = + to_exynos_plane_state(plane->base.state); struct fimd_context *ctx = crtc->ctx; - struct drm_plane_state *state = plane->base.state; + struct drm_framebuffer *fb = state->base.fb; dma_addr_t dma_addr; unsigned long val, size, offset; unsigned int last_x, last_y, buf_offsize, line_size; unsigned int win = plane->zpos; - unsigned int bpp = state->fb->bits_per_pixel >> 3; - unsigned int pitch = state->fb->pitches[0]; + unsigned int bpp = fb->bits_per_pixel >> 3; + unsigned int pitch = fb->pitches[0]; if (ctx->suspended) return; - offset = plane->src_x * bpp; - offset += plane->src_y * pitch; + offset = state->src.x * bpp; + offset += state->src.y * pitch; /* buffer start address */ - dma_addr = plane->dma_addr[0] + offset; + dma_addr = exynos_drm_fb_dma_addr(fb, 0) + offset; val = (unsigned long)dma_addr; writel(val, ctx->regs + VIDWx_BUF_START(win, 0)); /* buffer end address */ - size = pitch * plane->crtc_h; + size = pitch * state->crtc.h; val = (unsigned long)(dma_addr + size); writel(val, ctx->regs + VIDWx_BUF_END(win, 0)); DRM_DEBUG_KMS("start addr = 0x%lx, end addr = 0x%lx, size = 0x%lx\n", (unsigned long)dma_addr, val, size); DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n", - plane->crtc_w, plane->crtc_h); + state->crtc.w, state->crtc.h); /* buffer size */ - buf_offsize = pitch - (plane->crtc_w * bpp); - line_size = plane->crtc_w * bpp; + buf_offsize = pitch - (state->crtc.w * bpp); + line_size = state->crtc.w * bpp; val = VIDW_BUF_SIZE_OFFSET(buf_offsize) | VIDW_BUF_SIZE_PAGEWIDTH(line_size) | VIDW_BUF_SIZE_OFFSET_E(buf_offsize) | @@ -680,16 +689,16 @@ static void fimd_update_plane(struct exynos_drm_crtc *crtc, writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0)); /* OSD position */ - val = VIDOSDxA_TOPLEFT_X(plane->crtc_x) | - VIDOSDxA_TOPLEFT_Y(plane->crtc_y) | - VIDOSDxA_TOPLEFT_X_E(plane->crtc_x) | - VIDOSDxA_TOPLEFT_Y_E(plane->crtc_y); + val = VIDOSDxA_TOPLEFT_X(state->crtc.x) | + VIDOSDxA_TOPLEFT_Y(state->crtc.y) | + VIDOSDxA_TOPLEFT_X_E(state->crtc.x) | + VIDOSDxA_TOPLEFT_Y_E(state->crtc.y); writel(val, ctx->regs + VIDOSD_A(win)); - last_x = plane->crtc_x + plane->crtc_w; + last_x = state->crtc.x + state->crtc.w; if (last_x) last_x--; - last_y = plane->crtc_y + plane->crtc_h; + last_y = state->crtc.y + state->crtc.h; if (last_y) last_y--; @@ -699,20 +708,20 @@ static void fimd_update_plane(struct exynos_drm_crtc *crtc, writel(val, ctx->regs + VIDOSD_B(win)); DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n", - plane->crtc_x, plane->crtc_y, last_x, last_y); + state->crtc.x, state->crtc.y, last_x, last_y); /* OSD size */ if (win != 3 && win != 4) { u32 offset = VIDOSD_D(win); if (win == 0) offset = VIDOSD_C(win); - val = plane->crtc_w * plane->crtc_h; + val = state->crtc.w * state->crtc.h; writel(val, ctx->regs + offset); DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val); } - fimd_win_set_pixfmt(ctx, win, state->fb); + fimd_win_set_pixfmt(ctx, win, fb->pixel_format, state->src.w); /* hardware window 0 doesn't support color key. 
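
The buffer-address writes in fimd_update_plane() above follow the usual pitch-based scanout arithmetic. Restated as a standalone sketch (hypothetical helper; cpp is bytes per pixel):

#include <linux/types.h>

struct my_window {
	dma_addr_t start, end;
	unsigned int offsize, line_size;
};

static struct my_window my_scanout_window(dma_addr_t base, unsigned int pitch,
					  unsigned int cpp, unsigned int src_x,
					  unsigned int src_y, unsigned int w,
					  unsigned int h)
{
	struct my_window win;

	/* first visible pixel inside the buffer */
	win.start = base + src_y * pitch + src_x * cpp;
	/* one full pitch per output row, h rows in total */
	win.end = win.start + (dma_addr_t)pitch * h;
	/* gap to skip after each line; zero when w * cpp == pitch */
	win.offsize = pitch - w * cpp;
	win.line_size = w * cpp;

	return win;
}
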
*/ if (win != 0) @@ -745,7 +754,6 @@ static void fimd_disable_plane(struct exynos_drm_crtc *crtc, static void fimd_enable(struct exynos_drm_crtc *crtc) { struct fimd_context *ctx = crtc->ctx; - int ret; if (!ctx->suspended) return; @@ -754,18 +762,6 @@ static void fimd_enable(struct exynos_drm_crtc *crtc) pm_runtime_get_sync(ctx->dev); - ret = clk_prepare_enable(ctx->bus_clk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the bus clk [%d]\n", ret); - return; - } - - ret = clk_prepare_enable(ctx->lcd_clk); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the lcd clk [%d]\n", ret); - return; - } - /* if vblank was enabled status, enable it again. */ if (test_and_clear_bit(0, &ctx->irq_flags)) fimd_enable_vblank(ctx->crtc); @@ -795,11 +791,7 @@ static void fimd_disable(struct exynos_drm_crtc *crtc) writel(0, ctx->regs + VIDCON0); - clk_disable_unprepare(ctx->lcd_clk); - clk_disable_unprepare(ctx->bus_clk); - pm_runtime_put_sync(ctx->dev); - ctx->suspended = true; } @@ -941,18 +933,19 @@ static int fimd_bind(struct device *dev, struct device *master, void *data) struct drm_device *drm_dev = data; struct exynos_drm_private *priv = drm_dev->dev_private; struct exynos_drm_plane *exynos_plane; - enum drm_plane_type type; - unsigned int zpos; + unsigned int i; int ret; ctx->drm_dev = drm_dev; ctx->pipe = priv->pipe++; - for (zpos = 0; zpos < WINDOWS_NR; zpos++) { - type = exynos_plane_get_type(zpos, CURSOR_WIN); - ret = exynos_plane_init(drm_dev, &ctx->planes[zpos], - 1 << ctx->pipe, type, fimd_formats, - ARRAY_SIZE(fimd_formats), zpos); + for (i = 0; i < WINDOWS_NR; i++) { + ctx->configs[i].pixel_formats = fimd_formats; + ctx->configs[i].num_pixel_formats = ARRAY_SIZE(fimd_formats); + ctx->configs[i].zpos = i; + ctx->configs[i].type = fimd_win_types[i]; + ret = exynos_plane_init(drm_dev, &ctx->planes[i], + 1 << ctx->pipe, &ctx->configs[i]); if (ret) return ret; } @@ -1121,12 +1114,49 @@ static int fimd_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int exynos_fimd_suspend(struct device *dev) +{ + struct fimd_context *ctx = dev_get_drvdata(dev); + + clk_disable_unprepare(ctx->lcd_clk); + clk_disable_unprepare(ctx->bus_clk); + + return 0; +} + +static int exynos_fimd_resume(struct device *dev) +{ + struct fimd_context *ctx = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(ctx->bus_clk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the bus clk [%d]\n", ret); + return ret; + } + + ret = clk_prepare_enable(ctx->lcd_clk); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the lcd clk [%d]\n", ret); + return ret; + } + + return 0; +} +#endif + +static const struct dev_pm_ops exynos_fimd_pm_ops = { + SET_RUNTIME_PM_OPS(exynos_fimd_suspend, exynos_fimd_resume, NULL) +}; + struct platform_driver fimd_driver = { .probe = fimd_probe, .remove = fimd_remove, .driver = { .name = "exynos4-fb", .owner = THIS_MODULE, + .pm = &exynos_fimd_pm_ops, .of_match_table = fimd_driver_dt_match, }, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 37ab8b282db6..9ca5047959ec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -55,8 +55,6 @@ struct exynos_drm_gem { struct sg_table *sgt; }; -struct page **exynos_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask); - /* destroy a buffer with gem object */ void exynos_drm_gem_destroy(struct exynos_drm_gem *exynos_gem); @@ -91,10 +89,6 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev, unsigned 
int gem_handle, struct drm_file *filp); -/* map user space allocated by malloc to pages. */ -int exynos_drm_gem_userptr_ioctl(struct drm_device *dev, void *data, - struct drm_file *file_priv); - /* get buffer information to memory region allocated by gem. */ int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); @@ -123,28 +117,6 @@ int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. */ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -static inline int vma_is_io(struct vm_area_struct *vma) -{ - return !!(vma->vm_flags & (VM_IO | VM_PFNMAP)); -} - -/* get a copy of a virtual memory region. */ -struct vm_area_struct *exynos_gem_get_vma(struct vm_area_struct *vma); - -/* release a userspace virtual memory area. */ -void exynos_gem_put_vma(struct vm_area_struct *vma); - -/* get pages from user space. */ -int exynos_gem_get_pages_from_userptr(unsigned long start, - unsigned int npages, - struct page **pages, - struct vm_area_struct *vma); - -/* drop the reference to pages. */ -void exynos_gem_put_pages_to_userptr(struct page **pages, - unsigned int npages, - struct vm_area_struct *vma); - /* map sgt with dma region. */ int exynos_gem_map_sgt_with_dma(struct drm_device *drm_dev, struct sg_table *sgt, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 11b87d2a7913..7aecd23cfa11 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -15,7 +15,8 @@ #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/pm_runtime.h> -#include <plat/map-base.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> @@ -126,6 +127,7 @@ struct gsc_capability { * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. + * @sysreg: handle to SYSREG block regmap. * @lock: locking of operations. * @gsc_clk: gsc gate clock. * @sc: scaler information.
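
The GScaler hunks below replace the direct SYSREG_GSCBLK_CFG1 access, which depended on the removed <plat/map-base.h> static mapping, with a syscon regmap obtained from DT. The shape of that conversion, sketched with the same "samsung,sysreg" phandle property the probe below parses:

#include <linux/device.h>
#include <linux/mfd/syscon.h>
#include <linux/of.h>
#include <linux/regmap.h>

static struct regmap *my_get_sysreg(struct device *dev)
{
	/* returns ERR_PTR() on failure; gsc_probe() below treats that
	 * as "no sysreg" and carries on with a warning */
	return syscon_regmap_lookup_by_phandle(dev->of_node,
					       "samsung,sysreg");
}

static int my_set_sysreg_bits(struct regmap *sysreg, unsigned int reg,
			      unsigned int bits)
{
	unsigned int val;
	int ret;

	/* read-modify-write, as gsc_set_gscblk_fimd_wb() does; a single
	 * regmap_update_bits() call would achieve the same atomically */
	ret = regmap_read(sysreg, reg, &val);
	if (ret)
		return ret;

	return regmap_write(sysreg, reg, val | bits);
}
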
@@ -138,6 +140,7 @@ struct gsc_context { struct exynos_drm_ippdrv ippdrv; struct resource *regs_res; void __iomem *regs; + struct regmap *sysreg; struct mutex lock; struct clk *gsc_clk; struct gsc_scaler sc; @@ -437,9 +440,12 @@ static int gsc_sw_reset(struct gsc_context *ctx) static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) { - u32 gscblk_cfg; + unsigned int gscblk_cfg; - gscblk_cfg = readl(SYSREG_GSCBLK_CFG1); + if (!ctx->sysreg) + return; + + regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg); if (enable) gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) | @@ -448,7 +454,7 @@ static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) else gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id); - writel(gscblk_cfg, SYSREG_GSCBLK_CFG1); + regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg); } static void gsc_handle_irq(struct gsc_context *ctx, bool enable, @@ -1215,10 +1221,10 @@ static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) DRM_DEBUG_KMS("enable[%d]\n", enable); if (enable) { - clk_enable(ctx->gsc_clk); + clk_prepare_enable(ctx->gsc_clk); ctx->suspended = false; } else { - clk_disable(ctx->gsc_clk); + clk_disable_unprepare(ctx->gsc_clk); ctx->suspended = true; } @@ -1663,6 +1669,15 @@ static int gsc_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; + if (dev->of_node) { + ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, + "samsung,sysreg"); + if (IS_ERR(ctx->sysreg)) { + dev_warn(dev, "failed to get system register.\n"); + ctx->sysreg = NULL; + } + } + /* clock control */ ctx->gsc_clk = devm_clk_get(dev, "gscl"); if (IS_ERR(ctx->gsc_clk)) { @@ -1713,7 +1728,6 @@ static int gsc_probe(struct platform_device *pdev) mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); - pm_runtime_set_active(dev); pm_runtime_enable(dev); ret = exynos_drm_ippdrv_register(ippdrv); @@ -1797,6 +1811,12 @@ static const struct dev_pm_ops gsc_pm_ops = { SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; +static const struct of_device_id exynos_drm_gsc_of_match[] = { + { .compatible = "samsung,exynos5-gsc" }, + { }, +}; +MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); + struct platform_driver gsc_driver = { .probe = gsc_probe, .remove = gsc_remove, @@ -1804,6 +1824,7 @@ struct platform_driver gsc_driver = { .name = "exynos-drm-gsc", .owner = THIS_MODULE, .pm = &gsc_pm_ops, + .of_match_table = of_match_ptr(exynos_drm_gsc_of_match), }, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c index 8994eab56ba8..4eaef36aec5a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_mic.c +++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c @@ -389,7 +389,7 @@ already_disabled: mutex_unlock(&mic_mutex); } -struct drm_bridge_funcs mic_bridge_funcs = { +static const struct drm_bridge_funcs mic_bridge_funcs = { .disable = mic_disable, .post_disable = mic_post_disable, .pre_enable = mic_pre_enable, diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 179311760bb7..e668fcdbcafc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -56,93 +56,170 @@ static int exynos_plane_get_size(int start, unsigned length, unsigned last) return size; } -static void exynos_plane_mode_set(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h) +static void 
exynos_plane_mode_set(struct exynos_drm_plane_state *exynos_state) + { - struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane); + struct drm_plane_state *state = &exynos_state->base; + struct drm_crtc *crtc = exynos_state->base.crtc; struct drm_display_mode *mode = &crtc->state->adjusted_mode; + int crtc_x, crtc_y; + unsigned int crtc_w, crtc_h; + unsigned int src_x, src_y; + unsigned int src_w, src_h; unsigned int actual_w; unsigned int actual_h; + /* + * The original src/dest coordinates are stored in exynos_state->base, + * but we want to keep another copy internal to our driver that we can + * clip/modify ourselves. + */ + + crtc_x = state->crtc_x; + crtc_y = state->crtc_y; + crtc_w = state->crtc_w; + crtc_h = state->crtc_h; + + src_x = state->src_x >> 16; + src_y = state->src_y >> 16; + src_w = state->src_w >> 16; + src_h = state->src_h >> 16; + + /* set ratio */ + exynos_state->h_ratio = (src_w << 16) / crtc_w; + exynos_state->v_ratio = (src_h << 16) / crtc_h; + + /* clip to visible area */ actual_w = exynos_plane_get_size(crtc_x, crtc_w, mode->hdisplay); actual_h = exynos_plane_get_size(crtc_y, crtc_h, mode->vdisplay); if (crtc_x < 0) { if (actual_w) - src_x -= crtc_x; + src_x += ((-crtc_x) * exynos_state->h_ratio) >> 16; crtc_x = 0; } if (crtc_y < 0) { if (actual_h) - src_y -= crtc_y; + src_y += ((-crtc_y) * exynos_state->v_ratio) >> 16; crtc_y = 0; } - /* set ratio */ - exynos_plane->h_ratio = (src_w << 16) / crtc_w; - exynos_plane->v_ratio = (src_h << 16) / crtc_h; - /* set drm framebuffer data. */ - exynos_plane->src_x = src_x; - exynos_plane->src_y = src_y; - exynos_plane->src_w = (actual_w * exynos_plane->h_ratio) >> 16; - exynos_plane->src_h = (actual_h * exynos_plane->v_ratio) >> 16; + exynos_state->src.x = src_x; + exynos_state->src.y = src_y; + exynos_state->src.w = (actual_w * exynos_state->h_ratio) >> 16; + exynos_state->src.h = (actual_h * exynos_state->v_ratio) >> 16; /* set plane range to be displayed. 
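
The clipping above now has to honor the scaling ratio: when the plane hangs off the left or top edge, the source rectangle advances by the clipped amount times the ratio, not by the raw pixel count as the old src_x -= crtc_x code did. Isolated as an illustrative helper:

/* advance the source offset for an off-screen left edge, 16.16 ratio */
static void my_clip_left(int *crtc_x, unsigned int *src_x,
			 unsigned int h_ratio)
{
	if (*crtc_x < 0) {
		/* -crtc_x output pixels correspond to
		 * (-crtc_x * ratio) >> 16 source pixels */
		*src_x += ((unsigned int)-*crtc_x * h_ratio) >> 16;
		*crtc_x = 0;
	}
}
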
*/ - exynos_plane->crtc_x = crtc_x; - exynos_plane->crtc_y = crtc_y; - exynos_plane->crtc_w = actual_w; - exynos_plane->crtc_h = actual_h; + exynos_state->crtc.x = crtc_x; + exynos_state->crtc.y = crtc_y; + exynos_state->crtc.w = actual_w; + exynos_state->crtc.h = actual_h; DRM_DEBUG_KMS("plane : offset_x/y(%d,%d), width/height(%d,%d)", - exynos_plane->crtc_x, exynos_plane->crtc_y, - exynos_plane->crtc_w, exynos_plane->crtc_h); + exynos_state->crtc.x, exynos_state->crtc.y, + exynos_state->crtc.w, exynos_state->crtc.h); +} + +static void exynos_drm_plane_reset(struct drm_plane *plane) +{ + struct exynos_drm_plane_state *exynos_state; + + if (plane->state) { + exynos_state = to_exynos_plane_state(plane->state); + if (exynos_state->base.fb) + drm_framebuffer_unreference(exynos_state->base.fb); + kfree(exynos_state); + plane->state = NULL; + } + + exynos_state = kzalloc(sizeof(*exynos_state), GFP_KERNEL); + if (exynos_state) { + plane->state = &exynos_state->base; + plane->state->plane = plane; + } +} + +static struct drm_plane_state * +exynos_drm_plane_duplicate_state(struct drm_plane *plane) +{ + struct exynos_drm_plane_state *exynos_state; + struct exynos_drm_plane_state *copy; + + exynos_state = to_exynos_plane_state(plane->state); + copy = kzalloc(sizeof(*exynos_state), GFP_KERNEL); + if (!copy) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &copy->base); + return &copy->base; } - plane->crtc = crtc; +static void exynos_drm_plane_destroy_state(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct exynos_drm_plane_state *old_exynos_state = + to_exynos_plane_state(old_state); + __drm_atomic_helper_plane_destroy_state(plane, old_state); + kfree(old_exynos_state); } static struct drm_plane_funcs exynos_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = drm_plane_cleanup, - .reset = drm_atomic_helper_plane_reset, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, + .reset = exynos_drm_plane_reset, + .atomic_duplicate_state = exynos_drm_plane_duplicate_state, + .atomic_destroy_state = exynos_drm_plane_destroy_state, }; +static int +exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config, + struct exynos_drm_plane_state *state) +{ + bool width_ok = false, height_ok = false; + + if (config->capabilities & EXYNOS_DRM_PLANE_CAP_SCALE) + return 0; + + if (state->src.w == state->crtc.w) + width_ok = true; + + if (state->src.h == state->crtc.h) + height_ok = true; + + if ((config->capabilities & EXYNOS_DRM_PLANE_CAP_DOUBLE) && + state->h_ratio == (1 << 15)) + width_ok = true; + + if ((config->capabilities & EXYNOS_DRM_PLANE_CAP_DOUBLE) && + state->v_ratio == (1 << 15)) + height_ok = true; + + if (width_ok && height_ok) + return 0; + + DRM_DEBUG_KMS("scaling mode is not supported"); + return -ENOTSUPP; +} + static int exynos_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) { struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane); - int nr; - int i; + struct exynos_drm_plane_state *exynos_state = + to_exynos_plane_state(state); + int ret = 0; - if (!state->fb) + if (!state->crtc || !state->fb) return 0; - nr = drm_format_num_planes(state->fb->pixel_format); - for (i = 0; i < nr; i++) { - struct exynos_drm_gem *exynos_gem = - exynos_drm_fb_gem(state->fb, i); - if (!exynos_gem) { - DRM_DEBUG_KMS("gem object is null\n"); - return -EFAULT; - } - - 
exynos_plane->dma_addr[i] = exynos_gem->dma_addr + - state->fb->offsets[i]; - - DRM_DEBUG_KMS("buffer: %d, dma_addr = 0x%lx\n", - i, (unsigned long)exynos_plane->dma_addr[i]); - } + /* translate state into exynos_state */ + exynos_plane_mode_set(exynos_state); - return 0; + ret = exynos_drm_plane_check_size(exynos_plane->config, exynos_state); + return ret; } static void exynos_plane_atomic_update(struct drm_plane *plane, @@ -155,12 +232,7 @@ static void exynos_plane_atomic_update(struct drm_plane *plane, if (!state->crtc) return; - exynos_plane_mode_set(plane, state->crtc, state->fb, - state->crtc_x, state->crtc_y, - state->crtc_w, state->crtc_h, - state->src_x >> 16, state->src_y >> 16, - state->src_w >> 16, state->src_h >> 16); - + plane->crtc = state->crtc; exynos_plane->pending_fb = state->fb; if (exynos_crtc->ops->update_plane) @@ -177,8 +249,7 @@ static void exynos_plane_atomic_disable(struct drm_plane *plane, return; if (exynos_crtc->ops->disable_plane) - exynos_crtc->ops->disable_plane(exynos_crtc, - exynos_plane); + exynos_crtc->ops->disable_plane(exynos_crtc, exynos_plane); } static const struct drm_plane_helper_funcs plane_helper_funcs = { @@ -207,28 +278,19 @@ static void exynos_plane_attach_zpos_property(struct drm_plane *plane, drm_object_attach_property(&plane->base, prop, zpos); } -enum drm_plane_type exynos_plane_get_type(unsigned int zpos, - unsigned int cursor_win) -{ - if (zpos == DEFAULT_WIN) - return DRM_PLANE_TYPE_PRIMARY; - else if (zpos == cursor_win) - return DRM_PLANE_TYPE_CURSOR; - else - return DRM_PLANE_TYPE_OVERLAY; -} - int exynos_plane_init(struct drm_device *dev, struct exynos_drm_plane *exynos_plane, - unsigned long possible_crtcs, enum drm_plane_type type, - const uint32_t *formats, unsigned int fcount, - unsigned int zpos) + unsigned long possible_crtcs, + const struct exynos_drm_plane_config *config) { int err; - err = drm_universal_plane_init(dev, &exynos_plane->base, possible_crtcs, - &exynos_plane_funcs, formats, fcount, - type); + err = drm_universal_plane_init(dev, &exynos_plane->base, + possible_crtcs, + &exynos_plane_funcs, + config->pixel_formats, + config->num_pixel_formats, + config->type, NULL); if (err) { DRM_ERROR("failed to initialize plane\n"); return err; @@ -236,10 +298,12 @@ int exynos_plane_init(struct drm_device *dev, drm_plane_helper_add(&exynos_plane->base, &plane_helper_funcs); - exynos_plane->zpos = zpos; + exynos_plane->zpos = config->zpos; + exynos_plane->config = config; - if (type == DRM_PLANE_TYPE_OVERLAY) - exynos_plane_attach_zpos_property(&exynos_plane->base, zpos); + if (config->type == DRM_PLANE_TYPE_OVERLAY) + exynos_plane_attach_zpos_property(&exynos_plane->base, + config->zpos); return 0; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.h b/drivers/gpu/drm/exynos/exynos_drm_plane.h index abb641e64c23..0dd096548284 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.h +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.h @@ -9,10 +9,7 @@ * */ -enum drm_plane_type exynos_plane_get_type(unsigned int zpos, - unsigned int cursor_win); int exynos_plane_init(struct drm_device *dev, struct exynos_drm_plane *exynos_plane, - unsigned long possible_crtcs, enum drm_plane_type type, - const uint32_t *formats, unsigned int fcount, - unsigned int zpos); + unsigned long possible_crtcs, + const struct exynos_drm_plane_config *config); diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 2f5c118f4c8e..bea0f7826d30 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ 
b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -790,10 +790,10 @@ static int rotator_remove(struct platform_device *pdev) static int rotator_clk_crtl(struct rot_context *rot, bool enable) { if (enable) { - clk_enable(rot->clock); + clk_prepare_enable(rot->clock); rot->suspended = false; } else { - clk_disable(rot->clock); + clk_disable_unprepare(rot->clock); rot->suspended = true; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index 669362c53f49..0be29c1b2c05 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -24,12 +24,12 @@ #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" +#include "exynos_drm_fb.h" #include "exynos_drm_plane.h" #include "exynos_drm_vidi.h" /* vidi has totally three virtual windows. */ #define WINDOWS_NR 3 -#define CURSOR_WIN 2 #define ctx_from_connector(c) container_of(c, struct vidi_context, \ connector) @@ -89,6 +89,12 @@ static const uint32_t formats[] = { DRM_FORMAT_NV12, }; +static const enum drm_plane_type vidi_win_types[WINDOWS_NR] = { + DRM_PLANE_TYPE_PRIMARY, + DRM_PLANE_TYPE_OVERLAY, + DRM_PLANE_TYPE_CURSOR, +}; + static int vidi_enable_vblank(struct exynos_drm_crtc *crtc) { struct vidi_context *ctx = crtc->ctx; @@ -125,12 +131,15 @@ static void vidi_disable_vblank(struct exynos_drm_crtc *crtc) static void vidi_update_plane(struct exynos_drm_crtc *crtc, struct exynos_drm_plane *plane) { + struct drm_plane_state *state = plane->base.state; struct vidi_context *ctx = crtc->ctx; + dma_addr_t addr; if (ctx->suspended) return; - DRM_DEBUG_KMS("dma_addr = %pad\n", plane->dma_addr); + addr = exynos_drm_fb_dma_addr(state->fb, 0); + DRM_DEBUG_KMS("dma_addr = %pad\n", &addr); if (ctx->vblank_on) schedule_work(&ctx->work); @@ -330,7 +339,7 @@ static void vidi_connector_destroy(struct drm_connector *connector) { } -static struct drm_connector_funcs vidi_connector_funcs = { +static const struct drm_connector_funcs vidi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = vidi_detect, @@ -374,7 +383,7 @@ static struct drm_encoder *vidi_best_encoder(struct drm_connector *connector) return &ctx->encoder; } -static struct drm_connector_helper_funcs vidi_connector_helper_funcs = { +static const struct drm_connector_helper_funcs vidi_connector_helper_funcs = { .get_modes = vidi_get_modes, .best_encoder = vidi_best_encoder, }; @@ -422,14 +431,14 @@ static void exynos_vidi_disable(struct drm_encoder *encoder) { } -static struct drm_encoder_helper_funcs exynos_vidi_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs exynos_vidi_encoder_helper_funcs = { .mode_fixup = exynos_vidi_mode_fixup, .mode_set = exynos_vidi_mode_set, .enable = exynos_vidi_enable, .disable = exynos_vidi_disable, }; -static struct drm_encoder_funcs exynos_vidi_encoder_funcs = { +static const struct drm_encoder_funcs exynos_vidi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -439,17 +448,21 @@ static int vidi_bind(struct device *dev, struct device *master, void *data) struct drm_device *drm_dev = data; struct drm_encoder *encoder = &ctx->encoder; struct exynos_drm_plane *exynos_plane; - enum drm_plane_type type; - unsigned int zpos; + struct exynos_drm_plane_config plane_config = { 0 }; + unsigned int i; int pipe, ret; vidi_ctx_initialize(ctx, drm_dev); - for (zpos = 0; zpos < WINDOWS_NR; zpos++) { - type = exynos_plane_get_type(zpos, CURSOR_WIN); - ret = exynos_plane_init(drm_dev, 
&ctx->planes[zpos], - 1 << ctx->pipe, type, formats, - ARRAY_SIZE(formats), zpos); + plane_config.pixel_formats = formats; + plane_config.num_pixel_formats = ARRAY_SIZE(formats); + + for (i = 0; i < WINDOWS_NR; i++) { + plane_config.zpos = i; + plane_config.type = vidi_win_types[i]; + + ret = exynos_plane_init(drm_dev, &ctx->planes[i], + 1 << ctx->pipe, &plane_config); if (ret) return ret; } @@ -473,7 +486,7 @@ static int vidi_bind(struct device *dev, struct device *master, void *data) DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs); drm_encoder_init(drm_dev, encoder, &exynos_vidi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &exynos_vidi_encoder_helper_funcs); diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 57b675563e94..21a29dbce18c 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -113,7 +113,7 @@ struct hdmi_context { void __iomem *regs_hdmiphy; struct i2c_client *hdmiphy_port; struct i2c_adapter *ddc_adpt; - struct gpio_desc *hpd_gpio; + struct gpio_desc *hpd_gpio; int irq; struct regmap *pmureg; struct clk *hdmi; @@ -956,7 +956,7 @@ static void hdmi_connector_destroy(struct drm_connector *connector) drm_connector_cleanup(connector); } -static struct drm_connector_funcs hdmi_connector_funcs = { +static const struct drm_connector_funcs hdmi_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = hdmi_detect, @@ -1030,7 +1030,7 @@ static struct drm_encoder *hdmi_best_encoder(struct drm_connector *connector) return &hdata->encoder; } -static struct drm_connector_helper_funcs hdmi_connector_helper_funcs = { +static const struct drm_connector_helper_funcs hdmi_connector_helper_funcs = { .get_modes = hdmi_get_modes, .mode_valid = hdmi_mode_valid, .best_encoder = hdmi_best_encoder, @@ -1588,8 +1588,6 @@ static void hdmi_enable(struct drm_encoder *encoder) if (hdata->powered) return; - hdata->powered = true; - pm_runtime_get_sync(hdata->dev); if (regulator_bulk_enable(ARRAY_SIZE(supply), hdata->regul_bulk)) @@ -1599,10 +1597,9 @@ static void hdmi_enable(struct drm_encoder *encoder) regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL, PMU_HDMI_PHY_ENABLE_BIT, 1); - clk_prepare_enable(hdata->hdmi); - clk_prepare_enable(hdata->sclk_hdmi); - hdmi_conf_apply(hdata); + + hdata->powered = true; } static void hdmi_disable(struct drm_encoder *encoder) @@ -1633,9 +1630,6 @@ static void hdmi_disable(struct drm_encoder *encoder) cancel_delayed_work(&hdata->hotplug_work); - clk_disable_unprepare(hdata->sclk_hdmi); - clk_disable_unprepare(hdata->hdmi); - /* reset pmu hdmiphy control bit to disable hdmiphy */ regmap_update_bits(hdata->pmureg, PMU_HDMI_PHY_CONTROL, PMU_HDMI_PHY_ENABLE_BIT, 0); @@ -1647,14 +1641,14 @@ static void hdmi_disable(struct drm_encoder *encoder) hdata->powered = false; } -static struct drm_encoder_helper_funcs exynos_hdmi_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs exynos_hdmi_encoder_helper_funcs = { .mode_fixup = hdmi_mode_fixup, .mode_set = hdmi_mode_set, .enable = hdmi_enable, .disable = hdmi_disable, }; -static struct drm_encoder_funcs exynos_hdmi_encoder_funcs = { +static const struct drm_encoder_funcs exynos_hdmi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -1793,7 +1787,7 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data) DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", 
encoder->possible_crtcs); drm_encoder_init(drm_dev, encoder, &exynos_hdmi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &exynos_hdmi_encoder_helper_funcs); @@ -1978,12 +1972,49 @@ static int hdmi_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int exynos_hdmi_suspend(struct device *dev) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + + clk_disable_unprepare(hdata->sclk_hdmi); + clk_disable_unprepare(hdata->hdmi); + + return 0; +} + +static int exynos_hdmi_resume(struct device *dev) +{ + struct hdmi_context *hdata = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(hdata->hdmi); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret); + return ret; + } + ret = clk_prepare_enable(hdata->sclk_hdmi); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the sclk_mixer clk [%d]\n", + ret); + return ret; + } + + return 0; +} +#endif + +static const struct dev_pm_ops exynos_hdmi_pm_ops = { + SET_RUNTIME_PM_OPS(exynos_hdmi_suspend, exynos_hdmi_resume, NULL) +}; + struct platform_driver hdmi_driver = { .probe = hdmi_probe, .remove = hdmi_remove, .driver = { .name = "exynos-hdmi", .owner = THIS_MODULE, + .pm = &exynos_hdmi_pm_ops, .of_match_table = hdmi_match_types, }, }; diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index d09f8f9a8939..dfb35e2da4db 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -37,12 +37,12 @@ #include "exynos_drm_drv.h" #include "exynos_drm_crtc.h" +#include "exynos_drm_fb.h" #include "exynos_drm_plane.h" #include "exynos_drm_iommu.h" #define MIXER_WIN_NR 3 #define VP_DEFAULT_WIN 2 -#define CURSOR_WIN 1 /* The pixelformats that are natively supported by the mixer. 
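
HDMI above gets the same conversion as DECON, DP, DSI and FIMD earlier in the series: clock handling moves out of the enable/disable paths into runtime-PM callbacks, and the display paths merely take and drop a runtime-PM reference. Distilled into a minimal sketch, assuming a single clock in a hypothetical driver context:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>

struct my_ctx {
	struct device *dev;
	struct clk *clk;
};

static int my_runtime_suspend(struct device *dev)
{
	struct my_ctx *ctx = dev_get_drvdata(dev);

	clk_disable_unprepare(ctx->clk);
	return 0;
}

static int my_runtime_resume(struct device *dev)
{
	struct my_ctx *ctx = dev_get_drvdata(dev);

	return clk_prepare_enable(ctx->clk);
}

static const struct dev_pm_ops my_pm_ops = {
	SET_RUNTIME_PM_OPS(my_runtime_suspend, my_runtime_resume, NULL)
};

static void my_display_enable(struct my_ctx *ctx)
{
	/* implies my_runtime_resume() if the device was suspended */
	pm_runtime_get_sync(ctx->dev);
	/* ... program the hardware ... */
}

static void my_display_disable(struct my_ctx *ctx)
{
	/* ... stop the hardware ... */
	pm_runtime_put_sync(ctx->dev);
}

As the probe/remove hunks in this series show, this is paired with pm_runtime_enable() in probe and pm_runtime_disable() in remove (and on the probe error path).
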
*/ #define MXR_FORMAT_RGB565 4 @@ -111,6 +111,28 @@ struct mixer_drv_data { bool has_sclk; }; +static const struct exynos_drm_plane_config plane_configs[MIXER_WIN_NR] = { + { + .zpos = 0, + .type = DRM_PLANE_TYPE_PRIMARY, + .pixel_formats = mixer_formats, + .num_pixel_formats = ARRAY_SIZE(mixer_formats), + .capabilities = EXYNOS_DRM_PLANE_CAP_DOUBLE, + }, { + .zpos = 1, + .type = DRM_PLANE_TYPE_CURSOR, + .pixel_formats = mixer_formats, + .num_pixel_formats = ARRAY_SIZE(mixer_formats), + .capabilities = EXYNOS_DRM_PLANE_CAP_DOUBLE, + }, { + .zpos = 2, + .type = DRM_PLANE_TYPE_OVERLAY, + .pixel_formats = vp_formats, + .num_pixel_formats = ARRAY_SIZE(vp_formats), + .capabilities = EXYNOS_DRM_PLANE_CAP_SCALE, + }, +}; + static const u8 filter_y_horiz_tap8[] = { 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, @@ -399,10 +421,11 @@ static void mixer_stop(struct mixer_context *ctx) static void vp_video_buffer(struct mixer_context *ctx, struct exynos_drm_plane *plane) { + struct exynos_drm_plane_state *state = + to_exynos_plane_state(plane->base.state); + struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; struct mixer_resources *res = &ctx->mixer_res; - struct drm_plane_state *state = plane->base.state; - struct drm_framebuffer *fb = state->fb; - struct drm_display_mode *mode = &state->crtc->mode; + struct drm_framebuffer *fb = state->base.fb; unsigned long flags; dma_addr_t luma_addr[2], chroma_addr[2]; bool tiled_mode = false; @@ -422,8 +445,8 @@ static void vp_video_buffer(struct mixer_context *ctx, return; } - luma_addr[0] = plane->dma_addr[0]; - chroma_addr[0] = plane->dma_addr[1]; + luma_addr[0] = exynos_drm_fb_dma_addr(fb, 0); + chroma_addr[0] = exynos_drm_fb_dma_addr(fb, 1); if (mode->flags & DRM_MODE_FLAG_INTERLACE) { ctx->interlace = true; @@ -459,24 +482,24 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height / 2)); - vp_reg_write(res, VP_SRC_WIDTH, plane->src_w); - vp_reg_write(res, VP_SRC_HEIGHT, plane->src_h); + vp_reg_write(res, VP_SRC_WIDTH, state->src.w); + vp_reg_write(res, VP_SRC_HEIGHT, state->src.h); vp_reg_write(res, VP_SRC_H_POSITION, - VP_SRC_H_POSITION_VAL(plane->src_x)); - vp_reg_write(res, VP_SRC_V_POSITION, plane->src_y); + VP_SRC_H_POSITION_VAL(state->src.x)); + vp_reg_write(res, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(res, VP_DST_WIDTH, plane->crtc_w); - vp_reg_write(res, VP_DST_H_POSITION, plane->crtc_x); + vp_reg_write(res, VP_DST_WIDTH, state->crtc.w); + vp_reg_write(res, VP_DST_H_POSITION, state->crtc.x); if (ctx->interlace) { - vp_reg_write(res, VP_DST_HEIGHT, plane->crtc_h / 2); - vp_reg_write(res, VP_DST_V_POSITION, plane->crtc_y / 2); + vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h / 2); + vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y / 2); } else { - vp_reg_write(res, VP_DST_HEIGHT, plane->crtc_h); - vp_reg_write(res, VP_DST_V_POSITION, plane->crtc_y); + vp_reg_write(res, VP_DST_HEIGHT, state->crtc.h); + vp_reg_write(res, VP_DST_V_POSITION, state->crtc.y); } - vp_reg_write(res, VP_H_RATIO, plane->h_ratio); - vp_reg_write(res, VP_V_RATIO, plane->v_ratio); + vp_reg_write(res, VP_H_RATIO, state->h_ratio); + vp_reg_write(res, VP_V_RATIO, state->v_ratio); vp_reg_write(res, VP_ENDIAN_MODE, VP_ENDIAN_MODE_LITTLE); @@ -505,37 +528,14 @@ static void mixer_layer_update(struct mixer_context *ctx) mixer_reg_writemask(res, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); } -static int mixer_setup_scale(const struct exynos_drm_plane *plane, - 
unsigned int *x_ratio, unsigned int *y_ratio) -{ - if (plane->crtc_w != plane->src_w) { - if (plane->crtc_w == 2 * plane->src_w) - *x_ratio = 1; - else - goto fail; - } - - if (plane->crtc_h != plane->src_h) { - if (plane->crtc_h == 2 * plane->src_h) - *y_ratio = 1; - else - goto fail; - } - - return 0; - -fail: - DRM_DEBUG_KMS("only 2x width/height scaling of plane supported\n"); - return -ENOTSUPP; -} - static void mixer_graph_buffer(struct mixer_context *ctx, struct exynos_drm_plane *plane) { + struct exynos_drm_plane_state *state = + to_exynos_plane_state(plane->base.state); + struct drm_display_mode *mode = &state->base.crtc->state->adjusted_mode; struct mixer_resources *res = &ctx->mixer_res; - struct drm_plane_state *state = plane->base.state; - struct drm_framebuffer *fb = state->fb; - struct drm_display_mode *mode = &state->crtc->mode; + struct drm_framebuffer *fb = state->base.fb; unsigned long flags; unsigned int win = plane->zpos; unsigned int x_ratio = 0, y_ratio = 0; @@ -567,17 +567,17 @@ static void mixer_graph_buffer(struct mixer_context *ctx, return; } - /* check if mixer supports requested scaling setup */ - if (mixer_setup_scale(plane, &x_ratio, &y_ratio)) - return; + /* ratio is already checked by common plane code */ + x_ratio = state->h_ratio == (1 << 15); + y_ratio = state->v_ratio == (1 << 15); - dst_x_offset = plane->crtc_x; - dst_y_offset = plane->crtc_y; + dst_x_offset = state->crtc.x; + dst_y_offset = state->crtc.y; /* converting dma address base and source offset */ - dma_addr = plane->dma_addr[0] - + (plane->src_x * fb->bits_per_pixel >> 3) - + (plane->src_y * fb->pitches[0]); + dma_addr = exynos_drm_fb_dma_addr(fb, 0) + + (state->src.x * fb->bits_per_pixel >> 3) + + (state->src.y * fb->pitches[0]); src_x_offset = 0; src_y_offset = 0; @@ -605,8 +605,8 @@ static void mixer_graph_buffer(struct mixer_context *ctx, mixer_reg_write(res, MXR_RESOLUTION, val); } - val = MXR_GRP_WH_WIDTH(plane->src_w); - val |= MXR_GRP_WH_HEIGHT(plane->src_h); + val = MXR_GRP_WH_WIDTH(state->src.w); + val |= MXR_GRP_WH_HEIGHT(state->src.h); val |= MXR_GRP_WH_H_SCALE(x_ratio); val |= MXR_GRP_WH_V_SCALE(y_ratio); mixer_reg_write(res, MXR_GRAPHIC_WH(win), val); @@ -1020,43 +1020,12 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) { struct mixer_context *ctx = crtc->ctx; struct mixer_resources *res = &ctx->mixer_res; - int ret; if (test_bit(MXR_BIT_POWERED, &ctx->flags)) return; pm_runtime_get_sync(ctx->dev); - ret = clk_prepare_enable(res->mixer); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the mixer clk [%d]\n", ret); - return; - } - ret = clk_prepare_enable(res->hdmi); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret); - return; - } - if (ctx->vp_enabled) { - ret = clk_prepare_enable(res->vp); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the vp clk [%d]\n", - ret); - return; - } - if (ctx->has_sclk) { - ret = clk_prepare_enable(res->sclk_mixer); - if (ret < 0) { - DRM_ERROR("Failed to prepare_enable the " \ - "sclk_mixer clk [%d]\n", - ret); - return; - } - } - } - - set_bit(MXR_BIT_POWERED, &ctx->flags); - mixer_reg_writemask(res, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET); if (test_bit(MXR_BIT_VSYNC, &ctx->flags)) { @@ -1064,12 +1033,13 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) mixer_reg_writemask(res, MXR_INT_EN, ~0, MXR_INT_EN_VSYNC); } mixer_win_reset(ctx); + + set_bit(MXR_BIT_POWERED, &ctx->flags); } static void mixer_disable(struct exynos_drm_crtc *crtc) { struct mixer_context *ctx = crtc->ctx; - struct 
mixer_resources *res = &ctx->mixer_res; int i; if (!test_bit(MXR_BIT_POWERED, &ctx->flags)) @@ -1081,17 +1051,9 @@ static void mixer_disable(struct exynos_drm_crtc *crtc) for (i = 0; i < MIXER_WIN_NR; i++) mixer_disable_plane(crtc, &ctx->planes[i]); - clear_bit(MXR_BIT_POWERED, &ctx->flags); + pm_runtime_put(ctx->dev); - clk_disable_unprepare(res->hdmi); - clk_disable_unprepare(res->mixer); - if (ctx->vp_enabled) { - clk_disable_unprepare(res->vp); - if (ctx->has_sclk) - clk_disable_unprepare(res->sclk_mixer); - } - - pm_runtime_put_sync(ctx->dev); + clear_bit(MXR_BIT_POWERED, &ctx->flags); } /* Only valid for Mixer version 16.0.33.0 */ @@ -1187,30 +1149,19 @@ static int mixer_bind(struct device *dev, struct device *manager, void *data) struct mixer_context *ctx = dev_get_drvdata(dev); struct drm_device *drm_dev = data; struct exynos_drm_plane *exynos_plane; - unsigned int zpos; + unsigned int i; int ret; ret = mixer_initialize(ctx, drm_dev); if (ret) return ret; - for (zpos = 0; zpos < MIXER_WIN_NR; zpos++) { - enum drm_plane_type type; - const uint32_t *formats; - unsigned int fcount; - - if (zpos < VP_DEFAULT_WIN) { - formats = mixer_formats; - fcount = ARRAY_SIZE(mixer_formats); - } else { - formats = vp_formats; - fcount = ARRAY_SIZE(vp_formats); - } + for (i = 0; i < MIXER_WIN_NR; i++) { + if (i == VP_DEFAULT_WIN && !ctx->vp_enabled) + continue; - type = exynos_plane_get_type(zpos, CURSOR_WIN); - ret = exynos_plane_init(drm_dev, &ctx->planes[zpos], - 1 << ctx->pipe, type, formats, fcount, - zpos); + ret = exynos_plane_init(drm_dev, &ctx->planes[i], + 1 << ctx->pipe, &plane_configs[i]); if (ret) return ret; } @@ -1293,10 +1244,70 @@ static int mixer_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int exynos_mixer_suspend(struct device *dev) +{ + struct mixer_context *ctx = dev_get_drvdata(dev); + struct mixer_resources *res = &ctx->mixer_res; + + clk_disable_unprepare(res->hdmi); + clk_disable_unprepare(res->mixer); + if (ctx->vp_enabled) { + clk_disable_unprepare(res->vp); + if (ctx->has_sclk) + clk_disable_unprepare(res->sclk_mixer); + } + + return 0; +} + +static int exynos_mixer_resume(struct device *dev) +{ + struct mixer_context *ctx = dev_get_drvdata(dev); + struct mixer_resources *res = &ctx->mixer_res; + int ret; + + ret = clk_prepare_enable(res->mixer); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the mixer clk [%d]\n", ret); + return ret; + } + ret = clk_prepare_enable(res->hdmi); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the hdmi clk [%d]\n", ret); + return ret; + } + if (ctx->vp_enabled) { + ret = clk_prepare_enable(res->vp); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the vp clk [%d]\n", + ret); + return ret; + } + if (ctx->has_sclk) { + ret = clk_prepare_enable(res->sclk_mixer); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the " \ + "sclk_mixer clk [%d]\n", + ret); + return ret; + } + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops exynos_mixer_pm_ops = { + SET_RUNTIME_PM_OPS(exynos_mixer_suspend, exynos_mixer_resume, NULL) +}; + struct platform_driver mixer_driver = { .driver = { .name = "exynos-mixer", .owner = THIS_MODULE, + .pm = &exynos_mixer_pm_ops, .of_match_table = mixer_match_types, }, .probe = mixer_probe, diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h index 9ad592707aaf..4704a993cbb7 100644 --- a/drivers/gpu/drm/exynos/regs-gsc.h +++ b/drivers/gpu/drm/exynos/regs-gsc.h @@ -273,12 +273,12 @@ #define GSC_CLK_GATE_MODE_SNOOP_CNT(x) 
((x) << 0) /* SYSCON. GSCBLK_CFG */ -#define SYSREG_GSCBLK_CFG1 (S3C_VA_SYS + 0x0224) +#define SYSREG_GSCBLK_CFG1 0x0224 #define GSC_BLK_DISP1WB_DEST(x) (x << 10) #define GSC_BLK_SW_RESET_WB_DEST(x) (1 << (18 + x)) #define GSC_BLK_PXLASYNC_LO_MASK_WB(x) (0 << (14 + x)) #define GSC_BLK_GSCL_WB_IN_SRC_SEL(x) (1 << (2 * x)) -#define SYSREG_GSCBLK_CFG2 (S3C_VA_SYS + 0x2000) +#define SYSREG_GSCBLK_CFG2 0x2000 #define PXLASYNC_LO_MASK_CAMIF_GSCL(x) (1 << (x)) #endif /* EXYNOS_REGS_GSC_H_ */ diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 82a3d311e164..d8ab8f0af10c 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -175,7 +175,7 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev) primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm); ret = drm_crtc_init_with_planes(fsl_dev->drm, crtc, primary, NULL, - &fsl_dcu_drm_crtc_funcs); + &fsl_dcu_drm_crtc_funcs, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 1930234ba5f1..fca97d3fc846 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -363,7 +363,6 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev) fsl_dev->np = dev->of_node; drm->dev_private = fsl_dev; dev_set_drvdata(dev, fsl_dev); - drm_dev_set_unique(drm, dev_name(dev)); ret = drm_dev_register(drm, 0); if (ret < 0) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 51daaea40b4d..4b13cf919575 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -249,7 +249,7 @@ struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev) &fsl_dcu_drm_plane_funcs, fsl_dcu_drm_plane_formats, ARRAY_SIZE(fsl_dcu_drm_plane_formats), - DRM_PLANE_TYPE_PRIMARY); + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) { kfree(primary); primary = NULL; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index fe8ab5da04fb..8780deba5e8a 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -57,7 +57,7 @@ int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev, encoder->possible_crtcs = 1; ret = drm_encoder_init(fsl_dev->drm, encoder, &encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 3531f90e53d0..8745971a7680 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -619,6 +619,8 @@ const struct psb_ops cdv_chip_ops = { .init_pm = cdv_init_pm, .save_regs = cdv_save_display_registers, .restore_regs = cdv_restore_display_registers, + .save_crtc = gma_crtc_save, + .restore_crtc = gma_crtc_restore, .power_down = cdv_power_down, .power_up = cdv_power_up, .update_wm = cdv_update_wm, diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c index 248c33a35ebf..d0717a85c7ec 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_crt.c +++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c @@ -273,7 +273,7 @@ void cdv_intel_crt_init(struct drm_device *dev, encoder = &gma_encoder->base; drm_encoder_init(dev, encoder, - &cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC); + &cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC, NULL); 
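A note on the ", NULL" hunks that repeat throughout this series: drm_encoder_init() gains a trailing parameter, which (assuming it matches the mainline rework, where the signature becomes drm_encoder_init(dev, encoder, funcs, encoder_type, name, ...)) is a printf-style encoder name; passing NULL keeps the previous behaviour of auto-generating a "<type>-<id>" name. A minimal sketch of a caller that supplies an explicit name instead; the "CRT-%d" string and the crt_index variable are illustrative, not taken from these patches:

        /*
         * Sketch: naming an encoder explicitly rather than passing NULL.
         * Assumes the extended drm_encoder_init() signature used in the
         * hunks above; crt_index is a hypothetical per-device counter.
         */
        ret = drm_encoder_init(dev, encoder, &cdv_intel_crt_enc_funcs,
                               DRM_MODE_ENCODER_DAC, "CRT-%d", crt_index);
        if (ret)
                return ret;

Passing NULL everywhere, as these hunks do, is the conservative conversion: behaviour is unchanged, and individual drivers can opt into descriptive names later.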
gma_connector_attach_encoder(gma_connector, gma_encoder); diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c index 7d47b3d5cc0d..6126546295e9 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_display.c +++ b/drivers/gpu/drm/gma500/cdv_intel_display.c @@ -983,8 +983,6 @@ const struct drm_crtc_helper_funcs cdv_intel_helper_funcs = { }; const struct drm_crtc_funcs cdv_intel_crtc_funcs = { - .save = gma_crtc_save, - .restore = gma_crtc_restore, .cursor_set = gma_crtc_cursor_set, .cursor_move = gma_crtc_cursor_move, .gamma_set = gma_crtc_gamma_set, diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index 17cea400ae32..7bb1f1aff932 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -2020,7 +2020,8 @@ cdv_intel_dp_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev encoder = &gma_encoder->base; drm_connector_init(dev, connector, &cdv_intel_dp_connector_funcs, type); - drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c index 6b1d3340ba14..ddf2d7700759 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c +++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c @@ -270,8 +270,6 @@ static const struct drm_connector_helper_funcs static const struct drm_connector_funcs cdv_hdmi_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = cdv_hdmi_save, - .restore = cdv_hdmi_restore, .detect = cdv_hdmi_detect, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = cdv_hdmi_set_property, @@ -306,13 +304,16 @@ void cdv_hdmi_init(struct drm_device *dev, connector = &gma_connector->base; connector->polled = DRM_CONNECTOR_POLL_HPD; + gma_connector->save = cdv_hdmi_save; + gma_connector->restore = cdv_hdmi_restore; + encoder = &gma_encoder->base; drm_connector_init(dev, connector, &cdv_hdmi_connector_funcs, DRM_MODE_CONNECTOR_DVID); drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); gma_encoder->type = INTEL_OUTPUT_HDMI; diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index 211069b2b951..813ef23a8054 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -530,8 +530,6 @@ static const struct drm_connector_helper_funcs static const struct drm_connector_funcs cdv_intel_lvds_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = cdv_intel_lvds_save, - .restore = cdv_intel_lvds_restore, .detect = cdv_intel_lvds_detect, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = cdv_intel_lvds_set_property, @@ -643,6 +641,8 @@ void cdv_intel_lvds_init(struct drm_device *dev, gma_encoder->dev_priv = lvds_priv; connector = &gma_connector->base; + gma_connector->save = cdv_intel_lvds_save; + gma_connector->restore = cdv_intel_lvds_restore; encoder = &gma_encoder->base; @@ -652,7 +652,7 @@ void cdv_intel_lvds_init(struct drm_device *dev, drm_encoder_init(dev, encoder, &cdv_intel_lvds_enc_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); diff --git a/drivers/gpu/drm/gma500/framebuffer.c 
b/drivers/gpu/drm/gma500/framebuffer.c index dc0508dca1d4..ee95c03a8c54 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -406,8 +406,6 @@ static int psbfb_create(struct psb_fbdev *fbdev, memset(dev_priv->vram_addr + backing->offset, 0, size); - mutex_lock(&dev->struct_mutex); - info = drm_fb_helper_alloc_fbi(&fbdev->psb_fb_helper); if (IS_ERR(info)) { ret = PTR_ERR(info); @@ -463,17 +461,15 @@ static int psbfb_create(struct psb_fbdev *fbdev, dev_dbg(dev->dev, "allocated %dx%d fb\n", psbfb->base.width, psbfb->base.height); - mutex_unlock(&dev->struct_mutex); return 0; out_unref: if (backing->stolen) psb_gtt_free_range(dev, backing); else - drm_gem_object_unreference(&backing->gem); + drm_gem_object_unreference_unlocked(&backing->gem); drm_fb_helper_release_fbi(&fbdev->psb_fb_helper); out_err1: - mutex_unlock(&dev->struct_mutex); psb_gtt_free_range(dev, backing); return ret; } @@ -569,7 +565,7 @@ static int psb_fbdev_destroy(struct drm_device *dev, struct psb_fbdev *fbdev) drm_framebuffer_cleanup(&psbfb->base); if (psbfb->gtt) - drm_gem_object_unreference(&psbfb->gtt->gem); + drm_gem_object_unreference_unlocked(&psbfb->gtt->gem); return 0; } @@ -784,12 +780,8 @@ void psb_modeset_cleanup(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; if (dev_priv->modeset) { - mutex_lock(&dev->struct_mutex); - drm_kms_helper_poll_fini(dev); psb_fbdev_fini(dev); drm_mode_config_cleanup(dev); - - mutex_unlock(&dev->struct_mutex); } } diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c index c707fa6fca85..506224b3a0ad 100644 --- a/drivers/gpu/drm/gma500/gem.c +++ b/drivers/gpu/drm/gma500/gem.c @@ -62,15 +62,10 @@ int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev, int ret = 0; struct drm_gem_object *obj; - mutex_lock(&dev->struct_mutex); - /* GEM does all our handle to object mapping */ obj = drm_gem_object_lookup(dev, file, handle); - if (obj == NULL) { - ret = -ENOENT; - goto unlock; - } - /* What validation is needed here ? */ + if (obj == NULL) + return -ENOENT; /* Make it mmapable */ ret = drm_gem_create_mmap_offset(obj); @@ -78,9 +73,7 @@ int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev, goto out; *offset = drm_vma_node_offset_addr(&obj->vma_node); out: - drm_gem_object_unreference(obj); -unlock: - mutex_unlock(&dev->struct_mutex); + drm_gem_object_unreference_unlocked(obj); return ret; } @@ -130,7 +123,7 @@ int psb_gem_create(struct drm_file *file, struct drm_device *dev, u64 size, return ret; } /* We have the initial and handle reference but need only one now */ - drm_gem_object_unreference(&r->gem); + drm_gem_object_unreference_unlocked(&r->gem); *handlep = handle; return 0; } @@ -189,7 +182,7 @@ int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Make sure we don't parallel update on a fault, nor move or remove something from beneath our feet */ - mutex_lock(&dev->struct_mutex); + mutex_lock(&dev_priv->mmap_mutex); /* For now the mmap pins the object and it stays pinned. 
As things stand that will do us no harm */ @@ -215,7 +208,7 @@ int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); fail: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev_priv->mmap_mutex); switch (ret) { case 0: case -ERESTARTSYS: diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c index 001b450b27b3..ff17af4cfc64 100644 --- a/drivers/gpu/drm/gma500/gma_display.c +++ b/drivers/gpu/drm/gma500/gma_display.c @@ -349,8 +349,6 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, /* If we didn't get a handle then turn the cursor off */ if (!handle) { temp = CURSOR_MODE_DISABLE; - mutex_lock(&dev->struct_mutex); - if (gma_power_begin(dev, false)) { REG_WRITE(control, temp); REG_WRITE(base, 0); @@ -362,11 +360,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, gt = container_of(gma_crtc->cursor_obj, struct gtt_range, gem); psb_gtt_unpin(gt); - drm_gem_object_unreference(gma_crtc->cursor_obj); + drm_gem_object_unreference_unlocked(gma_crtc->cursor_obj); gma_crtc->cursor_obj = NULL; } - - mutex_unlock(&dev->struct_mutex); return 0; } @@ -376,7 +372,6 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, return -EINVAL; } - mutex_lock(&dev->struct_mutex); obj = drm_gem_object_lookup(dev, file_priv, handle); if (!obj) { ret = -ENOENT; @@ -441,17 +436,15 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc, if (gma_crtc->cursor_obj) { gt = container_of(gma_crtc->cursor_obj, struct gtt_range, gem); psb_gtt_unpin(gt); - drm_gem_object_unreference(gma_crtc->cursor_obj); + drm_gem_object_unreference_unlocked(gma_crtc->cursor_obj); } gma_crtc->cursor_obj = obj; unlock: - mutex_unlock(&dev->struct_mutex); return ret; unref_cursor: - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); + drm_gem_object_unreference_unlocked(obj); return ret; } diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c index ce015db59dc6..8f69225ce2b4 100644 --- a/drivers/gpu/drm/gma500/gtt.c +++ b/drivers/gpu/drm/gma500/gtt.c @@ -425,6 +425,7 @@ int psb_gtt_init(struct drm_device *dev, int resume) if (!resume) { mutex_init(&dev_priv->gtt_mutex); + mutex_init(&dev_priv->mmap_mutex); psb_gtt_alloc(dev); } diff --git a/drivers/gpu/drm/gma500/mdfld_device.c b/drivers/gpu/drm/gma500/mdfld_device.c index 265ad0de44a6..e2ab858122f9 100644 --- a/drivers/gpu/drm/gma500/mdfld_device.c +++ b/drivers/gpu/drm/gma500/mdfld_device.c @@ -546,6 +546,8 @@ const struct psb_ops mdfld_chip_ops = { .save_regs = mdfld_save_registers, .restore_regs = mdfld_restore_registers, + .save_crtc = gma_crtc_save, + .restore_crtc = gma_crtc_restore, .power_down = mdfld_power_down, .power_up = mdfld_power_up, }; diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c index d4813e03f5ee..7cd87a0c2385 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_dpi.c @@ -821,14 +821,18 @@ void mdfld_dsi_dpi_mode_set(struct drm_encoder *encoder, struct drm_device *dev = dsi_config->dev; struct drm_psb_private *dev_priv = dev->dev_private; int pipe = mdfld_dsi_encoder_get_pipe(dsi_encoder); - u32 pipeconf_reg = PIPEACONF; u32 dspcntr_reg = DSPACNTR; + u32 pipeconf, dspcntr; - u32 pipeconf = dev_priv->pipeconf[pipe]; - u32 dspcntr = dev_priv->dspcntr[pipe]; u32 mipi = MIPI_PORT_EN | PASS_FROM_SPHY_TO_AFE | SEL_FLOPPED_HSTX; + if (WARN_ON(pipe < 0)) + return; + + pipeconf = dev_priv->pipeconf[pipe]; + dspcntr = dev_priv->dspcntr[pipe]; + if (pipe) { 
pipeconf_reg = PIPECCONF; dspcntr_reg = DSPCCNTR; @@ -994,7 +998,7 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev, drm_encoder_init(dev, encoder, p_funcs->encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c index 89f705c3a5eb..d758f4cc6805 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c @@ -405,8 +405,6 @@ static struct drm_encoder *mdfld_dsi_connector_best_encoder( /*DSI connector funcs*/ static const struct drm_connector_funcs mdfld_dsi_connector_funcs = { .dpms = /*drm_helper_connector_dpms*/mdfld_dsi_connector_dpms, - .save = mdfld_dsi_connector_save, - .restore = mdfld_dsi_connector_restore, .detect = mdfld_dsi_connector_detect, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = mdfld_dsi_connector_set_property, @@ -563,6 +561,9 @@ void mdfld_dsi_output_init(struct drm_device *dev, connector = &dsi_connector->base.base; + dsi_connector->base.save = mdfld_dsi_connector_save; + dsi_connector->base.restore = mdfld_dsi_connector_restore; + drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, DRM_MODE_CONNECTOR_LVDS); drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs); diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c index 368a03ae3010..ba30b43a3412 100644 --- a/drivers/gpu/drm/gma500/oaktrail_device.c +++ b/drivers/gpu/drm/gma500/oaktrail_device.c @@ -568,6 +568,8 @@ const struct psb_ops oaktrail_chip_ops = { .save_regs = oaktrail_save_display_registers, .restore_regs = oaktrail_restore_display_registers, + .save_crtc = gma_crtc_save, + .restore_crtc = gma_crtc_restore, .power_down = oaktrail_power_down, .power_up = oaktrail_power_up, diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 2310d879cdc2..2d18499d6060 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -654,7 +654,7 @@ void oaktrail_hdmi_init(struct drm_device *dev, drm_encoder_init(dev, encoder, &oaktrail_hdmi_enc_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c index 83bbc271bcfb..f7038f12ac76 100644 --- a/drivers/gpu/drm/gma500/oaktrail_lvds.c +++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c @@ -323,7 +323,7 @@ void oaktrail_lvds_init(struct drm_device *dev, DRM_MODE_CONNECTOR_LVDS); drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); gma_encoder->type = INTEL_OUTPUT_LVDS; diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c index 07df7d4eea72..dc0f8527570c 100644 --- a/drivers/gpu/drm/gma500/psb_device.c +++ b/drivers/gpu/drm/gma500/psb_device.c @@ -181,7 +181,7 @@ static int psb_save_display_registers(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; - struct drm_connector *connector; + struct gma_connector *connector; struct psb_state *regs = &dev_priv->regs.psb; /* Display arbitration control + watermarks */ @@ -198,12 +198,12 @@ static int psb_save_display_registers(struct drm_device *dev) 
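The hunk below (psb_save_display_registers(), with its restore twin further down) is the consumer side of a refactor visible across the gma500 patches: the ->save/->restore hooks disappear from drm_crtc_funcs and drm_connector_funcs, so gma500 keeps them privately, in struct psb_ops for CRTCs and in struct gma_connector for connectors. A condensed sketch of the registration side, using the cdv_hdmi handlers shown earlier as the example (the same pattern recurs for LVDS, SDVO and DSI in later hunks):

        /*
         * Sketch: wiring the driver-private save/restore hooks at
         * connector init time, as the cdv_intel_hdmi.c hunk above does.
         * The suspend path below then walks gma_connector (not
         * drm_connector) and invokes these hooks directly.
         */
        gma_connector->save = cdv_hdmi_save;
        gma_connector->restore = cdv_hdmi_restore;
        drm_connector_init(dev, &gma_connector->base,
                           &cdv_hdmi_connector_funcs,
                           DRM_MODE_CONNECTOR_DVID);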
drm_modeset_lock_all(dev); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (drm_helper_crtc_in_use(crtc)) - crtc->funcs->save(crtc); + dev_priv->ops->save_crtc(crtc); } - list_for_each_entry(connector, &dev->mode_config.connector_list, head) - if (connector->funcs->save) - connector->funcs->save(connector); + list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) + if (connector->save) + connector->save(&connector->base); drm_modeset_unlock_all(dev); return 0; @@ -219,7 +219,7 @@ static int psb_restore_display_registers(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; struct drm_crtc *crtc; - struct drm_connector *connector; + struct gma_connector *connector; struct psb_state *regs = &dev_priv->regs.psb; /* Display arbitration + watermarks */ @@ -238,11 +238,11 @@ static int psb_restore_display_registers(struct drm_device *dev) drm_modeset_lock_all(dev); list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) if (drm_helper_crtc_in_use(crtc)) - crtc->funcs->restore(crtc); + dev_priv->ops->restore_crtc(crtc); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) - if (connector->funcs->restore) - connector->funcs->restore(connector); + list_for_each_entry(connector, &dev->mode_config.connector_list, base.head) + if (connector->restore) + connector->restore(&connector->base); drm_modeset_unlock_all(dev); return 0; @@ -354,6 +354,8 @@ const struct psb_ops psb_chip_ops = { .init_pm = psb_init_pm, .save_regs = psb_save_display_registers, .restore_regs = psb_restore_display_registers, + .save_crtc = gma_crtc_save, + .restore_crtc = gma_crtc_restore, .power_down = psb_power_down, .power_up = psb_power_up, }; diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index e21726ecac32..b74372760d7f 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -465,6 +465,8 @@ struct drm_psb_private { struct mutex gtt_mutex; struct resource *gtt_mem; /* Our PCI resource */ + struct mutex mmap_mutex; + struct psb_mmu_driver *mmu; struct psb_mmu_pd *pf_pd; @@ -651,6 +653,8 @@ struct psb_ops { void (*init_pm)(struct drm_device *dev); int (*save_regs)(struct drm_device *dev); int (*restore_regs)(struct drm_device *dev); + void (*save_crtc)(struct drm_crtc *crtc); + void (*restore_crtc)(struct drm_crtc *crtc); int (*power_up)(struct drm_device *dev); int (*power_down)(struct drm_device *dev); void (*update_wm)(struct drm_device *dev, struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c index 6659da88fe5b..dcdbc37e55e1 100644 --- a/drivers/gpu/drm/gma500/psb_intel_display.c +++ b/drivers/gpu/drm/gma500/psb_intel_display.c @@ -439,8 +439,6 @@ const struct drm_crtc_helper_funcs psb_intel_helper_funcs = { }; const struct drm_crtc_funcs psb_intel_crtc_funcs = { - .save = gma_crtc_save, - .restore = gma_crtc_restore, .cursor_set = gma_crtc_cursor_set, .cursor_move = gma_crtc_cursor_move, .gamma_set = gma_crtc_gamma_set, diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h index 860dd2177ca1..2a3b7c684db2 100644 --- a/drivers/gpu/drm/gma500/psb_intel_drv.h +++ b/drivers/gpu/drm/gma500/psb_intel_drv.h @@ -140,6 +140,9 @@ struct gma_encoder { struct gma_connector { struct drm_connector base; struct gma_encoder *encoder; + + void (*save)(struct drm_connector *connector); + void (*restore)(struct drm_connector *connector); }; struct psb_intel_crtc_state { diff 
--git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index ce0645d0c1e5..b1b93317d054 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -653,8 +653,6 @@ const struct drm_connector_helper_funcs const struct drm_connector_funcs psb_intel_lvds_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = psb_intel_lvds_save, - .restore = psb_intel_lvds_restore, .detect = psb_intel_lvds_detect, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = psb_intel_lvds_set_property, @@ -715,6 +713,9 @@ void psb_intel_lvds_init(struct drm_device *dev, gma_encoder->dev_priv = lvds_priv; connector = &gma_connector->base; + gma_connector->save = psb_intel_lvds_save; + gma_connector->restore = psb_intel_lvds_restore; + encoder = &gma_encoder->base; drm_connector_init(dev, connector, &psb_intel_lvds_connector_funcs, @@ -722,7 +723,7 @@ void psb_intel_lvds_init(struct drm_device *dev, drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); gma_connector_attach_encoder(gma_connector, gma_encoder); gma_encoder->type = INTEL_OUTPUT_LVDS; diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 58529cea575d..e787d376ba67 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -1837,8 +1837,6 @@ static const struct drm_encoder_helper_funcs psb_intel_sdvo_helper_funcs = { static const struct drm_connector_funcs psb_intel_sdvo_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = psb_intel_sdvo_save, - .restore = psb_intel_sdvo_restore, .detect = psb_intel_sdvo_detect, .fill_modes = drm_helper_probe_single_connector_modes, .set_property = psb_intel_sdvo_set_property, @@ -2021,6 +2019,9 @@ psb_intel_sdvo_connector_init(struct psb_intel_sdvo_connector *connector, connector->base.base.doublescan_allowed = 0; connector->base.base.display_info.subpixel_order = SubPixelHorizontalRGB; + connector->base.save = psb_intel_sdvo_save; + connector->base.restore = psb_intel_sdvo_restore; + gma_connector_attach_encoder(&connector->base, &encoder->base); drm_connector_register(&connector->base.base); } @@ -2525,7 +2526,8 @@ bool psb_intel_sdvo_init(struct drm_device *dev, int sdvo_reg) /* encoder type will be decided later */ gma_encoder = &psb_intel_sdvo->base; gma_encoder->type = INTEL_OUTPUT_SDVO; - drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs, 0); + drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs, + 0, NULL); /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c index 00416f23b5cb..533d1e3d4a99 100644 --- a/drivers/gpu/drm/i2c/adv7511.c +++ b/drivers/gpu/drm/i2c/adv7511.c @@ -752,7 +752,7 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder, adv7511->f_tmds = mode->clock; } -static struct drm_encoder_slave_funcs adv7511_encoder_funcs = { +static const struct drm_encoder_slave_funcs adv7511_encoder_funcs = { .dpms = adv7511_encoder_dpms, .mode_valid = adv7511_encoder_mode_valid, .mode_set = adv7511_encoder_mode_set, diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c index d9a72c96e56c..90db5f4dcce5 100644 --- a/drivers/gpu/drm/i2c/ch7006_drv.c +++ b/drivers/gpu/drm/i2c/ch7006_drv.c @@ -371,7 +371,7 @@ static int ch7006_encoder_set_property(struct drm_encoder *encoder, 
return 0; } -static struct drm_encoder_slave_funcs ch7006_encoder_funcs = { +static const struct drm_encoder_slave_funcs ch7006_encoder_funcs = { .set_config = ch7006_encoder_set_config, .destroy = ch7006_encoder_destroy, .dpms = ch7006_encoder_dpms, diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c index 002ce7874332..c400428f6c8c 100644 --- a/drivers/gpu/drm/i2c/sil164_drv.c +++ b/drivers/gpu/drm/i2c/sil164_drv.c @@ -341,7 +341,7 @@ sil164_encoder_destroy(struct drm_encoder *encoder) drm_i2c_encoder_destroy(encoder); } -static struct drm_encoder_slave_funcs sil164_encoder_funcs = { +static const struct drm_encoder_slave_funcs sil164_encoder_funcs = { .set_config = sil164_encoder_set_config, .destroy = sil164_encoder_destroy, .dpms = sil164_encoder_dpms, diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 896b6aaf8c4d..012d36d9a75b 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -22,6 +22,7 @@ #include <sound/asoundef.h> #include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> #include <drm/drm_of.h> @@ -855,18 +856,6 @@ static void tda998x_encoder_dpms(struct drm_encoder *encoder, int mode) priv->dpms = mode; } -static void -tda998x_encoder_save(struct drm_encoder *encoder) -{ - DBG(""); -} - -static void -tda998x_encoder_restore(struct drm_encoder *encoder) -{ - DBG(""); -} - static bool tda998x_encoder_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, @@ -878,7 +867,10 @@ tda998x_encoder_mode_fixup(struct drm_encoder *encoder, static int tda998x_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { - if (mode->clock > 150000) + /* TDA19988 dotclock can go up to 165MHz */ + struct tda998x_priv *priv = conn_to_tda998x_priv(connector); + + if (mode->clock > ((priv->rev == TDA19988) ? 
165000 : 150000)) return MODE_CLOCK_HIGH; if (mode->htotal >= BIT(13)) return MODE_BAD_HVALUE; @@ -1351,8 +1343,6 @@ static void tda998x_encoder_commit(struct drm_encoder *encoder) static const struct drm_encoder_helper_funcs tda998x_encoder_helper_funcs = { .dpms = tda998x_encoder_dpms, - .save = tda998x_encoder_save, - .restore = tda998x_encoder_restore, .mode_fixup = tda998x_encoder_mode_fixup, .prepare = tda998x_encoder_prepare, .commit = tda998x_encoder_commit, @@ -1393,10 +1383,13 @@ static void tda998x_connector_destroy(struct drm_connector *connector) } static const struct drm_connector_funcs tda998x_connector_funcs = { - .dpms = drm_helper_connector_dpms, + .dpms = drm_atomic_helper_connector_dpms, + .reset = drm_atomic_helper_connector_reset, .fill_modes = drm_helper_probe_single_connector_modes, .detect = tda998x_connector_detect, .destroy = tda998x_connector_destroy, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; static int tda998x_bind(struct device *dev, struct device *master, void *data) @@ -1437,7 +1430,7 @@ static int tda998x_bind(struct device *dev, struct device *master, void *data) drm_encoder_helper_add(&priv->encoder, &tda998x_encoder_helper_funcs); ret = drm_encoder_init(drm, &priv->encoder, &tda998x_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); if (ret) goto err_encoder; @@ -1472,6 +1465,7 @@ static void tda998x_unbind(struct device *dev, struct device *master, { struct tda998x_priv *priv = dev_get_drvdata(dev); + drm_connector_unregister(&priv->connector); drm_connector_cleanup(&priv->connector); drm_encoder_cleanup(&priv->encoder); tda998x_destroy(priv); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 411a9c68b4ee..a8721fccd8a0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1639,7 +1639,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->fbc.lock); - if (intel_fbc_enabled(dev_priv)) + if (intel_fbc_is_active(dev_priv)) seq_puts(m, "FBC enabled\n"); else seq_printf(m, "FBC disabled: %s\n", @@ -1869,33 +1869,29 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; - struct intel_fbdev *ifbdev = NULL; - struct intel_framebuffer *fb; + struct intel_framebuffer *fbdev_fb = NULL; struct drm_framebuffer *drm_fb; #ifdef CONFIG_DRM_FBDEV_EMULATION - struct drm_i915_private *dev_priv = dev->dev_private; - - ifbdev = dev_priv->fbdev; - if (ifbdev) { - fb = to_intel_framebuffer(ifbdev->helper.fb); - - seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ", - fb->base.width, - fb->base.height, - fb->base.depth, - fb->base.bits_per_pixel, - fb->base.modifier[0], - atomic_read(&fb->base.refcount.refcount)); - describe_obj(m, fb->obj); - seq_putc(m, '\n'); - } + if (to_i915(dev)->fbdev) { + fbdev_fb = to_intel_framebuffer(to_i915(dev)->fbdev->helper.fb); + + seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ", + fbdev_fb->base.width, + fbdev_fb->base.height, + fbdev_fb->base.depth, + fbdev_fb->base.bits_per_pixel, + fbdev_fb->base.modifier[0], + atomic_read(&fbdev_fb->base.refcount.refcount)); + describe_obj(m, fbdev_fb->obj); + seq_putc(m, '\n'); + } #endif mutex_lock(&dev->mode_config.fb_lock); drm_for_each_fb(drm_fb, dev) 
{ - fb = to_intel_framebuffer(drm_fb); - if (ifbdev && &fb->base == ifbdev->helper.fb) + struct intel_framebuffer *fb = to_intel_framebuffer(drm_fb); + if (fb == fbdev_fb) continue; seq_printf(m, "user size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ", @@ -2473,15 +2469,15 @@ static int i915_guc_info(struct seq_file *m, void *data) if (!HAS_GUC_SCHED(dev_priv->dev)) return 0; + if (mutex_lock_interruptible(&dev->struct_mutex)) + return 0; + /* Take a local copy of the GuC data, so we can dump it at leisure */ - spin_lock(&dev_priv->guc.host2guc_lock); guc = dev_priv->guc; - if (guc.execbuf_client) { - spin_lock(&guc.execbuf_client->wq_lock); + if (guc.execbuf_client) client = *guc.execbuf_client; - spin_unlock(&guc.execbuf_client->wq_lock); - } - spin_unlock(&dev_priv->guc.host2guc_lock); + + mutex_unlock(&dev->struct_mutex); seq_printf(m, "GuC total action count: %llu\n", guc.action_count); seq_printf(m, "GuC action failure count: %u\n", guc.action_fail); @@ -2582,8 +2578,11 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) } seq_puts(m, "\n"); - /* CHV PSR has no kind of performance counter */ - if (HAS_DDI(dev)) { + /* + * VLV/CHV PSR has no kind of performance counter + * SKL+ Perf counter is reset to 0 everytime DC state is entered + */ + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { psrperf = I915_READ(EDP_PSR_PERF_CNT) & EDP_PSR_PERF_CNT_MASK; @@ -2685,71 +2684,6 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) return 0; } -static const char *power_domain_str(enum intel_display_power_domain domain) -{ - switch (domain) { - case POWER_DOMAIN_PIPE_A: - return "PIPE_A"; - case POWER_DOMAIN_PIPE_B: - return "PIPE_B"; - case POWER_DOMAIN_PIPE_C: - return "PIPE_C"; - case POWER_DOMAIN_PIPE_A_PANEL_FITTER: - return "PIPE_A_PANEL_FITTER"; - case POWER_DOMAIN_PIPE_B_PANEL_FITTER: - return "PIPE_B_PANEL_FITTER"; - case POWER_DOMAIN_PIPE_C_PANEL_FITTER: - return "PIPE_C_PANEL_FITTER"; - case POWER_DOMAIN_TRANSCODER_A: - return "TRANSCODER_A"; - case POWER_DOMAIN_TRANSCODER_B: - return "TRANSCODER_B"; - case POWER_DOMAIN_TRANSCODER_C: - return "TRANSCODER_C"; - case POWER_DOMAIN_TRANSCODER_EDP: - return "TRANSCODER_EDP"; - case POWER_DOMAIN_PORT_DDI_A_LANES: - return "PORT_DDI_A_LANES"; - case POWER_DOMAIN_PORT_DDI_B_LANES: - return "PORT_DDI_B_LANES"; - case POWER_DOMAIN_PORT_DDI_C_LANES: - return "PORT_DDI_C_LANES"; - case POWER_DOMAIN_PORT_DDI_D_LANES: - return "PORT_DDI_D_LANES"; - case POWER_DOMAIN_PORT_DDI_E_LANES: - return "PORT_DDI_E_LANES"; - case POWER_DOMAIN_PORT_DSI: - return "PORT_DSI"; - case POWER_DOMAIN_PORT_CRT: - return "PORT_CRT"; - case POWER_DOMAIN_PORT_OTHER: - return "PORT_OTHER"; - case POWER_DOMAIN_VGA: - return "VGA"; - case POWER_DOMAIN_AUDIO: - return "AUDIO"; - case POWER_DOMAIN_PLLS: - return "PLLS"; - case POWER_DOMAIN_AUX_A: - return "AUX_A"; - case POWER_DOMAIN_AUX_B: - return "AUX_B"; - case POWER_DOMAIN_AUX_C: - return "AUX_C"; - case POWER_DOMAIN_AUX_D: - return "AUX_D"; - case POWER_DOMAIN_GMBUS: - return "GMBUS"; - case POWER_DOMAIN_MODESET: - return "MODESET"; - case POWER_DOMAIN_INIT: - return "INIT"; - default: - MISSING_CASE(domain); - return "?"; - } -} - static int i915_power_domain_info(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -2775,7 +2709,7 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) continue; seq_printf(m, " %-23s %d\n", - power_domain_str(power_domain), + intel_display_power_domain_str(power_domain), 
power_domains->domain_use_count[power_domain]); } } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6344dfb72177..e6935f1cb689 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -228,121 +228,83 @@ static const struct intel_device_info intel_sandybridge_m_info = { .need_gfx_hws = 1, .has_hotplug = 1, \ .has_fbc = 1, \ .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ - .has_llc = 1 + .has_llc = 1, \ + GEN_DEFAULT_PIPEOFFSETS, \ + IVB_CURSOR_OFFSETS static const struct intel_device_info intel_ivybridge_d_info = { GEN7_FEATURES, .is_ivybridge = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_ivybridge_m_info = { GEN7_FEATURES, .is_ivybridge = 1, .is_mobile = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_ivybridge_q_info = { GEN7_FEATURES, .is_ivybridge = 1, .num_pipes = 0, /* legal, last one wins */ - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; +#define VLV_FEATURES \ + .gen = 7, .num_pipes = 2, \ + .need_gfx_hws = 1, .has_hotplug = 1, \ + .ring_mask = RENDER_RING | BSD_RING | BLT_RING, \ + .display_mmio_offset = VLV_DISPLAY_BASE, \ + GEN_DEFAULT_PIPEOFFSETS, \ + CURSOR_OFFSETS + static const struct intel_device_info intel_valleyview_m_info = { - GEN7_FEATURES, - .is_mobile = 1, - .num_pipes = 2, + VLV_FEATURES, .is_valleyview = 1, - .display_mmio_offset = VLV_DISPLAY_BASE, - .has_fbc = 0, /* legal, last one wins */ - .has_llc = 0, /* legal, last one wins */ - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, + .is_mobile = 1, }; static const struct intel_device_info intel_valleyview_d_info = { - GEN7_FEATURES, - .num_pipes = 2, + VLV_FEATURES, .is_valleyview = 1, - .display_mmio_offset = VLV_DISPLAY_BASE, - .has_fbc = 0, /* legal, last one wins */ - .has_llc = 0, /* legal, last one wins */ - GEN_DEFAULT_PIPEOFFSETS, - CURSOR_OFFSETS, }; +#define HSW_FEATURES \ + GEN7_FEATURES, \ + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, \ + .has_ddi = 1, \ + .has_fpga_dbg = 1 + static const struct intel_device_info intel_haswell_d_info = { - GEN7_FEATURES, + HSW_FEATURES, .is_haswell = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_haswell_m_info = { - GEN7_FEATURES, + HSW_FEATURES, .is_haswell = 1, .is_mobile = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_broadwell_d_info = { - .gen = 8, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, + HSW_FEATURES, + .gen = 8, }; static const struct intel_device_info intel_broadwell_m_info = { - .gen = 8, .is_mobile = 1, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, + HSW_FEATURES, + .gen = 8, .is_mobile = 1, }; static const struct intel_device_info intel_broadwell_gt3d_info = { - .gen = 8, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, + HSW_FEATURES, + .gen = 8, .ring_mask = RENDER_RING | BSD_RING 
| BLT_RING | VEBOX_RING | BSD2_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_broadwell_gt3m_info = { - .gen = 8, .is_mobile = 1, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, + HSW_FEATURES, + .gen = 8, .is_mobile = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_cherryview_info = { @@ -356,29 +318,16 @@ static const struct intel_device_info intel_cherryview_info = { }; static const struct intel_device_info intel_skylake_info = { + HSW_FEATURES, .is_skylake = 1, - .gen = 9, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, + .gen = 9, }; static const struct intel_device_info intel_skylake_gt3_info = { + HSW_FEATURES, .is_skylake = 1, - .gen = 9, .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, + .gen = 9, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_broxton_info = { @@ -396,33 +345,18 @@ static const struct intel_device_info intel_broxton_info = { }; static const struct intel_device_info intel_kabylake_info = { + HSW_FEATURES, .is_preliminary = 1, .is_kabylake = 1, .gen = 9, - .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, - .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; static const struct intel_device_info intel_kabylake_gt3_info = { + HSW_FEATURES, .is_preliminary = 1, .is_kabylake = 1, .gen = 9, - .num_pipes = 3, - .need_gfx_hws = 1, .has_hotplug = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING | BSD2_RING, - .has_llc = 1, - .has_ddi = 1, - .has_fpga_dbg = 1, - .has_fbc = 1, - GEN_DEFAULT_PIPEOFFSETS, - IVB_CURSOR_OFFSETS, }; /* @@ -465,6 +399,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_SKL_GT1_IDS(&intel_skylake_info), INTEL_SKL_GT2_IDS(&intel_skylake_info), INTEL_SKL_GT3_IDS(&intel_skylake_gt3_info), + INTEL_SKL_GT4_IDS(&intel_skylake_gt3_info), INTEL_BXT_IDS(&intel_broxton_info), INTEL_KBL_GT1_IDS(&intel_kabylake_info), INTEL_KBL_GT2_IDS(&intel_kabylake_info), @@ -565,7 +500,8 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev) && !IS_KABYLAKE(dev)); - } else if (id == INTEL_PCH_P2X_DEVICE_ID_TYPE) { + } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || + (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) { dev_priv->pch_type = intel_virt_detect_pch(dev); } else continue; @@ -624,6 +560,14 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, bool rpm_resume); static int bxt_resume_prepare(struct drm_i915_private *dev_priv); +static bool suspend_to_idle(struct drm_i915_private *dev_priv) +{ +#if IS_ENABLED(CONFIG_ACPI_SLEEP) + if (acpi_target_system_state() < ACPI_STATE_S3) + return true; +#endif + return false; +} static int i915_drm_suspend(struct drm_device *dev) { @@ -676,11 +620,7 @@ static int i915_drm_suspend(struct drm_device *dev) 
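The next few i915_drv.c hunks consume the suspend_to_idle() helper introduced just above, but the logic ends up spread across the suspend, suspend_late and resume_early paths. Consolidated, the intended flow looks roughly like this; the sketch assumes the suspended_to_idle flag and the csr.dmc_payload field added elsewhere in this series:

        /*
         * Suspend: pick the opregion power target, and skip manual
         * power-domain teardown when the DMC firmware can do
         * firmware-assisted save/restore (suspend-to-idle with the
         * firmware payload loaded).
         */
        opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold;

        fw_csr = suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
        if (!fw_csr)
                intel_power_domains_suspend(dev_priv);
        dev_priv->suspended_to_idle = suspend_to_idle(dev_priv);

        /*
         * Resume: re-init the power domains by hand only if the
         * firmware was not left in charge across the suspend.
         */
        if (!(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload))
                intel_power_domains_init_hw(dev_priv, true);
        dev_priv->suspended_to_idle = false;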
i915_save_state(dev); - opregion_target_state = PCI_D3cold; -#if IS_ENABLED(CONFIG_ACPI_SLEEP) - if (acpi_target_system_state() < ACPI_STATE_S3) - opregion_target_state = PCI_D1; -#endif + opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold; intel_opregion_notify_adapter(dev, opregion_target_state); intel_uncore_forcewake_reset(dev, false); @@ -701,15 +641,26 @@ static int i915_drm_suspend(struct drm_device *dev) static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) { struct drm_i915_private *dev_priv = drm_dev->dev_private; + bool fw_csr; int ret; - intel_power_domains_suspend(dev_priv); + fw_csr = suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; + /* + * In case of firmware assisted context save/restore don't manually + * deinit the power domains. This also means the CSR/DMC firmware will + * stay active, it will power down any HW resources as required and + * also enable deeper system power states that would be blocked if the + * firmware was inactive. + */ + if (!fw_csr) + intel_power_domains_suspend(dev_priv); ret = intel_suspend_complete(dev_priv); if (ret) { DRM_ERROR("Suspend complete failed: %d\n", ret); - intel_power_domains_init_hw(dev_priv, true); + if (!fw_csr) + intel_power_domains_init_hw(dev_priv, true); return ret; } @@ -730,6 +681,8 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) pci_set_power_state(drm_dev->pdev, PCI_D3hot); + dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); + return 0; } @@ -842,8 +795,10 @@ static int i915_drm_resume_early(struct drm_device *dev) * FIXME: This should be solved with a special hdmi sink device or * similar so that power domains can be employed. */ - if (pci_enable_device(dev->pdev)) - return -EIO; + if (pci_enable_device(dev->pdev)) { + ret = -EIO; + goto out; + } pci_set_master(dev->pdev); @@ -861,7 +816,12 @@ static int i915_drm_resume_early(struct drm_device *dev) hsw_disable_pc8(dev_priv); intel_uncore_sanitize(dev); - intel_power_domains_init_hw(dev_priv, true); + + if (!(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) + intel_power_domains_init_hw(dev_priv, true); + +out: + dev_priv->suspended_to_idle = false; return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 15c6dc0b4f37..f1a8a53e9e30 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -57,7 +57,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20151120" +#define DRIVER_DATE "20151204" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -902,7 +902,6 @@ struct i915_fbc { /* This is always the inner lock when overlapping with struct_mutex and * it's the outer lock when overlapping with stolen_lock. */ struct mutex lock; - unsigned long uncompressed_size; unsigned threshold; unsigned int fb_id; unsigned int possible_framebuffer_bits; @@ -915,21 +914,21 @@ struct i915_fbc { bool false_color; - /* Tracks whether the HW is actually enabled, not whether the feature is - * possible. 
*/ bool enabled; + bool active; struct intel_fbc_work { - struct delayed_work work; - struct intel_crtc *crtc; + bool scheduled; + struct work_struct work; struct drm_framebuffer *fb; - } *fbc_work; + unsigned long enable_jiffies; + } work; const char *no_fbc_reason; - bool (*fbc_enabled)(struct drm_i915_private *dev_priv); - void (*enable_fbc)(struct intel_crtc *crtc); - void (*disable_fbc)(struct drm_i915_private *dev_priv); + bool (*is_active)(struct drm_i915_private *dev_priv); + void (*activate)(struct intel_crtc *crtc); + void (*deactivate)(struct drm_i915_private *dev_priv); }; /** @@ -1885,6 +1884,7 @@ struct drm_i915_private { u32 chv_phy_control; u32 suspend_count; + bool suspended_to_idle; struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; @@ -2608,11 +2608,13 @@ struct drm_i915_cmd_table { #define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 +#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type) #define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT) #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT) #define HAS_PCH_LPT_LP(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) +#define HAS_PCH_LPT_H(dev) (__I915__(dev)->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT) #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX) #define HAS_PCH_NOP(dev) (INTEL_PCH_TYPE(dev) == PCH_NOP) @@ -2749,17 +2751,47 @@ void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv); void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, uint32_t mask, uint32_t bits); -void -ironlake_enable_display_irq(struct drm_i915_private *dev_priv, u32 mask); -void -ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask); +void ilk_update_display_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask); +static inline void +ilk_enable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +{ + ilk_update_display_irq(dev_priv, bits, bits); +} +static inline void +ilk_disable_display_irq(struct drm_i915_private *dev_priv, uint32_t bits) +{ + ilk_update_display_irq(dev_priv, bits, 0); +} +void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask); +static inline void bdw_enable_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, uint32_t bits) +{ + bdw_update_pipe_irq(dev_priv, pipe, bits, bits); +} +static inline void bdw_disable_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, uint32_t bits) +{ + bdw_update_pipe_irq(dev_priv, pipe, bits, 0); +} void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask); -#define ibx_enable_display_interrupt(dev_priv, bits) \ - ibx_display_interrupt_update((dev_priv), (bits), (bits)) -#define ibx_disable_display_interrupt(dev_priv, bits) \ - ibx_display_interrupt_update((dev_priv), (bits), 0) +static inline void +ibx_enable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +{ + ibx_display_interrupt_update(dev_priv, bits, bits); +} +static inline void +ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits) +{ + ibx_display_interrupt_update(dev_priv, bits, 0); +} + /* i915_gem.c */ int i915_gem_create_ioctl(struct drm_device *dev, void *data, 
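One i915_drv.h detail in the hunks above is worth calling out: the ibx_{enable,disable}_display_interrupt() macros become static inline functions, the old ironlake_{enable,disable}_display_irq() pair is replaced by ilk_update_display_irq() plus thin inline wrappers, and bdw_{enable,disable}_pipe_irq() is added for the per-pipe Broadwell+ registers. Static inlines give type checking and evaluate their arguments exactly once, which the macro versions did not guarantee. A sketch of a typical caller; the GEN8_PIPE_VBLANK bit under irq_lock mirrors how vblank code uses such helpers, but this exact call site is illustrative, not part of these patches:

        /*
         * Sketch: unmasking the vblank interrupt for one pipe with the
         * new Broadwell helper. The helper expects dev_priv->irq_lock
         * to be held, hence the spin_lock_irq() pair.
         */
        spin_lock_irq(&dev_priv->irq_lock);
        bdw_enable_pipe_irq(dev_priv, PIPE_A, GEN8_PIPE_VBLANK);
        spin_unlock_irq(&dev_priv->irq_lock);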
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33adc8f8ab20..b7d7cecdddf6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1210,8 +1210,16 @@ int __i915_wait_request(struct drm_i915_gem_request *req, if (i915_gem_request_completed(req, true)) return 0; - timeout_expire = timeout ? - jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; + timeout_expire = 0; + if (timeout) { + if (WARN_ON(*timeout < 0)) + return -EINVAL; + + if (*timeout == 0) + return -ETIME; + + timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout); + } if (INTEL_INFO(dev_priv)->gen >= 6) gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); @@ -2941,6 +2949,10 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!list_empty(&ring->request_list)) return; + /* we probably should sync with hangcheck here, using cancel_work_sync. + * Also locking seems to be fubar here, ring->request_list is protected + * by dev->struct_mutex. */ + intel_mark_idle(dev); if (mutex_trylock(&dev->struct_mutex)) { @@ -3065,7 +3077,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (ret == 0) ret = __i915_wait_request(req[i], reset_counter, true, args->timeout_ns > 0 ? &args->timeout_ns : NULL, - file->driver_priv); + to_rps_client(file)); i915_gem_request_unreference__unlocked(req[i]); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 4b9400402aa3..43761c5bcaca 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -141,8 +141,6 @@ static void i915_gem_context_clean(struct intel_context *ctx) if (!ppgtt) return; - WARN_ON(!list_empty(&ppgtt->base.active_list)); - list_for_each_entry_safe(vma, next, &ppgtt->base.inactive_list, mm_list) { if (WARN_ON(__i915_vma_unbind_no_wait(vma))) diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index b80d0456fe03..598198543dcd 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -642,11 +642,10 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) } /* check for L-shaped memory aka modified enhanced addressing */ - if (IS_GEN4(dev)) { - uint32_t ddc2 = I915_READ(DCC2); - - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) - dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + if (IS_GEN4(dev) && + !(I915_READ(DCC2) & DCC2_MODIFIED_ENHANCED_DISABLE)) { + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; } if (dcc == 0xffffffff) { @@ -675,16 +674,35 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) * matching, which was the case for the swizzling required in * the table above, or from the 1-ch value being less than * the minimum size of a rank. + * + * Reports indicate that the swizzling actually + * varies depending upon page placement inside the + * channels, i.e. we see swizzled pages where the + * banks of memory are paired and unswizzled on the + * uneven portion, so leave that as unknown. */ - if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) { - swizzle_x = I915_BIT_6_SWIZZLE_NONE; - swizzle_y = I915_BIT_6_SWIZZLE_NONE; - } else { + if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } } + if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN || + swizzle_y == I915_BIT_6_SWIZZLE_UNKNOWN) { + /* Userspace likes to explode if it sees unknown swizzling, + * so lie. 
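The reworked timeout handling in __i915_wait_request() above distinguishes three cases the old one-liner folded together: a negative timeout is a malformed request, a zero timeout means "already polled, not ready", and only a positive value becomes a deadline. A standalone C sketch of that flow, with HZ and a rounded-up conversion standing in for the kernel's nsecs_to_jiffies_timeout().

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define HZ 250

static int64_t nsecs_to_ticks(int64_t ns)
{
	return (ns * HZ + 999999999) / 1000000000;	/* round up */
}

static int wait_request(const int64_t *timeout_ns, int64_t now_ticks,
			int64_t *deadline)
{
	*deadline = 0;
	if (timeout_ns) {
		if (*timeout_ns < 0)
			return -EINVAL;		/* malformed request */
		if (*timeout_ns == 0)
			return -ETIME;		/* polled and not ready */
		*deadline = now_ticks + nsecs_to_ticks(*timeout_ns);
	}
	/* ... block until completion or *deadline ... */
	return 0;
}

int main(void)
{
	int64_t deadline, t = 0;

	printf("zero timeout -> %d\n", wait_request(&t, 1000, &deadline));
	t = 4000000;
	printf("4 ms timeout -> %d (deadline %lld)\n",
	       wait_request(&t, 1000, &deadline), (long long)deadline);
	return 0;
}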
We will finish the lie when reporting through + * the get-tiling-ioctl by reporting the physical swizzle + * mode as unknown instead. + * + * As we don't strictly know what the swizzling is, it may be + * bit17 dependent, and so we need to also prevent the pages + * from being moved. + */ + dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } + dev_priv->mm.bit_6_swizzle_x = swizzle_x; dev_priv->mm.bit_6_swizzle_y = swizzle_y; } diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ed9f1002ab36..0d23785ba818 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) return -EINVAL; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - spin_lock(&dev_priv->guc.host2guc_lock); dev_priv->guc.action_count += 1; dev_priv->guc.action_cmd = data[0]; @@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) } dev_priv->guc.action_status = status; - spin_unlock(&dev_priv->guc.host2guc_lock); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; @@ -292,16 +290,12 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc) const uint32_t cacheline_size = cache_line_size(); uint32_t offset; - spin_lock(&guc->host2guc_lock); - /* Doorbell uses a single cache line within a page */ offset = offset_in_page(guc->db_cacheline); /* Moving to next cache line to reduce contention */ guc->db_cacheline += cacheline_size; - spin_unlock(&guc->host2guc_lock); - DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n", offset, guc->db_cacheline, cacheline_size); @@ -322,13 +316,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) const uint16_t end = start + half; uint16_t id; - spin_lock(&guc->host2guc_lock); id = find_next_zero_bit(guc->doorbell_bitmap, end, start); if (id == end) id = GUC_INVALID_DOORBELL_ID; else bitmap_set(guc->doorbell_bitmap, id, 1); - spin_unlock(&guc->host2guc_lock); DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n", hi_pri ? 
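The swizzle fallback above boils down to one policy: if detection cannot pin the swizzle down, pin the pages down instead and tell userspace "none". A compilable sketch of just that policy; the enum values and struct are simplified models, not the driver's types.

#include <stdbool.h>
#include <stdio.h>

enum swizzle { SWZ_NONE, SWZ_9, SWZ_9_10, SWZ_UNKNOWN };

struct tiling_state {
	enum swizzle x, y;
	bool pin_swizzled_pages;
};

static void finalize_swizzle(struct tiling_state *t)
{
	if (t->x == SWZ_UNKNOWN || t->y == SWZ_UNKNOWN) {
		/* Keep pages in place: the layout may be bit17-dependent. */
		t->pin_swizzled_pages = true;
		/* Report "none" so userspace doesn't explode; the
		 * get-tiling ioctl can still reveal the physical mode
		 * as unknown. */
		t->x = SWZ_NONE;
		t->y = SWZ_NONE;
	}
}

int main(void)
{
	struct tiling_state t = { SWZ_UNKNOWN, SWZ_9, false };

	finalize_swizzle(&t);
	printf("x=%d y=%d pin=%d\n", t.x, t.y, t.pin_swizzled_pages);
	return 0;
}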
"high" : "normal", id); @@ -338,9 +330,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority) static void release_doorbell(struct intel_guc *guc, uint16_t id) { - spin_lock(&guc->host2guc_lock); bitmap_clear(guc->doorbell_bitmap, id, 1); - spin_unlock(&guc->host2guc_lock); } /* @@ -487,16 +477,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) struct guc_process_desc *desc; void *base; u32 size = sizeof(struct guc_wq_item); - int ret = 0, timeout_counter = 200; + int ret = -ETIMEDOUT, timeout_counter = 200; base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0)); desc = base + gc->proc_desc_offset; while (timeout_counter-- > 0) { - ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head, - gc->wq_size) >= size, 1); - - if (!ret) { + if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) { *offset = gc->wq_tail; /* advance the tail for next workqueue item */ @@ -505,7 +492,11 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset) /* this will break the loop */ timeout_counter = 0; + ret = 0; } + + if (timeout_counter) + usleep_range(1000, 2000); }; kunmap_atomic(base); @@ -597,15 +588,12 @@ int i915_guc_submit(struct i915_guc_client *client, { struct intel_guc *guc = client->guc; enum intel_ring_id ring_id = rq->ring->id; - unsigned long flags; int q_ret, b_ret; /* Need this because of the deferred pin ctx and ring */ /* Shall we move this right after ring is pinned? */ lr_context_update(rq); - spin_lock_irqsave(&client->wq_lock, flags); - q_ret = guc_add_workqueue_item(client, rq); if (q_ret == 0) b_ret = guc_ring_doorbell(client); @@ -620,12 +608,8 @@ int i915_guc_submit(struct i915_guc_client *client, } else { client->retcode = 0; } - spin_unlock_irqrestore(&client->wq_lock, flags); - - spin_lock(&guc->host2guc_lock); guc->submissions[ring_id] += 1; guc->last_seqno[ring_id] = rq->seqno; - spin_unlock(&guc->host2guc_lock); return q_ret; } @@ -677,7 +661,7 @@ static struct drm_i915_gem_object *gem_allocate_guc_obj(struct drm_device *dev, /** * gem_release_guc_obj() - Release gem object allocated for GuC usage * @obj: gem obj to be released - */ + */ static void gem_release_guc_obj(struct drm_i915_gem_object *obj) { if (!obj) @@ -768,7 +752,6 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev, client->client_obj = obj; client->wq_offset = GUC_DB_SIZE; client->wq_size = GUC_WQ_SIZE; - spin_lock_init(&client->wq_lock); client->doorbell_offset = select_doorbell_cacheline(guc); @@ -871,8 +854,6 @@ int i915_guc_submission_init(struct drm_device *dev) if (!guc->ctx_pool_obj) return -ENOMEM; - spin_lock_init(&dev_priv->guc.host2guc_lock); - ida_init(&guc->ctx_ids); guc_create_log(guc); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c8ba94968aaf..e88d692583a5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -215,9 +215,9 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, * @interrupt_mask: mask of interrupt bits to update * @enabled_irq_mask: mask of interrupt bits to enable */ -static void ilk_update_display_irq(struct drm_i915_private *dev_priv, - uint32_t interrupt_mask, - uint32_t enabled_irq_mask) +void ilk_update_display_irq(struct drm_i915_private *dev_priv, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) { uint32_t new_val; @@ -239,18 +239,6 @@ static void ilk_update_display_irq(struct drm_i915_private *dev_priv, } } -void -ironlake_enable_display_irq(struct 
drm_i915_private *dev_priv, u32 mask) -{ - ilk_update_display_irq(dev_priv, mask, mask); -} - -void -ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask) -{ - ilk_update_display_irq(dev_priv, mask, 0); -} - /** * ilk_update_gt_irq - update GTIMR * @dev_priv: driver private @@ -300,11 +288,11 @@ static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv) } /** - * snb_update_pm_irq - update GEN6_PMIMR - * @dev_priv: driver private - * @interrupt_mask: mask of interrupt bits to update - * @enabled_irq_mask: mask of interrupt bits to enable - */ + * snb_update_pm_irq - update GEN6_PMIMR + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) @@ -418,11 +406,11 @@ void gen6_disable_rps_interrupts(struct drm_device *dev) } /** - * bdw_update_port_irq - update DE port interrupt - * @dev_priv: driver private - * @interrupt_mask: mask of interrupt bits to update - * @enabled_irq_mask: mask of interrupt bits to enable - */ + * bdw_update_port_irq - update DE port interrupt + * @dev_priv: driver private + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) @@ -450,6 +438,38 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, } /** + * bdw_update_pipe_irq - update DE pipe interrupt + * @dev_priv: driver private + * @pipe: pipe whose interrupt to update + * @interrupt_mask: mask of interrupt bits to update + * @enabled_irq_mask: mask of interrupt bits to enable + */ +void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, + enum pipe pipe, + uint32_t interrupt_mask, + uint32_t enabled_irq_mask) +{ + uint32_t new_val; + + assert_spin_locked(&dev_priv->irq_lock); + + WARN_ON(enabled_irq_mask & ~interrupt_mask); + + if (WARN_ON(!intel_irqs_enabled(dev_priv))) + return; + + new_val = dev_priv->de_irq_mask[pipe]; + new_val &= ~interrupt_mask; + new_val |= (~enabled_irq_mask & interrupt_mask); + + if (new_val != dev_priv->de_irq_mask[pipe]) { + dev_priv->de_irq_mask[pipe] = new_val; + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + } +} + +/** * ibx_display_interrupt_update - update SDEIMR * @dev_priv: driver private * @interrupt_mask: mask of interrupt bits to update @@ -1824,8 +1844,24 @@ static void ibx_hpd_irq_handler(struct drm_device *dev, u32 hotplug_trigger, struct drm_i915_private *dev_priv = to_i915(dev); u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; + /* + * Somehow the PCH doesn't seem to really ack the interrupt to the CPU + * unless we touch the hotplug register, even if hotplug_trigger is + * zero. Not acking leads to "The master control interrupt lied (SDE)!" + * errors. 
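bdw_update_pipe_irq() above uses the same masked-update rule as its ilk/ibx siblings: bits inside interrupt_mask are rewritten, bits outside it survive, and a set IMR bit means "masked off". The standalone C below demonstrates just that bit arithmetic; imr_update() models the new_val computation and is not driver code. Writing back only on change, as the driver does, then saves a register write and posting read.

#include <assert.h>
#include <stdint.h>

static uint32_t imr_update(uint32_t old, uint32_t mask, uint32_t enabled)
{
	uint32_t new = old;

	new &= ~mask;		/* forget the bits being updated */
	new |= ~enabled & mask;	/* disabled bits are set (masked) */
	return new;
}

int main(void)
{
	/* Enable bit 2 while leaving every other bit untouched. */
	assert(imr_update(0xff, 0x04, 0x04) == 0xfb);
	/* Disable bit 2 again. */
	assert(imr_update(0xfb, 0x04, 0x00) == 0xff);
	return 0;
}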
+ */ dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG); + if (!hotplug_trigger) { + u32 mask = PORTA_HOTPLUG_STATUS_MASK | + PORTD_HOTPLUG_STATUS_MASK | + PORTC_HOTPLUG_STATUS_MASK | + PORTB_HOTPLUG_STATUS_MASK; + dig_hotplug_reg &= ~mask; + } + I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); + if (!hotplug_trigger) + return; intel_get_hpd_pins(&pin_mask, &long_mask, hotplug_trigger, dig_hotplug_reg, hpd, @@ -1840,8 +1876,7 @@ static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir) int pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; - if (hotplug_trigger) - ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_ibx); + ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_ibx); if (pch_iir & SDE_AUDIO_POWER_MASK) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK) >> @@ -1934,8 +1969,7 @@ static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir) int pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; - if (hotplug_trigger) - ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_cpt); + ibx_hpd_irq_handler(dev, hotplug_trigger, hpd_cpt); if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK_CPT) >> @@ -2351,13 +2385,9 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) spt_irq_handler(dev, pch_iir); else cpt_irq_handler(dev, pch_iir); - } else { - /* - * Like on previous PCH there seems to be something - * fishy going on with forwarding PCH interrupts. - */ - DRM_DEBUG_DRIVER("The master control interrupt lied (SDE)!\n"); - } + } else + DRM_ERROR("The master control interrupt lied (SDE)!\n"); + } I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); @@ -2645,7 +2675,7 @@ static int ironlake_enable_vblank(struct drm_device *dev, unsigned int pipe) DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - ironlake_enable_display_irq(dev_priv, bit); + ilk_enable_display_irq(dev_priv, bit); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); return 0; @@ -2670,10 +2700,9 @@ static int gen8_enable_vblank(struct drm_device *dev, unsigned int pipe) unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_VBLANK; - I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); - POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + bdw_enable_pipe_irq(dev_priv, pipe, GEN8_PIPE_VBLANK); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + return 0; } @@ -2700,7 +2729,7 @@ static void ironlake_disable_vblank(struct drm_device *dev, unsigned int pipe) DE_PIPE_VBLANK(pipe); spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - ironlake_disable_display_irq(dev_priv, bit); + ilk_disable_display_irq(dev_priv, bit); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } @@ -2721,9 +2750,7 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) unsigned long irqflags; spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_VBLANK; - I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); - POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + bdw_disable_pipe_irq(dev_priv, pipe, GEN8_PIPE_VBLANK); spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } @@ -3452,7 +3479,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev) * setup is guaranteed to run in single-threaded context. But we * need it to make the assert_spin_locked happy. 
*/ spin_lock_irq(&dev_priv->irq_lock); - ironlake_enable_display_irq(dev_priv, DE_PCU_EVENT); + ilk_enable_display_irq(dev_priv, DE_PCU_EVENT); spin_unlock_irq(&dev_priv->irq_lock); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1a12d44b9710..1dae5ac3e0b1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -855,31 +855,31 @@ enum skl_disp_power_wells { * * Note: DDI0 is digital port B, DD1 is digital port C, and DDI2 is * digital port D (CHV) or port A (BXT). - */ -/* - * Dual channel PHY (VLV/CHV/BXT) - * --------------------------------- - * | CH0 | CH1 | - * | CMN/PLL/REF | CMN/PLL/REF | - * |---------------|---------------| Display PHY - * | PCS01 | PCS23 | PCS01 | PCS23 | - * |-------|-------|-------|-------| - * |TX0|TX1|TX2|TX3|TX0|TX1|TX2|TX3| - * --------------------------------- - * | DDI0 | DDI1 | DP/HDMI ports - * --------------------------------- * - * Single channel PHY (CHV/BXT) - * ----------------- - * | CH0 | - * | CMN/PLL/REF | - * |---------------| Display PHY - * | PCS01 | PCS23 | - * |-------|-------| - * |TX0|TX1|TX2|TX3| - * ----------------- - * | DDI2 | DP/HDMI port - * ----------------- + * + * Dual channel PHY (VLV/CHV/BXT) + * --------------------------------- + * | CH0 | CH1 | + * | CMN/PLL/REF | CMN/PLL/REF | + * |---------------|---------------| Display PHY + * | PCS01 | PCS23 | PCS01 | PCS23 | + * |-------|-------|-------|-------| + * |TX0|TX1|TX2|TX3|TX0|TX1|TX2|TX3| + * --------------------------------- + * | DDI0 | DDI1 | DP/HDMI ports + * --------------------------------- + * + * Single channel PHY (CHV/BXT) + * ----------------- + * | CH0 | + * | CMN/PLL/REF | + * |---------------| Display PHY + * | PCS01 | PCS23 | + * |-------|-------| + * |TX0|TX1|TX2|TX3| + * ----------------- + * | DDI2 | DP/HDMI port + * ----------------- */ #define DPIO_DEVFN 0 @@ -2973,6 +2973,13 @@ enum skl_disp_power_wells { #define OGAMC0 _MMIO(0x30024) /* + * GEN9 clock gating regs + */ +#define GEN9_CLKGATE_DIS_0 _MMIO(0x46530) +#define PWM2_GATING_DIS (1 << 14) +#define PWM1_GATING_DIS (1 << 13) + +/* * Display engine regs */ @@ -7549,6 +7556,7 @@ enum skl_disp_power_wells { #define SFUSE_STRAP _MMIO(0xc2014) #define SFUSE_STRAP_FUSE_LOCK (1<<13) #define SFUSE_STRAP_DISPLAY_DISABLED (1<<7) +#define SFUSE_STRAP_CRT_DISABLED (1<<6) #define SFUSE_STRAP_DDIB_DETECTED (1<<2) #define SFUSE_STRAP_DDIC_DETECTED (1<<1) #define SFUSE_STRAP_DDID_DETECTED (1<<0) @@ -7706,7 +7714,7 @@ enum skl_disp_power_wells { #define BXT_DSI_PLL_RATIO_MAX 0x7D #define BXT_DSI_PLL_RATIO_MIN 0x22 #define BXT_DSI_PLL_RATIO_MASK 0xFF -#define BXT_REF_CLOCK_KHZ 19500 +#define BXT_REF_CLOCK_KHZ 19200 #define BXT_DSI_PLL_ENABLE _MMIO(0x46080) #define BXT_DSI_PLL_DO_ENABLE (1 << 31) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index ce82f9c7df24..070470fe9a91 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -356,7 +356,10 @@ parse_general_features(struct drm_i915_private *dev_priv, general = find_section(bdb, BDB_GENERAL_FEATURES); if (general) { dev_priv->vbt.int_tv_support = general->int_tv_support; - dev_priv->vbt.int_crt_support = general->int_crt_support; + /* int_crt_support can't be trusted on earlier platforms */ + if (bdb->version >= 155 && + (HAS_DDI(dev_priv) || IS_VALLEYVIEW(dev_priv))) + dev_priv->vbt.int_crt_support = general->int_crt_support; dev_priv->vbt.lvds_use_ssc = general->enable_ssc; dev_priv->vbt.lvds_ssc_freq = 
intel_bios_ssc_frequency(dev, general->ssc_freq); diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 27b3e610e8f0..9285fc1e64ee 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -777,11 +777,37 @@ void intel_crt_init(struct drm_device *dev) struct intel_crt *crt; struct intel_connector *intel_connector; struct drm_i915_private *dev_priv = dev->dev_private; + i915_reg_t adpa_reg; + u32 adpa; /* Skip machines without VGA that falsely report hotplug events */ if (dmi_check_system(intel_no_crt)) return; + if (HAS_PCH_SPLIT(dev)) + adpa_reg = PCH_ADPA; + else if (IS_VALLEYVIEW(dev)) + adpa_reg = VLV_ADPA; + else + adpa_reg = ADPA; + + adpa = I915_READ(adpa_reg); + if ((adpa & ADPA_DAC_ENABLE) == 0) { + /* + * On some machines (some IVB at least) CRT can be + * fused off, but there's no known fuse bit to + * indicate that. On these machine the ADPA register + * works normally, except the DAC enable bit won't + * take. So the only way to tell is attempt to enable + * it and see what happens. + */ + I915_WRITE(adpa_reg, adpa | ADPA_DAC_ENABLE | + ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE); + if ((I915_READ(adpa_reg) & ADPA_DAC_ENABLE) == 0) + return; + I915_WRITE(adpa_reg, adpa); + } + crt = kzalloc(sizeof(struct intel_crt), GFP_KERNEL); if (!crt) return; @@ -798,7 +824,7 @@ void intel_crt_init(struct drm_device *dev) &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA); drm_encoder_init(dev, &crt->base.base, &intel_crt_enc_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); intel_connector_attach_encoder(intel_connector, &crt->base); @@ -815,12 +841,7 @@ void intel_crt_init(struct drm_device *dev) connector->interlace_allowed = 1; connector->doublescan_allowed = 0; - if (HAS_PCH_SPLIT(dev)) - crt->adpa_reg = PCH_ADPA; - else if (IS_VALLEYVIEW(dev)) - crt->adpa_reg = VLV_ADPA; - else - crt->adpa_reg = ADPA; + crt->adpa_reg = adpa_reg; crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) { diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 76ce7c2960b6..4afb3103eb96 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -3151,7 +3151,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->has_hdmi_sink = true; intel_hdmi = enc_to_intel_hdmi(&encoder->base); - if (intel_hdmi->infoframe_enabled(&encoder->base)) + if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; break; case TRANS_DDI_MODE_SELECT_DVI: @@ -3284,7 +3284,7 @@ void intel_ddi_init(struct drm_device *dev, enum port port) encoder = &intel_encoder->base; drm_encoder_init(dev, encoder, &intel_ddi_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); intel_encoder->compute_config = intel_ddi_compute_config; intel_encoder->enable = intel_enable_ddi; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9228ec018e98..bda6b9c82e66 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -44,6 +44,8 @@ #include <drm/drm_plane_helper.h> #include <drm/drm_rect.h> #include <linux/dma_remapping.h> +#include <linux/reservation.h> +#include <linux/dma-buf.h> /* Primary plane formats for gen <= 3 */ static const uint32_t i8xx_primary_formats[] = { @@ -2130,7 +2132,7 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * need the check. 
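The fused-off VGA check above is a classic write-and-read-back probe: with no fuse bit available, try to set the enable bit and see whether it sticks, restoring the register afterwards. The sketch below runs the same decision in plain C; register access is faked with a global and the bit layout is invented.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DAC_ENABLE (1u << 31)

static uint32_t fake_adpa;	/* stands in for the ADPA register */
static bool fused_off = true;	/* the condition being probed for */

static uint32_t reg_read(void) { return fake_adpa; }

static void reg_write(uint32_t v)
{
	/* On fused-off hardware the enable bit refuses to stick. */
	fake_adpa = fused_off ? (v & ~DAC_ENABLE) : v;
}

static bool crt_present(void)
{
	uint32_t adpa = reg_read();

	if (adpa & DAC_ENABLE)
		return true;		/* already on: clearly present */

	reg_write(adpa | DAC_ENABLE);
	if (!(reg_read() & DAC_ENABLE))
		return false;		/* bit didn't take: fused off */

	reg_write(adpa);		/* undo the probe */
	return true;
}

int main(void)
{
	printf("CRT present: %d\n", crt_present());
	return 0;
}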
*/ if (HAS_GMCH_DISPLAY(dev_priv->dev)) - if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI)) + if (crtc->config->has_dsi_encoder) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); @@ -3174,8 +3176,8 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; - if (dev_priv->fbc.disable_fbc) - dev_priv->fbc.disable_fbc(dev_priv); + if (dev_priv->fbc.deactivate) + dev_priv->fbc.deactivate(dev_priv); dev_priv->display.update_primary_plane(crtc, fb, x, y); @@ -4137,6 +4139,12 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) I915_WRITE(FDI_RX_TUSIZE1(pipe), I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); + /* + * Sometimes spurious CPU pipe underruns happen during FDI + * training, at least with VGA+HDMI cloning. Suppress them. + */ + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + /* For PCH output, training FDI link */ dev_priv->display.fdi_link_train(crtc); @@ -4170,6 +4178,8 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) intel_fdi_normal_train(crtc); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev) && intel_crtc->config->has_dp_encoder) { const struct drm_display_mode *adjusted_mode = @@ -4628,7 +4638,7 @@ static void intel_crtc_load_lut(struct drm_crtc *crtc) return; if (HAS_GMCH_DISPLAY(dev_priv->dev)) { - if (intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI)) + if (intel_crtc->config->has_dsi_encoder) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); @@ -4784,7 +4794,6 @@ static void intel_post_plane_update(struct intel_crtc *crtc) { struct intel_crtc_atomic_commit *atomic = &crtc->atomic; struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; if (atomic->wait_vblank) intel_wait_for_vblank(dev, crtc->pipe); @@ -4798,7 +4807,7 @@ static void intel_post_plane_update(struct intel_crtc *crtc) intel_update_watermarks(&crtc->base); if (atomic->update_fbc) - intel_fbc_update(dev_priv); + intel_fbc_update(crtc); if (atomic->post_enable_primary) intel_post_enable_primary(&crtc->base); @@ -4813,7 +4822,7 @@ static void intel_pre_plane_update(struct intel_crtc *crtc) struct intel_crtc_atomic_commit *atomic = &crtc->atomic; if (atomic->disable_fbc) - intel_fbc_disable_crtc(crtc); + intel_fbc_deactivate(crtc); if (crtc->atomic.disable_ips) hsw_disable_ips(crtc); @@ -4921,6 +4930,8 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config->has_pch_encoder) intel_wait_for_vblank(dev, pipe); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); + + intel_fbc_enable(intel_crtc); } /* IPS only exists on ULT machines and is tied to pipe A. 
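The FDI change above brackets a known-noisy operation with suppressed error reporting and then restores it. The same bracket reappears around pipe disable later in the series. As a pattern in standalone C, with all types and names as stand-ins:

#include <stdbool.h>
#include <stdio.h>

struct crtc { int pipe; };

static void set_cpu_fifo_underrun_reporting(int pipe, bool on)
{
	printf("pipe %d underrun reporting %s\n", pipe, on ? "on" : "off");
}

static void train_fdi_link(struct crtc *crtc)
{
	(void)crtc;	/* link training would go here */
}

static void pch_enable_sketch(struct crtc *crtc)
{
	/* Training is known to raise spurious CPU FIFO underruns (seen
	 * with VGA+HDMI cloning), so mute reporting across it. */
	set_cpu_fifo_underrun_reporting(crtc->pipe, false);
	train_fdi_link(crtc);
	set_cpu_fifo_underrun_reporting(crtc->pipe, true);
}

int main(void)
{
	struct crtc c = { .pipe = 0 };

	pch_enable_sketch(&c);
	return 0;
}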
*/ @@ -4938,7 +4949,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) int pipe = intel_crtc->pipe, hsw_workaround_pipe; struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc->state); - bool is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI); if (WARN_ON(intel_crtc->active)) return; @@ -4971,10 +4981,12 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_crtc->active = true; - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + if (intel_crtc->config->has_pch_encoder) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + else + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + for_each_encoder_on_crtc(dev, crtc, encoder) { - if (encoder->pre_pll_enable) - encoder->pre_pll_enable(encoder); if (encoder->pre_enable) encoder->pre_enable(encoder); } @@ -4982,7 +4994,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config->has_pch_encoder) dev_priv->display.fdi_link_train(crtc); - if (!is_dsi) + if (!intel_crtc->config->has_dsi_encoder) intel_ddi_enable_pipe_clock(intel_crtc); if (INTEL_INFO(dev)->gen >= 9) @@ -4997,7 +5009,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_crtc_load_lut(crtc); intel_ddi_set_pipe_settings(crtc); - if (!is_dsi) + if (!intel_crtc->config->has_dsi_encoder) intel_ddi_enable_transcoder_func(crtc); intel_update_watermarks(crtc); @@ -5006,7 +5018,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config->has_pch_encoder) lpt_pch_enable(crtc); - if (intel_crtc->config->dp_encoder_is_mst && !is_dsi) + if (intel_crtc->config->dp_encoder_is_mst) intel_ddi_set_vc_payload_alloc(crtc, true); assert_vblank_disabled(crtc); @@ -5017,9 +5029,13 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_opregion_notify_encoder(encoder, true); } - if (intel_crtc->config->has_pch_encoder) + if (intel_crtc->config->has_pch_encoder) { + intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev, pipe); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, true); + } /* If we change the relative order between pipe/planes enabling, we need * to change the workaround. */ @@ -5028,6 +5044,8 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_wait_for_vblank(dev, hsw_workaround_pipe); intel_wait_for_vblank(dev, hsw_workaround_pipe); } + + intel_fbc_enable(intel_crtc); } static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force) @@ -5062,12 +5080,22 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) drm_crtc_vblank_off(crtc); assert_vblank_disabled(crtc); + /* + * Sometimes spurious CPU pipe underruns happen when the + * pipe is already disabled, but FDI RX/TX is still enabled. + * Happens at least with VGA+HDMI cloning. Suppress them. 
+ */ + if (intel_crtc->config->has_pch_encoder) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + intel_disable_pipe(intel_crtc); ironlake_pfit_disable(intel_crtc, false); - if (intel_crtc->config->has_pch_encoder) + if (intel_crtc->config->has_pch_encoder) { ironlake_fdi_disable(crtc); + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + } for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->post_disable) @@ -5098,6 +5126,8 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) } intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); + + intel_fbc_disable_crtc(intel_crtc); } static void haswell_crtc_disable(struct drm_crtc *crtc) @@ -5107,7 +5137,6 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; - bool is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI); if (intel_crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, @@ -5126,7 +5155,7 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) if (intel_crtc->config->dp_encoder_is_mst) intel_ddi_set_vc_payload_alloc(crtc, false); - if (!is_dsi) + if (!intel_crtc->config->has_dsi_encoder) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); if (INTEL_INFO(dev)->gen >= 9) @@ -5134,7 +5163,7 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) else ironlake_pfit_disable(intel_crtc, false); - if (!is_dsi) + if (!intel_crtc->config->has_dsi_encoder) intel_ddi_disable_pipe_clock(intel_crtc); if (intel_crtc->config->has_pch_encoder) { @@ -5149,6 +5178,8 @@ static void haswell_crtc_disable(struct drm_crtc *crtc) if (intel_crtc->config->has_pch_encoder) intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, true); + + intel_fbc_disable_crtc(intel_crtc); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -5214,10 +5245,6 @@ static enum intel_display_power_domain port_to_aux_power_domain(enum port port) } } -#define for_each_power_domain(domain, mask) \ - for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - if ((1 << (domain)) & (mask)) - enum intel_display_power_domain intel_display_port_power_domain(struct intel_encoder *intel_encoder) { @@ -6140,13 +6167,10 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - bool is_dsi; if (WARN_ON(intel_crtc->active)) return; - is_dsi = intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI); - if (intel_crtc->config->has_dp_encoder) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -6169,7 +6193,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) if (encoder->pre_pll_enable) encoder->pre_pll_enable(encoder); - if (!is_dsi) { + if (!intel_crtc->config->has_dsi_encoder) { if (IS_CHERRYVIEW(dev)) { chv_prepare_pll(intel_crtc, intel_crtc->config); chv_enable_pll(intel_crtc, intel_crtc->config); @@ -6248,6 +6272,8 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) for_each_encoder_on_crtc(dev, crtc, encoder) encoder->enable(encoder); + + intel_fbc_enable(intel_crtc); } static void i9xx_pfit_disable(struct intel_crtc *crtc) @@ -6295,7 +6321,7 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) if (encoder->post_disable) encoder->post_disable(encoder); - if (!intel_pipe_has_type(intel_crtc, INTEL_OUTPUT_DSI)) { + if (!intel_crtc->config->has_dsi_encoder) { if (IS_CHERRYVIEW(dev)) 
chv_disable_pll(dev_priv, pipe); else if (IS_VALLEYVIEW(dev)) @@ -6310,6 +6336,8 @@ static void i9xx_crtc_disable(struct drm_crtc *crtc) if (!IS_GEN2(dev)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + intel_fbc_disable_crtc(intel_crtc); } static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) @@ -7908,8 +7936,6 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc, int refclk, num_connectors = 0; intel_clock_t clock; bool ok; - bool is_dsi = false; - struct intel_encoder *encoder; const intel_limit_t *limit; struct drm_atomic_state *state = crtc_state->base.state; struct drm_connector *connector; @@ -7919,26 +7945,14 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - for_each_connector_in_state(state, connector, connector_state, i) { - if (connector_state->crtc != &crtc->base) - continue; - - encoder = to_intel_encoder(connector_state->best_encoder); - - switch (encoder->type) { - case INTEL_OUTPUT_DSI: - is_dsi = true; - break; - default: - break; - } + if (crtc_state->has_dsi_encoder) + return 0; - num_connectors++; + for_each_connector_in_state(state, connector, connector_state, i) { + if (connector_state->crtc == &crtc->base) + num_connectors++; } - if (is_dsi) - return 0; - if (!crtc_state->clock_set) { refclk = i9xx_get_refclk(crtc_state, num_connectors); @@ -8931,7 +8945,7 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, memset(&crtc_state->dpll_hw_state, 0, sizeof(crtc_state->dpll_hw_state)); - is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS); + is_lvds = intel_pipe_will_have_type(crtc_state, INTEL_OUTPUT_LVDS); WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)), "Unexpected PCH type %d\n", INTEL_PCH_TYPE(dev)); @@ -9705,14 +9719,10 @@ static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) else cdclk = 337500; - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. 
- */ if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; } to_intel_atomic_state(state)->cdclk = cdclk; @@ -9807,6 +9817,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, break; case PORT_CLK_SEL_SPLL: pipe_config->shared_dpll = DPLL_ID_SPLL; + break; } } @@ -11191,6 +11202,10 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, return true; else if (i915.enable_execlists) return true; + else if (obj->base.dma_buf && + !reservation_object_test_signaled_rcu(obj->base.dma_buf->resv, + false)) + return true; else return ring != i915_gem_request_get_ring(obj->last_write_req); } @@ -11305,6 +11320,9 @@ static void intel_mmio_flip_work_func(struct work_struct *work) { struct intel_mmio_flip *mmio_flip = container_of(work, struct intel_mmio_flip, work); + struct intel_framebuffer *intel_fb = + to_intel_framebuffer(mmio_flip->crtc->base.primary->fb); + struct drm_i915_gem_object *obj = intel_fb->obj; if (mmio_flip->req) { WARN_ON(__i915_wait_request(mmio_flip->req, @@ -11314,6 +11332,12 @@ static void intel_mmio_flip_work_func(struct work_struct *work) i915_gem_request_unreference__unlocked(mmio_flip->req); } + /* For framebuffer backed by dmabuf, wait for fence */ + if (obj->base.dma_buf) + WARN_ON(reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv, + false, false, + MAX_SCHEDULE_TIMEOUT) < 0); + intel_do_mmio_flip(mmio_flip); kfree(mmio_flip); } @@ -11584,7 +11608,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, to_intel_plane(primary)->frontbuffer_bit); mutex_unlock(&dev->struct_mutex); - intel_fbc_disable_crtc(intel_crtc); + intel_fbc_deactivate(intel_crtc); intel_frontbuffer_flip_prepare(dev, to_intel_plane(primary)->frontbuffer_bit); @@ -12582,12 +12606,13 @@ intel_pipe_config_compare(struct drm_device *dev, if (INTEL_INFO(dev)->gen < 8) { PIPE_CONF_CHECK_M_N(dp_m_n); - PIPE_CONF_CHECK_I(has_drrs); if (current_config->has_drrs) PIPE_CONF_CHECK_M_N(dp_m2_n2); } else PIPE_CONF_CHECK_M_N_ALT(dp_m_n, dp_m2_n2); + PIPE_CONF_CHECK_I(has_dsi_encoder); + PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hdisplay); PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_htotal); PIPE_CONF_CHECK_I(base.adjusted_mode.crtc_hblank_start); @@ -13384,6 +13409,13 @@ static int intel_atomic_commit(struct drm_device *dev, dev_priv->display.crtc_disable(crtc); intel_crtc->active = false; intel_disable_shared_dpll(intel_crtc); + + /* + * Underruns don't always raise + * interrupts, so check manually. 
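The cdclk change above replaces silent clamping with a graceful failure at atomic-check time. A compilable sketch of the new policy; the frequency ladder is illustrative, and the reject-instead-of-clamp step is the point.

#include <errno.h>
#include <stdio.h>

static int calc_cdclk(int required_khz, int max_khz, int *out_khz)
{
	int cdclk;

	/* pick the lowest state that satisfies the requirement */
	if (required_khz > 540000)
		cdclk = 675000;
	else if (required_khz > 450000)
		cdclk = 540000;
	else if (required_khz > 337500)
		cdclk = 450000;
	else
		cdclk = 337500;

	if (cdclk > max_khz)
		return -EINVAL;	/* reject at check time, never clamp */

	*out_khz = cdclk;
	return 0;
}

int main(void)
{
	int cdclk = 0;

	printf("%d\n", calc_cdclk(600000, 450000, &cdclk));	/* -EINVAL */
	printf("%d (%d kHz)\n", calc_cdclk(400000, 675000, &cdclk), cdclk);
	return 0;
}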
+ */ + intel_check_cpu_fifo_underruns(dev_priv); + intel_check_pch_fifo_underruns(dev_priv); } } @@ -13653,6 +13685,17 @@ intel_prepare_plane_fb(struct drm_plane *plane, return ret; } + /* For framebuffer backed by dmabuf, wait for fence */ + if (obj && obj->base.dma_buf) { + ret = reservation_object_wait_timeout_rcu(obj->base.dma_buf->resv, + false, true, + MAX_SCHEDULE_TIMEOUT); + if (ret == -ERESTARTSYS) + return ret; + + WARN_ON(ret < 0); + } + if (!obj) { ret = 0; } else if (plane->type == DRM_PLANE_TYPE_CURSOR && @@ -13905,7 +13948,7 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, drm_universal_plane_init(dev, &primary->base, 0, &intel_plane_funcs, intel_primary_formats, num_formats, - DRM_PLANE_TYPE_PRIMARY); + DRM_PLANE_TYPE_PRIMARY, NULL); if (INTEL_INFO(dev)->gen >= 4) intel_create_rotation_property(dev, primary); @@ -14044,7 +14087,7 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, &intel_plane_funcs, intel_cursor_formats, ARRAY_SIZE(intel_cursor_formats), - DRM_PLANE_TYPE_CURSOR); + DRM_PLANE_TYPE_CURSOR, NULL); if (INTEL_INFO(dev)->gen >= 4) { if (!dev->mode_config.rotation_property) @@ -14121,7 +14164,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) goto fail; ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, primary, - cursor, &intel_crtc_funcs); + cursor, &intel_crtc_funcs, NULL); if (ret) goto fail; @@ -14247,7 +14290,14 @@ static bool intel_crt_present(struct drm_device *dev) if (IS_CHERRYVIEW(dev)) return false; - if (IS_VALLEYVIEW(dev) && !dev_priv->vbt.int_crt_support) + if (HAS_PCH_LPT_H(dev) && I915_READ(SFUSE_STRAP) & SFUSE_STRAP_CRT_DISABLED) + return false; + + /* DDI E can't be used if DDI A requires 4 lanes */ + if (HAS_DDI(dev) && I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_A_4_LANES) + return false; + + if (!dev_priv->vbt.int_crt_support) return false; return true; @@ -14790,9 +14840,6 @@ static void intel_init_display(struct drm_device *dev) else if (IS_I945GM(dev) || IS_845G(dev)) dev_priv->display.get_display_clock_speed = i9xx_misc_get_display_clock_speed; - else if (IS_PINEVIEW(dev)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; else if (IS_I915GM(dev)) dev_priv->display.get_display_clock_speed = i915gm_get_display_clock_speed; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index bec443a629da..0f0573aa1b0d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -681,7 +681,7 @@ static uint32_t i9xx_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * The clock divider is based off the hrawclk, and would like to run at * 2MHz. So, take the hrawclk value and divide by 2 and use that */ - return index ? 0 : intel_hrawclk(dev) / 2; + return index ? 
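The two dma-buf waits added above differ deliberately: intel_prepare_plane_fb() waits interruptibly and passes -ERESTARTSYS back so the syscall can restart, while the mmio-flip worker has no user context to return to, so it waits uninterruptibly and only warns on failure. A sketch of that split; wait_fences() is a hypothetical stand-in for the reservation-object wait.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#ifndef ERESTARTSYS
#define ERESTARTSYS 512	/* kernel-internal errno, not in userspace headers */
#endif

static long wait_fences(bool interruptible)
{
	/* pretend a signal arrives during an interruptible wait */
	return interruptible ? -ERESTARTSYS : 1;
}

static int prepare_plane(void)
{
	long ret = wait_fences(true);

	if (ret == -ERESTARTSYS)
		return (int)ret;	/* let the ioctl be restarted */
	if (ret < 0)
		fprintf(stderr, "unexpected fence error: %ld\n", ret);
	return 0;
}

static void flip_worker(void)
{
	if (wait_fences(false) < 0)	/* no user context to bail to */
		fprintf(stderr, "fence wait failed in worker\n");
}

int main(void)
{
	printf("prepare_plane -> %d\n", prepare_plane());
	flip_worker();
	return 0;
}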
0 : DIV_ROUND_CLOSEST(intel_hrawclk(dev), 2); } static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) @@ -694,10 +694,10 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) return 0; if (intel_dig_port->port == PORT_A) { - return DIV_ROUND_UP(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); } else { - return DIV_ROUND_UP(intel_pch_rawclk(dev), 2); + return DIV_ROUND_CLOSEST(intel_pch_rawclk(dev), 2); } } @@ -711,7 +711,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) if (index) return 0; return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); - } else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) { + } else if (HAS_PCH_LPT_H(dev_priv)) { /* Workaround for non-ULT HSW */ switch (index) { case 0: return 63; @@ -719,7 +719,7 @@ static uint32_t hsw_get_aux_clock_divider(struct intel_dp *intel_dp, int index) default: return 0; } } else { - return index ? 0 : DIV_ROUND_UP(intel_pch_rawclk(dev), 2); + return index ? 0 : DIV_ROUND_CLOSEST(intel_pch_rawclk(dev), 2); } } @@ -2697,6 +2697,15 @@ static void intel_enable_dp(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev)) vlv_init_panel_power_sequencer(intel_dp); + /* + * We get an occasional spurious underrun between the port + * enable and vdd enable, when enabling port A eDP. + * + * FIXME: Not sure if this applies to (PCH) port D eDP as well + */ + if (port == PORT_A) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + intel_dp_enable_port(intel_dp); if (port == PORT_A && IS_GEN5(dev_priv)) { @@ -2714,6 +2723,9 @@ static void intel_enable_dp(struct intel_encoder *encoder) edp_panel_on(intel_dp); edp_panel_vdd_off(intel_dp, true); + if (port == PORT_A) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); + pps_unlock(intel_dp); if (IS_VALLEYVIEW(dev)) { @@ -4962,7 +4974,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; - if (intel_dig_port->base.type != INTEL_OUTPUT_EDP) + if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && + intel_dig_port->base.type != INTEL_OUTPUT_HDMI) intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT; if (long_hpd && intel_dig_port->base.type == INTEL_OUTPUT_EDP) { @@ -5976,7 +5989,7 @@ intel_dp_init(struct drm_device *dev, encoder = &intel_encoder->base; drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); intel_encoder->compute_config = intel_dp_compute_config; intel_encoder->disable = intel_disable_dp; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 8c4e7dfe304c..e8d369d0a713 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -536,7 +536,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum intel_mst->primary = intel_dig_port; drm_encoder_init(dev, &intel_encoder->base, &intel_dp_mst_enc_funcs, - DRM_MODE_ENCODER_DPMST); + DRM_MODE_ENCODER_DPMST, NULL); intel_encoder->type = INTEL_OUTPUT_DP_MST; intel_encoder->crtc_mask = 0x7; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ab5c147fa9e9..50f83d220249 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -393,6 +393,9 @@ struct intel_crtc_state { * accordingly. 
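The DIV_ROUND_UP to DIV_ROUND_CLOSEST switches above matter because an AUX clock divider that always rounds up can land a full step away from the ideal ratio. The macros below are the usual kernel definitions for positive operands; the numbers are arbitrary values chosen to show the difference.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define DIV_ROUND_CLOSEST(n, d)	(((n) + (d) / 2) / (d))

int main(void)
{
	/* 450/200 = 2.25: the ceiling overshoots, the nearest value is 2. */
	printf("up=%d closest=%d\n",
	       DIV_ROUND_UP(450, 200), DIV_ROUND_CLOSEST(450, 200));
	/* 350/200 = 1.75: both agree on 2. */
	printf("up=%d closest=%d\n",
	       DIV_ROUND_UP(350, 200), DIV_ROUND_CLOSEST(350, 200));
	return 0;
}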
*/ bool has_dp_encoder; + /* DSI has special cases */ + bool has_dsi_encoder; + /* Whether we should send NULL infoframes. Required for audio. */ bool has_hdmi_sink; @@ -710,7 +713,8 @@ struct intel_hdmi { void (*set_infoframes)(struct drm_encoder *encoder, bool enable, const struct drm_display_mode *adjusted_mode); - bool (*infoframe_enabled)(struct drm_encoder *encoder); + bool (*infoframe_enabled)(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config); }; struct intel_dp_mst_encoder; @@ -1316,9 +1320,11 @@ static inline void intel_fbdev_restore_mode(struct drm_device *dev) #endif /* intel_fbc.c */ -bool intel_fbc_enabled(struct drm_i915_private *dev_priv); -void intel_fbc_update(struct drm_i915_private *dev_priv); +bool intel_fbc_is_active(struct drm_i915_private *dev_priv); +void intel_fbc_deactivate(struct intel_crtc *crtc); +void intel_fbc_update(struct intel_crtc *crtc); void intel_fbc_init(struct drm_i915_private *dev_priv); +void intel_fbc_enable(struct intel_crtc *crtc); void intel_fbc_disable(struct drm_i915_private *dev_priv); void intel_fbc_disable_crtc(struct intel_crtc *crtc); void intel_fbc_invalidate(struct drm_i915_private *dev_priv, @@ -1410,6 +1416,8 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv); void skl_pw1_misc_io_init(struct drm_i915_private *dev_priv); void skl_pw1_misc_io_fini(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); +const char * +intel_display_power_domain_str(enum intel_display_power_domain domain); bool intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index efb5a27dd49c..fff9a66c32a1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -266,16 +266,18 @@ static inline bool is_cmd_mode(struct intel_dsi *intel_dsi) } static bool intel_dsi_compute_config(struct intel_encoder *encoder, - struct intel_crtc_state *config) + struct intel_crtc_state *pipe_config) { struct intel_dsi *intel_dsi = container_of(encoder, struct intel_dsi, base); struct intel_connector *intel_connector = intel_dsi->attached_connector; struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; - struct drm_display_mode *adjusted_mode = &config->base.adjusted_mode; + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; DRM_DEBUG_KMS("\n"); + pipe_config->has_dsi_encoder = true; + if (fixed_mode) intel_fixed_panel_mode(fixed_mode, adjusted_mode); @@ -462,6 +464,8 @@ static void intel_dsi_enable(struct intel_encoder *encoder) intel_panel_enable_backlight(intel_dsi->attached_connector); } +static void intel_dsi_prepare(struct intel_encoder *intel_encoder); + static void intel_dsi_pre_enable(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; @@ -474,6 +478,9 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); + intel_dsi_prepare(encoder); + intel_enable_dsi_pll(encoder); + /* Panel Enable over CRC PMIC */ if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); @@ -699,6 +706,8 @@ static void intel_dsi_get_config(struct intel_encoder *encoder, u32 pclk = 0; DRM_DEBUG_KMS("\n"); + pipe_config->has_dsi_encoder = true; + /* * DPLL_MD is not used in case of DSI, reading will get some default value * set dpll_md = 0 @@ -1026,15 +1035,6 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) } } 
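has_dsi_encoder above turns "walk the encoders and check for DSI" into state computed once at check time and read everywhere else, which is also what makes it verifiable in intel_pipe_config_compare(). A simplified, compilable model of the pattern:

#include <stdbool.h>
#include <stdio.h>

struct crtc_state { bool has_dsi_encoder; };

/* The DSI encoder's compute_config hook records the fact once. */
static void dsi_compute_config(struct crtc_state *state)
{
	state->has_dsi_encoder = true;
}

/* Everyone downstream reads the flag instead of re-walking encoders. */
static void enable_pipe(const struct crtc_state *state)
{
	if (state->has_dsi_encoder)
		printf("assert DSI PLL enabled\n");
	else
		printf("assert display PLL enabled\n");
}

int main(void)
{
	struct crtc_state state = { .has_dsi_encoder = false };

	dsi_compute_config(&state);
	enable_pipe(&state);
	return 0;
}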
-static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder) -{ - DRM_DEBUG_KMS("\n"); - - intel_dsi_prepare(encoder); - intel_enable_dsi_pll(encoder); - -} - static enum drm_connector_status intel_dsi_detect(struct drm_connector *connector, bool force) { @@ -1152,11 +1152,10 @@ void intel_dsi_init(struct drm_device *dev) connector = &intel_connector->base; - drm_encoder_init(dev, encoder, &intel_dsi_funcs, DRM_MODE_ENCODER_DSI); + drm_encoder_init(dev, encoder, &intel_dsi_funcs, DRM_MODE_ENCODER_DSI, + NULL); - /* XXX: very likely not all of these are needed */ intel_encoder->compute_config = intel_dsi_compute_config; - intel_encoder->pre_pll_enable = intel_dsi_pre_pll_enable; intel_encoder->pre_enable = intel_dsi_pre_enable; intel_encoder->enable = intel_dsi_enable_nop; intel_encoder->disable = intel_dsi_pre_disable; diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 7161deb2aed8..286baec979c8 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -429,7 +429,7 @@ void intel_dvo_init(struct drm_device *dev) intel_encoder = &intel_dvo->base; drm_encoder_init(dev, &intel_encoder->base, - &intel_dvo_enc_funcs, encoder_type); + &intel_dvo_enc_funcs, encoder_type, NULL); intel_encoder->disable = intel_disable_dvo; intel_encoder->enable = intel_enable_dvo; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 11fc5281e8ef..a1988a486b92 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -43,7 +43,7 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv) { - return dev_priv->fbc.enable_fbc != NULL; + return dev_priv->fbc.activate != NULL; } static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv) @@ -51,6 +51,11 @@ static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv) return IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8; } +static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv) +{ + return INTEL_INFO(dev_priv)->gen < 4; +} + /* * In some platforms where the CRTC's x:0/y:0 coordinates doesn't match the * frontbuffer's x:0/y:0 coordinates we lie to the hardware about the plane's @@ -64,11 +69,51 @@ static unsigned int get_crtc_fence_y_offset(struct intel_crtc *crtc) return crtc->base.y - crtc->adjusted_y; } -static void i8xx_fbc_disable(struct drm_i915_private *dev_priv) +/* + * For SKL+, the plane source size used by the hardware is based on the value we + * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value + * we wrote to PIPESRC. + */ +static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc, + int *width, int *height) +{ + struct intel_plane_state *plane_state = + to_intel_plane_state(crtc->base.primary->state); + int w, h; + + if (intel_rotation_90_or_270(plane_state->base.rotation)) { + w = drm_rect_height(&plane_state->src) >> 16; + h = drm_rect_width(&plane_state->src) >> 16; + } else { + w = drm_rect_width(&plane_state->src) >> 16; + h = drm_rect_height(&plane_state->src) >> 16; + } + + if (width) + *width = w; + if (height) + *height = h; +} + +static int intel_fbc_calculate_cfb_size(struct intel_crtc *crtc, + struct drm_framebuffer *fb) +{ + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + int lines; + + intel_fbc_get_plane_source_size(crtc, NULL, &lines); + if (INTEL_INFO(dev_priv)->gen >= 7) + lines = min(lines, 2048); + + /* Hardware needs the full buffer stride, not just the active area. 
*/ + return lines * fb->pitches[0]; +} + +static void i8xx_fbc_deactivate(struct drm_i915_private *dev_priv) { u32 fbc_ctl; - dev_priv->fbc.enabled = false; + dev_priv->fbc.active = false; /* Disable compression */ fbc_ctl = I915_READ(FBC_CONTROL); @@ -83,11 +128,9 @@ static void i8xx_fbc_disable(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("FBC idle timed out\n"); return; } - - DRM_DEBUG_KMS("disabled FBC\n"); } -static void i8xx_fbc_enable(struct intel_crtc *crtc) +static void i8xx_fbc_activate(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct drm_framebuffer *fb = crtc->base.primary->fb; @@ -96,10 +139,10 @@ static void i8xx_fbc_enable(struct intel_crtc *crtc) int i; u32 fbc_ctl; - dev_priv->fbc.enabled = true; + dev_priv->fbc.active = true; /* Note: fbc.threshold == 1 for i8xx */ - cfb_pitch = dev_priv->fbc.uncompressed_size / FBC_LL_SIZE; + cfb_pitch = intel_fbc_calculate_cfb_size(crtc, fb) / FBC_LL_SIZE; if (fb->pitches[0] < cfb_pitch) cfb_pitch = fb->pitches[0]; @@ -132,24 +175,21 @@ static void i8xx_fbc_enable(struct intel_crtc *crtc) fbc_ctl |= (cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT; fbc_ctl |= obj->fence_reg; I915_WRITE(FBC_CONTROL, fbc_ctl); - - DRM_DEBUG_KMS("enabled FBC, pitch %d, yoff %d, plane %c\n", - cfb_pitch, crtc->base.y, plane_name(crtc->plane)); } -static bool i8xx_fbc_enabled(struct drm_i915_private *dev_priv) +static bool i8xx_fbc_is_active(struct drm_i915_private *dev_priv) { return I915_READ(FBC_CONTROL) & FBC_CTL_EN; } -static void g4x_fbc_enable(struct intel_crtc *crtc) +static void g4x_fbc_activate(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct drm_framebuffer *fb = crtc->base.primary->fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 dpfc_ctl; - dev_priv->fbc.enabled = true; + dev_priv->fbc.active = true; dpfc_ctl = DPFC_CTL_PLANE(crtc->plane) | DPFC_SR_EN; if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) @@ -162,27 +202,23 @@ static void g4x_fbc_enable(struct intel_crtc *crtc) /* enable it... 
*/ I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane)); } -static void g4x_fbc_disable(struct drm_i915_private *dev_priv) +static void g4x_fbc_deactivate(struct drm_i915_private *dev_priv) { u32 dpfc_ctl; - dev_priv->fbc.enabled = false; + dev_priv->fbc.active = false; /* Disable compression */ dpfc_ctl = I915_READ(DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; I915_WRITE(DPFC_CONTROL, dpfc_ctl); - - DRM_DEBUG_KMS("disabled FBC\n"); } } -static bool g4x_fbc_enabled(struct drm_i915_private *dev_priv) +static bool g4x_fbc_is_active(struct drm_i915_private *dev_priv) { return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN; } @@ -194,7 +230,7 @@ static void intel_fbc_recompress(struct drm_i915_private *dev_priv) POSTING_READ(MSG_FBC_REND_STATE); } -static void ilk_fbc_enable(struct intel_crtc *crtc) +static void ilk_fbc_activate(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct drm_framebuffer *fb = crtc->base.primary->fb; @@ -203,7 +239,7 @@ static void ilk_fbc_enable(struct intel_crtc *crtc) int threshold = dev_priv->fbc.threshold; unsigned int y_offset; - dev_priv->fbc.enabled = true; + dev_priv->fbc.active = true; dpfc_ctl = DPFC_CTL_PLANE(crtc->plane); if (drm_format_plane_cpp(fb->pixel_format, 0) == 2) @@ -238,32 +274,28 @@ static void ilk_fbc_enable(struct intel_crtc *crtc) } intel_fbc_recompress(dev_priv); - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane)); } -static void ilk_fbc_disable(struct drm_i915_private *dev_priv) +static void ilk_fbc_deactivate(struct drm_i915_private *dev_priv) { u32 dpfc_ctl; - dev_priv->fbc.enabled = false; + dev_priv->fbc.active = false; /* Disable compression */ dpfc_ctl = I915_READ(ILK_DPFC_CONTROL); if (dpfc_ctl & DPFC_CTL_EN) { dpfc_ctl &= ~DPFC_CTL_EN; I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl); - - DRM_DEBUG_KMS("disabled FBC\n"); } } -static bool ilk_fbc_enabled(struct drm_i915_private *dev_priv) +static bool ilk_fbc_is_active(struct drm_i915_private *dev_priv) { return I915_READ(ILK_DPFC_CONTROL) & DPFC_CTL_EN; } -static void gen7_fbc_enable(struct intel_crtc *crtc) +static void gen7_fbc_activate(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct drm_framebuffer *fb = crtc->base.primary->fb; @@ -271,7 +303,7 @@ static void gen7_fbc_enable(struct intel_crtc *crtc) u32 dpfc_ctl; int threshold = dev_priv->fbc.threshold; - dev_priv->fbc.enabled = true; + dev_priv->fbc.active = true; dpfc_ctl = 0; if (IS_IVYBRIDGE(dev_priv)) @@ -317,153 +349,119 @@ static void gen7_fbc_enable(struct intel_crtc *crtc) I915_WRITE(DPFC_CPU_FENCE_OFFSET, get_crtc_fence_y_offset(crtc)); intel_fbc_recompress(dev_priv); - - DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane)); } /** - * intel_fbc_enabled - Is FBC enabled? + * intel_fbc_is_active - Is FBC active? * @dev_priv: i915 device instance * * This function is used to verify the current state of FBC. * FIXME: This should be tracked in the plane config eventually * instead of queried at runtime for most callers. 
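The renames above (enabled/enable_fbc becoming active/activate) encode a two-level lifecycle: "enabled" means FBC is set up for a CRTC at all, "active" means the compressor is running right now, so flips can deactivate and reactivate cheaply without a full teardown. A toy model of that distinction in plain C:

#include <stdbool.h>
#include <stdio.h>

struct fbc {
	bool enabled;	/* chosen and configured for some CRTC */
	bool active;	/* hardware currently compressing */
};

static void fbc_deactivate(struct fbc *fbc)
{
	fbc->active = false;	/* e.g. around a page flip */
}

static void fbc_activate(struct fbc *fbc)
{
	if (fbc->enabled)	/* reactivation needs no re-allocation */
		fbc->active = true;
}

int main(void)
{
	struct fbc fbc = { .enabled = true, .active = true };

	fbc_deactivate(&fbc);	/* flip in progress */
	fbc_activate(&fbc);	/* display settled again */
	printf("enabled=%d active=%d\n", fbc.enabled, fbc.active);
	return 0;
}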
*/ -bool intel_fbc_enabled(struct drm_i915_private *dev_priv) +bool intel_fbc_is_active(struct drm_i915_private *dev_priv) { - return dev_priv->fbc.enabled; + return dev_priv->fbc.active; } -static void intel_fbc_enable(struct intel_crtc *crtc, - const struct drm_framebuffer *fb) +static void intel_fbc_activate(const struct drm_framebuffer *fb) { - struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + struct drm_i915_private *dev_priv = fb->dev->dev_private; + struct intel_crtc *crtc = dev_priv->fbc.crtc; - dev_priv->fbc.enable_fbc(crtc); + dev_priv->fbc.activate(crtc); - dev_priv->fbc.crtc = crtc; dev_priv->fbc.fb_id = fb->base.id; dev_priv->fbc.y = crtc->base.y; } static void intel_fbc_work_fn(struct work_struct *__work) { - struct intel_fbc_work *work = - container_of(to_delayed_work(__work), - struct intel_fbc_work, work); - struct drm_i915_private *dev_priv = work->crtc->base.dev->dev_private; - struct drm_framebuffer *crtc_fb = work->crtc->base.primary->fb; + struct drm_i915_private *dev_priv = + container_of(__work, struct drm_i915_private, fbc.work.work); + struct intel_fbc_work *work = &dev_priv->fbc.work; + struct intel_crtc *crtc = dev_priv->fbc.crtc; + int delay_ms = 50; + +retry: + /* Delay the actual enabling to let pageflipping cease and the + * display to settle before starting the compression. Note that + * this delay also serves a second purpose: it allows for a + * vblank to pass after disabling the FBC before we attempt + * to modify the control registers. + * + * A more complicated solution would involve tracking vblanks + * following the termination of the page-flipping sequence + * and indeed performing the enable as a co-routine and not + * waiting synchronously upon the vblank. + * + * WaFbcWaitForVBlankBeforeEnable:ilk,snb + */ + wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_ms); mutex_lock(&dev_priv->fbc.lock); - if (work == dev_priv->fbc.fbc_work) { - /* Double check that we haven't switched fb without cancelling - * the prior work. - */ - if (crtc_fb == work->fb) - intel_fbc_enable(work->crtc, work->fb); - dev_priv->fbc.fbc_work = NULL; + /* Were we cancelled? */ + if (!work->scheduled) + goto out; + + /* Were we delayed again while this function was sleeping? */ + if (time_after(work->enable_jiffies + msecs_to_jiffies(delay_ms), + jiffies)) { + mutex_unlock(&dev_priv->fbc.lock); + goto retry; } - mutex_unlock(&dev_priv->fbc.lock); - kfree(work); + if (crtc->base.primary->fb == work->fb) + intel_fbc_activate(work->fb); + + work->scheduled = false; + +out: + mutex_unlock(&dev_priv->fbc.lock); } static void intel_fbc_cancel_work(struct drm_i915_private *dev_priv) { WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock)); - - if (dev_priv->fbc.fbc_work == NULL) - return; - - /* Synchronisation is provided by struct_mutex and checking of - * dev_priv->fbc.fbc_work, so we can perform the cancellation - * entirely asynchronously. - */ - if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work)) - /* tasklet was killed before being run, clean up */ - kfree(dev_priv->fbc.fbc_work); - - /* Mark the work as no longer wanted so that if it does - * wake-up (because the work was already running and waiting - * for our mutex), it will discover that is no longer - * necessary to run. 
- */ - dev_priv->fbc.fbc_work = NULL; + dev_priv->fbc.work.scheduled = false; } -static void intel_fbc_schedule_enable(struct intel_crtc *crtc) +static void intel_fbc_schedule_activation(struct intel_crtc *crtc) { - struct intel_fbc_work *work; struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + struct intel_fbc_work *work = &dev_priv->fbc.work; WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock)); - intel_fbc_cancel_work(dev_priv); - - work = kzalloc(sizeof(*work), GFP_KERNEL); - if (work == NULL) { - DRM_ERROR("Failed to allocate FBC work structure\n"); - intel_fbc_enable(crtc, crtc->base.primary->fb); - return; - } - - work->crtc = crtc; + /* It is useless to call intel_fbc_cancel_work() in this function since + * we're not releasing fbc.lock, so it won't have an opportunity to grab + * it to discover that it was cancelled. So we just update the expected + * jiffy count. */ work->fb = crtc->base.primary->fb; - INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn); - - dev_priv->fbc.fbc_work = work; + work->scheduled = true; + work->enable_jiffies = jiffies; - /* Delay the actual enabling to let pageflipping cease and the - * display to settle before starting the compression. Note that - * this delay also serves a second purpose: it allows for a - * vblank to pass after disabling the FBC before we attempt - * to modify the control registers. - * - * A more complicated solution would involve tracking vblanks - * following the termination of the page-flipping sequence - * and indeed performing the enable as a co-routine and not - * waiting synchronously upon the vblank. - * - * WaFbcWaitForVBlankBeforeEnable:ilk,snb - */ - schedule_delayed_work(&work->work, msecs_to_jiffies(50)); + schedule_work(&work->work); } -static void __intel_fbc_disable(struct drm_i915_private *dev_priv) +static void __intel_fbc_deactivate(struct drm_i915_private *dev_priv) { WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock)); intel_fbc_cancel_work(dev_priv); - if (dev_priv->fbc.enabled) - dev_priv->fbc.disable_fbc(dev_priv); - dev_priv->fbc.crtc = NULL; -} - -/** - * intel_fbc_disable - disable FBC - * @dev_priv: i915 device instance - * - * This function disables FBC. - */ -void intel_fbc_disable(struct drm_i915_private *dev_priv) -{ - if (!fbc_supported(dev_priv)) - return; - - mutex_lock(&dev_priv->fbc.lock); - __intel_fbc_disable(dev_priv); - mutex_unlock(&dev_priv->fbc.lock); + if (dev_priv->fbc.active) + dev_priv->fbc.deactivate(dev_priv); } /* - * intel_fbc_disable_crtc - disable FBC if it's associated with crtc + * intel_fbc_deactivate - deactivate FBC if it's associated with crtc * @crtc: the CRTC * - * This function disables FBC if it's associated with the provided CRTC. + * This function deactivates FBC if it's associated with the provided CRTC. 
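With the worker structured that way, arming and cancelling collapse into flag updates under fbc.lock: arming refreshes the deadline and re-queues the persistent item, cancelling merely clears the flag the worker re-checks. A hedged sketch of the arm/cancel side, continuing the names from the sketch after the previous hunk:

static void demo_arm(struct coalesced_work *cw, unsigned long delay_ms)
{
        WARN_ON(!mutex_is_locked(&demo_lock));
        cw->deadline = jiffies + msecs_to_jiffies(delay_ms);
        cw->scheduled = true;
        schedule_work(&cw->work);       /* no-op if already queued */
}

static void demo_cancel(struct coalesced_work *cw)
{
        WARN_ON(!mutex_is_locked(&demo_lock));
        cw->scheduled = false;          /* worker re-checks under the lock */
}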
*/ -void intel_fbc_disable_crtc(struct intel_crtc *crtc) +void intel_fbc_deactivate(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; @@ -472,7 +470,7 @@ void intel_fbc_disable_crtc(struct intel_crtc *crtc) mutex_lock(&dev_priv->fbc.lock); if (dev_priv->fbc.crtc == crtc) - __intel_fbc_disable(dev_priv); + __intel_fbc_deactivate(dev_priv); mutex_unlock(&dev_priv->fbc.lock); } @@ -486,38 +484,28 @@ static void set_no_fbc_reason(struct drm_i915_private *dev_priv, DRM_DEBUG_KMS("Disabling FBC: %s\n", reason); } -static bool crtc_is_valid(struct intel_crtc *crtc) +static bool crtc_can_fbc(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) return false; - if (!intel_crtc_active(&crtc->base)) - return false; - - if (!to_intel_plane_state(crtc->base.primary->state)->visible) + if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) return false; return true; } -static struct drm_crtc *intel_fbc_find_crtc(struct drm_i915_private *dev_priv) +static bool crtc_is_valid(struct intel_crtc *crtc) { - struct drm_crtc *crtc = NULL, *tmp_crtc; - enum pipe pipe; - - for_each_pipe(dev_priv, pipe) { - tmp_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - - if (crtc_is_valid(to_intel_crtc(tmp_crtc))) - crtc = tmp_crtc; - } + if (!intel_crtc_active(&crtc->base)) + return false; - if (!crtc) - return NULL; + if (!to_intel_plane_state(crtc->base.primary->state)->visible) + return false; - return crtc; + return true; } static bool multiple_pipes_ok(struct drm_i915_private *dev_priv) @@ -590,11 +578,17 @@ again: } } -static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, int size, - int fb_cpp) +static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) { + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + struct drm_framebuffer *fb = crtc->base.primary->state->fb; struct drm_mm_node *uninitialized_var(compressed_llb); - int ret; + int size, fb_cpp, ret; + + WARN_ON(drm_mm_node_allocated(&dev_priv->fbc.compressed_fb)); + + size = intel_fbc_calculate_cfb_size(crtc, fb); + fb_cpp = drm_format_plane_cpp(fb->pixel_format, 0); ret = find_compression_threshold(dev_priv, &dev_priv->fbc.compressed_fb, size, fb_cpp); @@ -629,8 +623,6 @@ static int intel_fbc_alloc_cfb(struct drm_i915_private *dev_priv, int size, dev_priv->mm.stolen_base + compressed_llb->start); } - dev_priv->fbc.uncompressed_size = size; - DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", dev_priv->fbc.compressed_fb.size, dev_priv->fbc.threshold); @@ -647,18 +639,15 @@ err_llb: static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) { - if (dev_priv->fbc.uncompressed_size == 0) - return; - - i915_gem_stolen_remove_node(dev_priv, &dev_priv->fbc.compressed_fb); + if (drm_mm_node_allocated(&dev_priv->fbc.compressed_fb)) + i915_gem_stolen_remove_node(dev_priv, + &dev_priv->fbc.compressed_fb); if (dev_priv->fbc.compressed_llb) { i915_gem_stolen_remove_node(dev_priv, dev_priv->fbc.compressed_llb); kfree(dev_priv->fbc.compressed_llb); } - - dev_priv->fbc.uncompressed_size = 0; } void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) @@ -671,64 +660,6 @@ void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->fbc.lock); } -/* - * For SKL+, the plane source size used by the hardware is based on the value we - * write to the PLANE_SIZE register. For BDW-, the hardware looks at the value - * we wrote to PIPESRC. 
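A small but notable cleanup in these hunks: uncompressed_size is gone, and "is a CFB allocated?" is now derived from the drm_mm node itself, so there is no shadow variable to drift out of sync with the allocator. The idiom in isolation:

#include <drm/drm_mm.h>

struct drm_mm_node node = {};   /* a zeroed node reads as unallocated */

if (drm_mm_node_allocated(&node))
        drm_mm_remove_node(&node);      /* no size field to reset after */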
- */ -static void intel_fbc_get_plane_source_size(struct intel_crtc *crtc, - int *width, int *height) -{ - struct intel_plane_state *plane_state = - to_intel_plane_state(crtc->base.primary->state); - int w, h; - - if (intel_rotation_90_or_270(plane_state->base.rotation)) { - w = drm_rect_height(&plane_state->src) >> 16; - h = drm_rect_width(&plane_state->src) >> 16; - } else { - w = drm_rect_width(&plane_state->src) >> 16; - h = drm_rect_height(&plane_state->src) >> 16; - } - - if (width) - *width = w; - if (height) - *height = h; -} - -static int intel_fbc_calculate_cfb_size(struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; - struct drm_framebuffer *fb = crtc->base.primary->fb; - int lines; - - intel_fbc_get_plane_source_size(crtc, NULL, &lines); - if (INTEL_INFO(dev_priv)->gen >= 7) - lines = min(lines, 2048); - - /* Hardware needs the full buffer stride, not just the active area. */ - return lines * fb->pitches[0]; -} - -static int intel_fbc_setup_cfb(struct intel_crtc *crtc) -{ - struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; - struct drm_framebuffer *fb = crtc->base.primary->fb; - int size, cpp; - - size = intel_fbc_calculate_cfb_size(crtc); - cpp = drm_format_plane_cpp(fb->pixel_format, 0); - - if (size <= dev_priv->fbc.uncompressed_size) - return 0; - - /* Release any current block */ - __intel_fbc_cleanup_cfb(dev_priv); - - return intel_fbc_alloc_cfb(dev_priv, size, cpp); -} - static bool stride_is_valid(struct drm_i915_private *dev_priv, unsigned int stride) { @@ -803,47 +734,34 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) } /** - * __intel_fbc_update - enable/disable FBC as needed, unlocked - * @dev_priv: i915 device instance + * __intel_fbc_update - activate/deactivate FBC as needed, unlocked + * @crtc: the CRTC that triggered the update * - * This function completely reevaluates the status of FBC, then enables, - * disables or maintains it on the same state. + * This function completely reevaluates the status of FBC, then activates, + * deactivates or maintains it on the same state. 
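For reference, the sizing rule the removed helpers implemented (and which the relocated intel_fbc_calculate_cfb_size(), now called from intel_fbc_alloc_cfb(), presumably keeps): the plane's visible height, clamped to 2048 lines on gen7+, times the full stride. Worked numbers under an assumed mode:

/* Assumed: 1920x1080 primary plane, 8192-byte padded stride, gen7+. */
int lines = min(1080, 2048);            /* visible lines, gen7+ clamp */
int cfb_size = lines * 8192;            /* = 8,847,360 bytes; the full
                                           stride, not just active width */
/* 90/270 plane rotation swaps width and height before this step. */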
*/ -static void __intel_fbc_update(struct drm_i915_private *dev_priv) +static void __intel_fbc_update(struct intel_crtc *crtc) { - struct drm_crtc *drm_crtc = NULL; - struct intel_crtc *crtc; + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; struct drm_framebuffer *fb; struct drm_i915_gem_object *obj; const struct drm_display_mode *adjusted_mode; WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock)); - if (intel_vgpu_active(dev_priv->dev)) - i915.enable_fbc = 0; - - if (i915.enable_fbc < 0) { - set_no_fbc_reason(dev_priv, "disabled per chip default"); + if (!multiple_pipes_ok(dev_priv)) { + set_no_fbc_reason(dev_priv, "more than one pipe active"); goto out_disable; } - if (!i915.enable_fbc) { - set_no_fbc_reason(dev_priv, "disabled per module param"); - goto out_disable; - } + if (!dev_priv->fbc.enabled || dev_priv->fbc.crtc != crtc) + return; - drm_crtc = intel_fbc_find_crtc(dev_priv); - if (!drm_crtc) { + if (!crtc_is_valid(crtc)) { set_no_fbc_reason(dev_priv, "no output"); goto out_disable; } - if (!multiple_pipes_ok(dev_priv)) { - set_no_fbc_reason(dev_priv, "more than one pipe active"); - goto out_disable; - } - - crtc = to_intel_crtc(drm_crtc); fb = crtc->base.primary->fb; obj = intel_fb_obj(fb); adjusted_mode = &crtc->config->base.adjusted_mode; @@ -859,12 +777,6 @@ static void __intel_fbc_update(struct drm_i915_private *dev_priv) goto out_disable; } - if ((INTEL_INFO(dev_priv)->gen < 4 || HAS_DDI(dev_priv)) && - crtc->plane != PLANE_A) { - set_no_fbc_reason(dev_priv, "FBC unsupported on plane"); - goto out_disable; - } - /* The use of a CPU fence is mandatory in order to detect writes * by the CPU to the scanout and trigger updates to the FBC. */ @@ -897,8 +809,19 @@ static void __intel_fbc_update(struct drm_i915_private *dev_priv) goto out_disable; } - if (intel_fbc_setup_cfb(crtc)) { - set_no_fbc_reason(dev_priv, "not enough stolen memory"); + /* It is possible for the required CFB size change without a + * crtc->disable + crtc->enable since it is possible to change the + * stride without triggering a full modeset. Since we try to + * over-allocate the CFB, there's a chance we may keep FBC enabled even + * if this happens, but if we exceed the current CFB size we'll have to + * disable FBC. Notice that it would be possible to disable FBC, wait + * for a frame, free the stolen node, then try to reenable FBC in case + * we didn't get any invalidate/deactivate calls, but this would require + * a lot of tracking just for a specific case. If we conclude it's an + * important case, we can implement it later. */ + if (intel_fbc_calculate_cfb_size(crtc, fb) > + dev_priv->fbc.compressed_fb.size * dev_priv->fbc.threshold) { + set_no_fbc_reason(dev_priv, "CFB requirements changed"); goto out_disable; } @@ -909,10 +832,11 @@ static void __intel_fbc_update(struct drm_i915_private *dev_priv) */ if (dev_priv->fbc.crtc == crtc && dev_priv->fbc.fb_id == fb->base.id && - dev_priv->fbc.y == crtc->base.y) + dev_priv->fbc.y == crtc->base.y && + dev_priv->fbc.active) return; - if (intel_fbc_enabled(dev_priv)) { + if (intel_fbc_is_active(dev_priv)) { /* We update FBC along two paths, after changing fb/crtc * configuration (modeswitching) and after page-flipping * finishes. For the latter, we know that not only did @@ -936,36 +860,37 @@ static void __intel_fbc_update(struct drm_i915_private *dev_priv) * disabling paths we do need to wait for a vblank at * some point. And we wait before enabling FBC anyway. 
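The new in-update size check works because find_compression_threshold() reserves roughly size/threshold bytes of stolen memory, so the reserved size must be scaled back up by the threshold before comparing against the new requirement. Worked numbers under assumed sizes:

/* Assumed: 8 MiB CFB required at enable time, threshold 2 chosen,
 * so compressed_fb.size == 4 MiB of stolen memory was reserved. */
u64 limit = 4ull * 1024 * 1024 * 2;     /* size * threshold = 8 MiB */
/* A later stride change needing 10 MiB exceeds the limit, so FBC is
 * disabled; one needing 7 MiB still fits and FBC stays enabled. */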
*/ - DRM_DEBUG_KMS("disabling active FBC for update\n"); - __intel_fbc_disable(dev_priv); + DRM_DEBUG_KMS("deactivating FBC for update\n"); + __intel_fbc_deactivate(dev_priv); } - intel_fbc_schedule_enable(crtc); + intel_fbc_schedule_activation(crtc); dev_priv->fbc.no_fbc_reason = "FBC enabled (not necessarily active)"; return; out_disable: /* Multiple disables should be harmless */ - if (intel_fbc_enabled(dev_priv)) { - DRM_DEBUG_KMS("unsupported config, disabling FBC\n"); - __intel_fbc_disable(dev_priv); + if (intel_fbc_is_active(dev_priv)) { + DRM_DEBUG_KMS("unsupported config, deactivating FBC\n"); + __intel_fbc_deactivate(dev_priv); } - __intel_fbc_cleanup_cfb(dev_priv); } /* - * intel_fbc_update - enable/disable FBC as needed - * @dev_priv: i915 device instance + * intel_fbc_update - activate/deactivate FBC as needed + * @crtc: the CRTC that triggered the update * - * This function reevaluates the overall state and enables or disables FBC. + * This function reevaluates the overall state and activates or deactivates FBC. */ -void intel_fbc_update(struct drm_i915_private *dev_priv) +void intel_fbc_update(struct intel_crtc *crtc) { + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + if (!fbc_supported(dev_priv)) return; mutex_lock(&dev_priv->fbc.lock); - __intel_fbc_update(dev_priv); + __intel_fbc_update(crtc); mutex_unlock(&dev_priv->fbc.lock); } @@ -985,16 +910,13 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv, if (dev_priv->fbc.enabled) fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe); - else if (dev_priv->fbc.fbc_work) - fbc_bits = INTEL_FRONTBUFFER_PRIMARY( - dev_priv->fbc.fbc_work->crtc->pipe); else fbc_bits = dev_priv->fbc.possible_framebuffer_bits; dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits); if (dev_priv->fbc.busy_bits) - __intel_fbc_disable(dev_priv); + __intel_fbc_deactivate(dev_priv); mutex_unlock(&dev_priv->fbc.lock); } @@ -1012,11 +934,136 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, dev_priv->fbc.busy_bits &= ~frontbuffer_bits; - if (!dev_priv->fbc.busy_bits) { + if (!dev_priv->fbc.busy_bits && dev_priv->fbc.enabled) { + if (origin != ORIGIN_FLIP && dev_priv->fbc.active) { + intel_fbc_recompress(dev_priv); + } else { + __intel_fbc_deactivate(dev_priv); + __intel_fbc_update(dev_priv->fbc.crtc); + } + } + + mutex_unlock(&dev_priv->fbc.lock); +} + +/** + * intel_fbc_enable: tries to enable FBC on the CRTC + * @crtc: the CRTC + * + * This function checks if it's possible to enable FBC on the following CRTC, + * then enables it. Notice that it doesn't activate FBC. 
+ */ +void intel_fbc_enable(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + + if (!fbc_supported(dev_priv)) + return; + + mutex_lock(&dev_priv->fbc.lock); + + if (dev_priv->fbc.enabled) { + WARN_ON(dev_priv->fbc.crtc == crtc); + goto out; + } + + WARN_ON(dev_priv->fbc.active); + WARN_ON(dev_priv->fbc.crtc != NULL); + + if (intel_vgpu_active(dev_priv->dev)) { + set_no_fbc_reason(dev_priv, "VGPU is active"); + goto out; + } + + if (i915.enable_fbc < 0) { + set_no_fbc_reason(dev_priv, "disabled per chip default"); + goto out; + } + + if (!i915.enable_fbc) { + set_no_fbc_reason(dev_priv, "disabled per module param"); + goto out; + } + + if (!crtc_can_fbc(crtc)) { + set_no_fbc_reason(dev_priv, "no enabled pipes can have FBC"); + goto out; + } + + if (intel_fbc_alloc_cfb(crtc)) { + set_no_fbc_reason(dev_priv, "not enough stolen memory"); + goto out; + } + + DRM_DEBUG_KMS("Enabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + dev_priv->fbc.no_fbc_reason = "FBC enabled but not active yet\n"; + + dev_priv->fbc.enabled = true; + dev_priv->fbc.crtc = crtc; +out: + mutex_unlock(&dev_priv->fbc.lock); +} + +/** + * __intel_fbc_disable - disable FBC + * @dev_priv: i915 device instance + * + * This is the low level function that actually disables FBC. Callers should + * grab the FBC lock. + */ +static void __intel_fbc_disable(struct drm_i915_private *dev_priv) +{ + struct intel_crtc *crtc = dev_priv->fbc.crtc; + + WARN_ON(!mutex_is_locked(&dev_priv->fbc.lock)); + WARN_ON(!dev_priv->fbc.enabled); + WARN_ON(dev_priv->fbc.active); + assert_pipe_disabled(dev_priv, crtc->pipe); + + DRM_DEBUG_KMS("Disabling FBC on pipe %c\n", pipe_name(crtc->pipe)); + + __intel_fbc_cleanup_cfb(dev_priv); + + dev_priv->fbc.enabled = false; + dev_priv->fbc.crtc = NULL; +} + +/** + * intel_fbc_disable_crtc - disable FBC if it's associated with crtc + * @crtc: the CRTC + * + * This function disables FBC if it's associated with the provided CRTC. + */ +void intel_fbc_disable_crtc(struct intel_crtc *crtc) +{ + struct drm_i915_private *dev_priv = crtc->base.dev->dev_private; + + if (!fbc_supported(dev_priv)) + return; + + mutex_lock(&dev_priv->fbc.lock); + if (dev_priv->fbc.crtc == crtc) { + WARN_ON(!dev_priv->fbc.enabled); + WARN_ON(dev_priv->fbc.active); __intel_fbc_disable(dev_priv); - __intel_fbc_update(dev_priv); } + mutex_unlock(&dev_priv->fbc.lock); +} +/** + * intel_fbc_disable - globally disable FBC + * @dev_priv: i915 device instance + * + * This function disables FBC regardless of which CRTC is associated with it. 
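__intel_fbc_disable() follows the kernel's double-underscore convention: the underscored variant asserts the lock at runtime, the public wrapper takes it. A generic sketch of the pairing, with hypothetical names:

struct foo {
        struct mutex lock;
        bool enabled;
};

static void __foo_disable(struct foo *f)
{
        WARN_ON(!mutex_is_locked(&f->lock));    /* caller holds f->lock */
        f->enabled = false;
        /* ... teardown that requires f->lock ... */
}

void foo_disable(struct foo *f)
{
        mutex_lock(&f->lock);
        __foo_disable(f);
        mutex_unlock(&f->lock);
}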
+ */ +void intel_fbc_disable(struct drm_i915_private *dev_priv) +{ + if (!fbc_supported(dev_priv)) + return; + + mutex_lock(&dev_priv->fbc.lock); + if (dev_priv->fbc.enabled) + __intel_fbc_disable(dev_priv); mutex_unlock(&dev_priv->fbc.lock); } @@ -1030,8 +1077,11 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) { enum pipe pipe; + INIT_WORK(&dev_priv->fbc.work.work, intel_fbc_work_fn); mutex_init(&dev_priv->fbc.lock); dev_priv->fbc.enabled = false; + dev_priv->fbc.active = false; + dev_priv->fbc.work.scheduled = false; if (!HAS_FBC(dev_priv)) { dev_priv->fbc.no_fbc_reason = "unsupported by this chipset"; @@ -1047,29 +1097,29 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) } if (INTEL_INFO(dev_priv)->gen >= 7) { - dev_priv->fbc.fbc_enabled = ilk_fbc_enabled; - dev_priv->fbc.enable_fbc = gen7_fbc_enable; - dev_priv->fbc.disable_fbc = ilk_fbc_disable; + dev_priv->fbc.is_active = ilk_fbc_is_active; + dev_priv->fbc.activate = gen7_fbc_activate; + dev_priv->fbc.deactivate = ilk_fbc_deactivate; } else if (INTEL_INFO(dev_priv)->gen >= 5) { - dev_priv->fbc.fbc_enabled = ilk_fbc_enabled; - dev_priv->fbc.enable_fbc = ilk_fbc_enable; - dev_priv->fbc.disable_fbc = ilk_fbc_disable; + dev_priv->fbc.is_active = ilk_fbc_is_active; + dev_priv->fbc.activate = ilk_fbc_activate; + dev_priv->fbc.deactivate = ilk_fbc_deactivate; } else if (IS_GM45(dev_priv)) { - dev_priv->fbc.fbc_enabled = g4x_fbc_enabled; - dev_priv->fbc.enable_fbc = g4x_fbc_enable; - dev_priv->fbc.disable_fbc = g4x_fbc_disable; + dev_priv->fbc.is_active = g4x_fbc_is_active; + dev_priv->fbc.activate = g4x_fbc_activate; + dev_priv->fbc.deactivate = g4x_fbc_deactivate; } else { - dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled; - dev_priv->fbc.enable_fbc = i8xx_fbc_enable; - dev_priv->fbc.disable_fbc = i8xx_fbc_disable; + dev_priv->fbc.is_active = i8xx_fbc_is_active; + dev_priv->fbc.activate = i8xx_fbc_activate; + dev_priv->fbc.deactivate = i8xx_fbc_deactivate; /* This value was pulled out of someone's hat */ I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); } /* We still don't have any sort of hardware state readout for FBC, so - * disable it in case the BIOS enabled it to make sure software matches - * the hardware state. */ - if (dev_priv->fbc.fbc_enabled(dev_priv)) - dev_priv->fbc.disable_fbc(dev_priv); + * deactivate it in case the BIOS activated it to make sure software + * matches the hardware state. 
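The closing hunk of intel_fbc_init() is a state-sanitize idiom worth naming: with no hardware readout available, the freshly zeroed software state is forced to match reality by deactivating whatever the firmware left on. The idiom in general form, hypothetical names:

/* Sanitize: make unknown HW state agree with zeroed SW state. */
sw->active = false;
if (hw_is_active(dev))          /* e.g. the BIOS enabled the feature */
        hw_deactivate(dev);     /* now HW matches SW bookkeeping */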
*/ + if (dev_priv->fbc.is_active(dev_priv)) + dev_priv->fbc.deactivate(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 7ae182d0594b..bda526660e20 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -128,9 +128,9 @@ static void ironlake_set_fifo_underrun_reporting(struct drm_device *dev, DE_PIPEB_FIFO_UNDERRUN; if (enable) - ironlake_enable_display_irq(dev_priv, bit); + ilk_enable_display_irq(dev_priv, bit); else - ironlake_disable_display_irq(dev_priv, bit); + ilk_disable_display_irq(dev_priv, bit); } static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) @@ -161,9 +161,9 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, if (!ivb_can_enable_err_int(dev)) return; - ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB); + ilk_enable_display_irq(dev_priv, DE_ERR_INT_IVB); } else { - ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB); + ilk_disable_display_irq(dev_priv, DE_ERR_INT_IVB); if (old && I915_READ(GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe)) { @@ -178,14 +178,10 @@ static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev, { struct drm_i915_private *dev_priv = dev->dev_private; - assert_spin_locked(&dev_priv->irq_lock); - if (enable) - dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_FIFO_UNDERRUN; + bdw_enable_pipe_irq(dev_priv, pipe, GEN8_PIPE_FIFO_UNDERRUN); else - dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_FIFO_UNDERRUN; - I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); - POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + bdw_disable_pipe_irq(dev_priv, pipe, GEN8_PIPE_FIFO_UNDERRUN); } static void ibx_set_fifo_underrun_reporting(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 5ba586683c87..822952235dcf 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -42,8 +42,6 @@ struct i915_guc_client { uint32_t wq_offset; uint32_t wq_size; - - spinlock_t wq_lock; /* Protects all data below */ uint32_t wq_tail; /* GuC submission statistics & status */ @@ -95,8 +93,6 @@ struct intel_guc { struct i915_guc_client *execbuf_client; - spinlock_t host2guc_lock; /* Protects all data below */ - DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS); uint32_t db_cacheline; /* Cyclic counter mod pagesize */ diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index bdd462e7c690..00d065fee506 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -169,10 +169,10 @@ static void g4x_write_infoframe(struct drm_encoder *encoder, POSTING_READ(VIDEO_DIP_CTL); } -static bool g4x_infoframe_enabled(struct drm_encoder *encoder) +static bool g4x_infoframe_enabled(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); u32 val = I915_READ(VIDEO_DIP_CTL); @@ -225,13 +225,13 @@ static void ibx_write_infoframe(struct drm_encoder *encoder, POSTING_READ(reg); } -static bool ibx_infoframe_enabled(struct drm_encoder *encoder) +static bool ibx_infoframe_enabled(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct 
intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - i915_reg_t reg = TVIDEO_DIP_CTL(intel_crtc->pipe); + enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + i915_reg_t reg = TVIDEO_DIP_CTL(pipe); u32 val = I915_READ(reg); if ((val & VIDEO_DIP_ENABLE) == 0) @@ -287,12 +287,12 @@ static void cpt_write_infoframe(struct drm_encoder *encoder, POSTING_READ(reg); } -static bool cpt_infoframe_enabled(struct drm_encoder *encoder) +static bool cpt_infoframe_enabled(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); - u32 val = I915_READ(TVIDEO_DIP_CTL(intel_crtc->pipe)); + struct drm_i915_private *dev_priv = to_i915(encoder->dev); + enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + u32 val = I915_READ(TVIDEO_DIP_CTL(pipe)); if ((val & VIDEO_DIP_ENABLE) == 0) return false; @@ -341,13 +341,13 @@ static void vlv_write_infoframe(struct drm_encoder *encoder, POSTING_READ(reg); } -static bool vlv_infoframe_enabled(struct drm_encoder *encoder) +static bool vlv_infoframe_enabled(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); + struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); - u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(intel_crtc->pipe)); + enum pipe pipe = to_intel_crtc(pipe_config->base.crtc)->pipe; + u32 val = I915_READ(VLV_TVIDEO_DIP_CTL(pipe)); if ((val & VIDEO_DIP_ENABLE) == 0) return false; @@ -398,12 +398,11 @@ static void hsw_write_infoframe(struct drm_encoder *encoder, POSTING_READ(ctl_reg); } -static bool hsw_infoframe_enabled(struct drm_encoder *encoder) +static bool hsw_infoframe_enabled(struct drm_encoder *encoder, + const struct intel_crtc_state *pipe_config) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); - u32 val = I915_READ(HSW_TVIDEO_DIP_CTL(intel_crtc->config->cpu_transcoder)); + struct drm_i915_private *dev_priv = to_i915(encoder->dev); + u32 val = I915_READ(HSW_TVIDEO_DIP_CTL(pipe_config->cpu_transcoder)); return val & (VIDEO_DIP_ENABLE_VSC_HSW | VIDEO_DIP_ENABLE_AVI_HSW | VIDEO_DIP_ENABLE_GCP_HSW | VIDEO_DIP_ENABLE_VS_HSW | @@ -927,7 +926,7 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, if (tmp & HDMI_MODE_SELECT_HDMI) pipe_config->has_hdmi_sink = true; - if (intel_hdmi->infoframe_enabled(&encoder->base)) + if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config)) pipe_config->has_infoframe = true; if (tmp & SDVO_AUDIO_ENABLE) @@ -2165,7 +2164,7 @@ void intel_hdmi_init(struct drm_device *dev, intel_encoder = &intel_dig_port->base; drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); intel_encoder->compute_config = intel_hdmi_compute_config; if (HAS_PCH_SPLIT(dev)) { diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 1110c83953cf..e26e22a72e3b 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ 
-472,9 +472,7 @@ gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) } static int -gmbus_xfer(struct i2c_adapter *adapter, - struct i2c_msg *msgs, - int num) +do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) { struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, @@ -483,14 +481,6 @@ gmbus_xfer(struct i2c_adapter *adapter, int i = 0, inc, try = 0; int ret = 0; - intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - mutex_lock(&dev_priv->gmbus_mutex); - - if (bus->force_bit) { - ret = i2c_bit_algo.master_xfer(adapter, msgs, num); - goto out; - } - retry: I915_WRITE(GMBUS0, bus->reg0); @@ -505,17 +495,13 @@ retry: ret = gmbus_xfer_write(dev_priv, &msgs[i]); } + if (!ret) + ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_WAIT_PHASE, + GMBUS_HW_WAIT_EN); if (ret == -ETIMEDOUT) goto timeout; - if (ret == -ENXIO) - goto clear_err; - - ret = gmbus_wait_hw_status(dev_priv, GMBUS_HW_WAIT_PHASE, - GMBUS_HW_WAIT_EN); - if (ret == -ENXIO) + else if (ret) goto clear_err; - if (ret) - goto timeout; } /* Generate a STOP condition on the bus. Note that gmbus can't generata @@ -589,13 +575,34 @@ timeout: bus->adapter.name, bus->reg0 & 0xff); I915_WRITE(GMBUS0, 0); - /* Hardware may not support GMBUS over these pins? Try GPIO bitbanging instead. */ + /* + * Hardware may not support GMBUS over these pins? Try GPIO bitbanging + * instead. Use EAGAIN to have i2c core retry. + */ bus->force_bit = 1; - ret = i2c_bit_algo.master_xfer(adapter, msgs, num); + ret = -EAGAIN; out: - mutex_unlock(&dev_priv->gmbus_mutex); + return ret; +} + +static int +gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) +{ + struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, + adapter); + struct drm_i915_private *dev_priv = bus->dev_priv; + int ret; + intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + mutex_lock(&dev_priv->gmbus_mutex); + + if (bus->force_bit) + ret = i2c_bit_algo.master_xfer(adapter, msgs, num); + else + ret = do_gmbus_xfer(adapter, msgs, num); + + mutex_unlock(&dev_priv->gmbus_mutex); intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); return ret; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 61f1145f6579..0da0240caf81 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1025,7 +1025,7 @@ void intel_lvds_init(struct drm_device *dev) DRM_MODE_CONNECTOR_LVDS); drm_encoder_init(dev, &intel_encoder->base, &intel_lvds_enc_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); intel_encoder->enable = intel_enable_lvds; intel_encoder->pre_enable = intel_pre_enable_lvds; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a24df35e11e7..ae808b68a44f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1264,6 +1264,14 @@ static void intel_backlight_device_unregister(struct intel_connector *connector) #endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */ /* + * BXT: PWM clock frequency = 19.2 MHz. + */ +static u32 bxt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) +{ + return KHz(19200) / pwm_freq_hz; +} + +/* * SPT: This value represents the period of the PWM stream in clock periods * multiplied by 16 (default increment) or 128 (alternate increment selected in * SCHICKEN_1 bit 0). PWM clock is 24 MHz. 
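These hz_to_pwm() helpers all reduce to period = clock / frequency, expressed in whatever unit the hardware counts. For the BXT helper added above, a quick worked example using the 19.2 MHz PWM clock its comment states:

/* KHz(19200) == 19,200,000 Hz; a 200 Hz backlight request yields: */
u32 period = 19200000 / 200;    /* = 96,000 PWM clock cycles */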
@@ -1300,7 +1308,7 @@ static u32 lpt_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) else mul = 128; - if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) + if (HAS_PCH_LPT_H(dev_priv)) clock = MHz(135); /* LPT:H */ else clock = MHz(24); /* LPT:LP */ @@ -1335,22 +1343,28 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) int clock; if (IS_PINEVIEW(dev)) - clock = intel_hrawclk(dev); + clock = MHz(intel_hrawclk(dev)); else - clock = 1000 * dev_priv->display.get_display_clock_speed(dev); + clock = 1000 * dev_priv->cdclk_freq; return clock / (pwm_freq_hz * 32); } /* * Gen4: This value represents the period of the PWM stream in display core - * clocks multiplied by 128. + * clocks ([DevCTG] HRAW clocks) multiplied by 128. + * */ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) { struct drm_device *dev = connector->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - int clock = 1000 * dev_priv->display.get_display_clock_speed(dev); + int clock; + + if (IS_G4X(dev_priv)) + clock = MHz(intel_hrawclk(dev)); + else + clock = 1000 * dev_priv->cdclk_freq; return clock / (pwm_freq_hz * 128); } @@ -1385,14 +1399,18 @@ static u32 get_backlight_max_vbt(struct intel_connector *connector) u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz; u32 pwm; - if (!pwm_freq_hz) { - DRM_DEBUG_KMS("backlight frequency not specified in VBT\n"); + if (!panel->backlight.hz_to_pwm) { + DRM_DEBUG_KMS("backlight frequency conversion not supported\n"); return 0; } - if (!panel->backlight.hz_to_pwm) { - DRM_DEBUG_KMS("backlight frequency setting from VBT currently not supported on this platform\n"); - return 0; + if (pwm_freq_hz) { + DRM_DEBUG_KMS("VBT defined backlight frequency %u Hz\n", + pwm_freq_hz); + } else { + pwm_freq_hz = 200; + DRM_DEBUG_KMS("default backlight frequency %u Hz\n", + pwm_freq_hz); } pwm = panel->backlight.hz_to_pwm(connector, pwm_freq_hz); @@ -1401,8 +1419,6 @@ static u32 get_backlight_max_vbt(struct intel_connector *connector) return 0; } - DRM_DEBUG_KMS("backlight frequency %u Hz from VBT\n", pwm_freq_hz); - return pwm; } @@ -1750,6 +1766,7 @@ intel_panel_init_backlight_funcs(struct intel_panel *panel) panel->backlight.disable = bxt_disable_backlight; panel->backlight.set = bxt_set_backlight; panel->backlight.get = bxt_get_backlight; + panel->backlight.hz_to_pwm = bxt_hz_to_pwm; } else if (HAS_PCH_LPT(dev) || HAS_PCH_SPT(dev)) { panel->backlight.setup = lpt_setup_backlight; panel->backlight.enable = lpt_enable_backlight; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 96f45d7b3e4b..ee05ce8bf79a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -66,6 +66,14 @@ static void bxt_init_clock_gating(struct drm_device *dev) */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ); + + /* + * Wa: Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); } static void i915_pineview_get_mem_freq(struct drm_device *dev) @@ -2422,7 +2430,7 @@ static void ilk_wm_merge(struct drm_device *dev, * enabled sometime later. 
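Stepping back to the backlight hunks above: the reworked i9xx/i965 conversions pick between the raw hardware clock and the cached core display clock, then apply the gen4 rule period = clock / (freq * 128). A worked example under an assumed 400 MHz cdclk:

/* Assumed: dev_priv->cdclk_freq == 400000 (kHz), 200 Hz request. */
int clock = 1000 * 400000;              /* 400,000,000 Hz */
u32 period = clock / (200 * 128);       /* = 15,625 */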
*/ if (IS_GEN5(dev) && !merged->fbc_wm_enabled && - intel_fbc_enabled(dev_priv)) { + intel_fbc_is_active(dev_priv)) { for (level = 2; level <= max_level; level++) { struct intel_wm_level *wm = &merged->wm[level]; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index bc5ea2a6cf4c..b6609e648f75 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -191,9 +191,6 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, - DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE); - /* Enable AUX frame sync at sink */ if (dev_priv->psr.aux_frame_sync) drm_dp_dpcd_writeb(&intel_dp->aux, @@ -414,9 +411,14 @@ void intel_psr_enable(struct intel_dp *intel_dp) skl_psr_setup_su_vsc(intel_dp); } - /* Avoid continuous PSR exit by masking memup and hpd */ + /* + * Per Spec: Avoid continuous PSR exit by masking MEMUP and HPD. + * Also mask LPSP to avoid dependency on other drivers that + * might block runtime_pm besides preventing other hw tracking + * issues now we can rely on frontbuffer tracking. + */ I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP | - EDP_PSR_DEBUG_MASK_HPD); + EDP_PSR_DEBUG_MASK_HPD | EDP_PSR_DEBUG_MASK_LPSP); /* Enable PSR on the panel */ hsw_psr_enable_sink(intel_dp); @@ -522,11 +524,15 @@ void intel_psr_disable(struct intel_dp *intel_dp) return; } + /* Disable PSR on Source */ if (HAS_DDI(dev)) hsw_psr_disable(intel_dp); else vlv_psr_disable(intel_dp); + /* Disable PSR on Sink */ + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, 0); + dev_priv->psr.enabled = NULL; mutex_unlock(&dev_priv->psr.lock); @@ -737,25 +743,9 @@ void intel_psr_flush(struct drm_device *dev, frontbuffer_bits &= INTEL_FRONTBUFFER_ALL_MASK(pipe); dev_priv->psr.busy_frontbuffer_bits &= ~frontbuffer_bits; - if (HAS_DDI(dev)) { - /* - * By definition every flush should mean invalidate + flush, - * however on core platforms let's minimize the - * disable/re-enable so we can avoid the invalidate when flip - * originated the flush. - */ - if (frontbuffer_bits && origin != ORIGIN_FLIP) - intel_psr_exit(dev); - } else { - /* - * On Valleyview and Cherryview we don't use hardware tracking - * so any plane updates or cursor moves don't result in a PSR - * invalidating. Which means we need to manually fake this in - * software for all flushes. 
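The PSR flush hunk below drops the hardware-tracking special case: every flush is now treated as invalidate plus flush on all platforms, so any dirty frontbuffer bit exits PSR and re-entry is left to the deferred work. In sketch form:

/* Unified rule after this change, regardless of platform: */
if (frontbuffer_bits)
        intel_psr_exit(dev);    /* re-enable is deferred to psr.work */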
- */ - if (frontbuffer_bits) - intel_psr_exit(dev); - } + /* By definition flush = invalidate + flush */ + if (frontbuffer_bits) + intel_psr_exit(dev); if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index afca6c940b9a..2c2151f1c47e 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -65,6 +65,72 @@ bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); +const char * +intel_display_power_domain_str(enum intel_display_power_domain domain) +{ + switch (domain) { + case POWER_DOMAIN_PIPE_A: + return "PIPE_A"; + case POWER_DOMAIN_PIPE_B: + return "PIPE_B"; + case POWER_DOMAIN_PIPE_C: + return "PIPE_C"; + case POWER_DOMAIN_PIPE_A_PANEL_FITTER: + return "PIPE_A_PANEL_FITTER"; + case POWER_DOMAIN_PIPE_B_PANEL_FITTER: + return "PIPE_B_PANEL_FITTER"; + case POWER_DOMAIN_PIPE_C_PANEL_FITTER: + return "PIPE_C_PANEL_FITTER"; + case POWER_DOMAIN_TRANSCODER_A: + return "TRANSCODER_A"; + case POWER_DOMAIN_TRANSCODER_B: + return "TRANSCODER_B"; + case POWER_DOMAIN_TRANSCODER_C: + return "TRANSCODER_C"; + case POWER_DOMAIN_TRANSCODER_EDP: + return "TRANSCODER_EDP"; + case POWER_DOMAIN_PORT_DDI_A_LANES: + return "PORT_DDI_A_LANES"; + case POWER_DOMAIN_PORT_DDI_B_LANES: + return "PORT_DDI_B_LANES"; + case POWER_DOMAIN_PORT_DDI_C_LANES: + return "PORT_DDI_C_LANES"; + case POWER_DOMAIN_PORT_DDI_D_LANES: + return "PORT_DDI_D_LANES"; + case POWER_DOMAIN_PORT_DDI_E_LANES: + return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DSI: + return "PORT_DSI"; + case POWER_DOMAIN_PORT_CRT: + return "PORT_CRT"; + case POWER_DOMAIN_PORT_OTHER: + return "PORT_OTHER"; + case POWER_DOMAIN_VGA: + return "VGA"; + case POWER_DOMAIN_AUDIO: + return "AUDIO"; + case POWER_DOMAIN_PLLS: + return "PLLS"; + case POWER_DOMAIN_AUX_A: + return "AUX_A"; + case POWER_DOMAIN_AUX_B: + return "AUX_B"; + case POWER_DOMAIN_AUX_C: + return "AUX_C"; + case POWER_DOMAIN_AUX_D: + return "AUX_D"; + case POWER_DOMAIN_GMBUS: + return "GMBUS"; + case POWER_DOMAIN_INIT: + return "INIT"; + case POWER_DOMAIN_MODESET: + return "MODESET"; + default: + MISSING_CASE(domain); + return "?"; + } +} + static void intel_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1433,11 +1499,15 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, mutex_lock(&power_domains->lock); - WARN_ON(!power_domains->domain_use_count[domain]); + WARN(!power_domains->domain_use_count[domain], + "Use count on domain %s is already zero\n", + intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { - WARN_ON(!power_well->count); + WARN(!power_well->count, + "Use count on power well %s is already zero", + power_well->name); if (!--power_well->count) intel_power_well_disable(dev_priv, power_well); @@ -1841,7 +1911,7 @@ sanitize_disable_power_well_option(const struct drm_i915_private *dev_priv, if (disable_power_well >= 0) return !!disable_power_well; - if (IS_SKYLAKE(dev_priv)) { + if (IS_BROXTON(dev_priv)) { DRM_DEBUG_KMS("Disabling display power well support\n"); return 0; } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 06679f164b3e..2e1da060b0e1 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2978,7 +2978,8 @@ 
bool intel_sdvo_init(struct drm_device *dev, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; - drm_encoder_init(dev, &intel_encoder->base, &intel_sdvo_enc_funcs, 0); + drm_encoder_init(dev, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, + NULL); /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 2b96f336589e..dbf421351b5c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1123,7 +1123,7 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, - DRM_PLANE_TYPE_OVERLAY); + DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { kfree(intel_plane); goto out; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 6bea78944cd6..948cbff6c62e 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1645,7 +1645,7 @@ intel_tv_init(struct drm_device *dev) DRM_MODE_CONNECTOR_SVIDEO); drm_encoder_init(dev, &intel_encoder->base, &intel_tv_enc_funcs, - DRM_MODE_ENCODER_TVDAC); + DRM_MODE_ENCODER_TVDAC, NULL); intel_encoder->compute_config = intel_tv_compute_config; intel_encoder->get_config = intel_tv_get_config; diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index 98605ea2ad9d..063825fecbe2 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -137,7 +137,7 @@ static void dw_hdmi_imx_encoder_prepare(struct drm_encoder *encoder) imx_drm_set_bus_format(encoder, MEDIA_BUS_FMT_RGB888_1X24); } -static struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = { .mode_fixup = dw_hdmi_imx_encoder_mode_fixup, .mode_set = dw_hdmi_imx_encoder_mode_set, .prepare = dw_hdmi_imx_encoder_prepare, @@ -145,7 +145,7 @@ static struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = { .disable = dw_hdmi_imx_encoder_disable, }; -static struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = { +static const struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -251,7 +251,7 @@ static int dw_hdmi_imx_bind(struct device *dev, struct device *master, drm_encoder_helper_add(encoder, &dw_hdmi_imx_encoder_helper_funcs); drm_encoder_init(drm, encoder, &dw_hdmi_imx_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data); } diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 6faa735376ec..7be7ac808304 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -39,13 +39,12 @@ struct imx_drm_component { struct imx_drm_device { struct drm_device *drm; struct imx_drm_crtc *crtc[MAX_CRTC]; - int pipes; + unsigned int pipes; struct drm_fbdev_cma *fbhelper; }; struct imx_drm_crtc { struct drm_crtc *crtc; - int pipe; struct imx_drm_crtc_helper_funcs imx_drm_helper_funcs; }; @@ -54,9 +53,9 @@ static int legacyfb_depth = 16; module_param(legacyfb_depth, int, 0444); #endif -int imx_drm_crtc_id(struct imx_drm_crtc *crtc) +unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc) { - return crtc->pipe; + return drm_crtc_index(crtc->crtc); } 
EXPORT_SYMBOL_GPL(imx_drm_crtc_id); @@ -64,8 +63,7 @@ static void imx_drm_driver_lastclose(struct drm_device *drm) { struct imx_drm_device *imxdrm = drm->dev_private; - if (imxdrm->fbhelper) - drm_fbdev_cma_restore_mode(imxdrm->fbhelper); + drm_fbdev_cma_restore_mode(imxdrm->fbhelper); } static int imx_drm_driver_unload(struct drm_device *drm) @@ -125,19 +123,19 @@ EXPORT_SYMBOL_GPL(imx_drm_set_bus_format); int imx_drm_crtc_vblank_get(struct imx_drm_crtc *imx_drm_crtc) { - return drm_vblank_get(imx_drm_crtc->crtc->dev, imx_drm_crtc->pipe); + return drm_crtc_vblank_get(imx_drm_crtc->crtc); } EXPORT_SYMBOL_GPL(imx_drm_crtc_vblank_get); void imx_drm_crtc_vblank_put(struct imx_drm_crtc *imx_drm_crtc) { - drm_vblank_put(imx_drm_crtc->crtc->dev, imx_drm_crtc->pipe); + drm_crtc_vblank_put(imx_drm_crtc->crtc); } EXPORT_SYMBOL_GPL(imx_drm_crtc_vblank_put); void imx_drm_handle_vblank(struct imx_drm_crtc *imx_drm_crtc) { - drm_handle_vblank(imx_drm_crtc->crtc->dev, imx_drm_crtc->pipe); + drm_crtc_handle_vblank(imx_drm_crtc->crtc); } EXPORT_SYMBOL_GPL(imx_drm_handle_vblank); @@ -216,7 +214,7 @@ static void imx_drm_output_poll_changed(struct drm_device *drm) drm_fbdev_cma_hotplug_event(imxdrm->fbhelper); } -static struct drm_mode_config_funcs imx_drm_mode_config_funcs = { +static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = { .fb_create = drm_fb_cma_create, .output_poll_changed = imx_drm_output_poll_changed, }; @@ -334,7 +332,7 @@ err_kms: * imx_drm_add_crtc - add a new crtc */ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc, - struct imx_drm_crtc **new_crtc, + struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane, const struct imx_drm_crtc_helper_funcs *imx_drm_helper_funcs, struct device_node *port) { @@ -357,12 +355,11 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc, return -ENOMEM; imx_drm_crtc->imx_drm_helper_funcs = *imx_drm_helper_funcs; - imx_drm_crtc->pipe = imxdrm->pipes++; imx_drm_crtc->crtc = crtc; crtc->port = port; - imxdrm->crtc[imx_drm_crtc->pipe] = imx_drm_crtc; + imxdrm->crtc[imxdrm->pipes++] = imx_drm_crtc; *new_crtc = imx_drm_crtc; @@ -373,13 +370,13 @@ int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc, drm_crtc_helper_add(crtc, imx_drm_crtc->imx_drm_helper_funcs.crtc_helper_funcs); - drm_crtc_init(drm, crtc, - imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs); + drm_crtc_init_with_planes(drm, crtc, primary_plane, NULL, + imx_drm_crtc->imx_drm_helper_funcs.crtc_funcs, NULL); return 0; err_register: - imxdrm->crtc[imx_drm_crtc->pipe] = NULL; + imxdrm->crtc[--imxdrm->pipes] = NULL; kfree(imx_drm_crtc); return ret; } @@ -391,10 +388,11 @@ EXPORT_SYMBOL_GPL(imx_drm_add_crtc); int imx_drm_remove_crtc(struct imx_drm_crtc *imx_drm_crtc) { struct imx_drm_device *imxdrm = imx_drm_crtc->crtc->dev->dev_private; + unsigned int pipe = drm_crtc_index(imx_drm_crtc->crtc); drm_crtc_cleanup(imx_drm_crtc->crtc); - imxdrm->crtc[imx_drm_crtc->pipe] = NULL; + imxdrm->crtc[pipe] = NULL; kfree(imx_drm_crtc); diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h index 28e776d8d9d2..71cf6d9c714f 100644 --- a/drivers/gpu/drm/imx/imx-drm.h +++ b/drivers/gpu/drm/imx/imx-drm.h @@ -9,10 +9,11 @@ struct drm_display_mode; struct drm_encoder; struct drm_fbdev_cma; struct drm_framebuffer; +struct drm_plane; struct imx_drm_crtc; struct platform_device; -int imx_drm_crtc_id(struct imx_drm_crtc *crtc); +unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc); struct imx_drm_crtc_helper_funcs { int 
(*enable_vblank)(struct drm_crtc *crtc); @@ -24,7 +25,7 @@ struct imx_drm_crtc_helper_funcs { }; int imx_drm_add_crtc(struct drm_device *drm, struct drm_crtc *crtc, - struct imx_drm_crtc **new_crtc, + struct imx_drm_crtc **new_crtc, struct drm_plane *primary_plane, const struct imx_drm_crtc_helper_funcs *imx_helper_funcs, struct device_node *port); int imx_drm_remove_crtc(struct imx_drm_crtc *); diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index abacc8f67469..22ac482231ed 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -358,23 +358,23 @@ static void imx_ldb_encoder_disable(struct drm_encoder *encoder) drm_panel_unprepare(imx_ldb_ch->panel); } -static struct drm_connector_funcs imx_ldb_connector_funcs = { +static const struct drm_connector_funcs imx_ldb_connector_funcs = { .dpms = drm_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = imx_ldb_connector_detect, .destroy = imx_drm_connector_destroy, }; -static struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = { +static const struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = { .get_modes = imx_ldb_connector_get_modes, .best_encoder = imx_ldb_connector_best_encoder, }; -static struct drm_encoder_funcs imx_ldb_encoder_funcs = { +static const struct drm_encoder_funcs imx_ldb_encoder_funcs = { .destroy = imx_drm_encoder_destroy, }; -static struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = { .dpms = imx_ldb_encoder_dpms, .mode_fixup = imx_ldb_encoder_mode_fixup, .prepare = imx_ldb_encoder_prepare, @@ -422,7 +422,7 @@ static int imx_ldb_register(struct drm_device *drm, drm_encoder_helper_add(&imx_ldb_ch->encoder, &imx_ldb_encoder_helper_funcs); drm_encoder_init(drm, &imx_ldb_ch->encoder, &imx_ldb_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); drm_connector_helper_add(&imx_ldb_ch->connector, &imx_ldb_connector_helper_funcs); diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c index e671ad369416..292349f0b132 100644 --- a/drivers/gpu/drm/imx/imx-tve.c +++ b/drivers/gpu/drm/imx/imx-tve.c @@ -360,24 +360,24 @@ static void imx_tve_encoder_disable(struct drm_encoder *encoder) tve_disable(tve); } -static struct drm_connector_funcs imx_tve_connector_funcs = { +static const struct drm_connector_funcs imx_tve_connector_funcs = { .dpms = drm_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = imx_tve_connector_detect, .destroy = imx_drm_connector_destroy, }; -static struct drm_connector_helper_funcs imx_tve_connector_helper_funcs = { +static const struct drm_connector_helper_funcs imx_tve_connector_helper_funcs = { .get_modes = imx_tve_connector_get_modes, .best_encoder = imx_tve_connector_best_encoder, .mode_valid = imx_tve_connector_mode_valid, }; -static struct drm_encoder_funcs imx_tve_encoder_funcs = { +static const struct drm_encoder_funcs imx_tve_encoder_funcs = { .destroy = imx_drm_encoder_destroy, }; -static struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = { .dpms = imx_tve_encoder_dpms, .mode_fixup = imx_tve_encoder_mode_fixup, .prepare = imx_tve_encoder_prepare, @@ -508,7 +508,7 @@ static int imx_tve_register(struct drm_device *drm, struct imx_tve *tve) drm_encoder_helper_add(&tve->encoder, &imx_tve_encoder_helper_funcs); drm_encoder_init(drm, 
&tve->encoder, &imx_tve_encoder_funcs, - encoder_type); + encoder_type, NULL); drm_connector_helper_add(&tve->connector, &imx_tve_connector_helper_funcs); @@ -721,6 +721,7 @@ static const struct of_device_id imx_tve_dt_ids[] = { { .compatible = "fsl,imx53-tve", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, imx_tve_dt_ids); static struct platform_driver imx_tve_driver = { .probe = imx_tve_probe, diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c index 7bc8301fafff..30a57185bdb4 100644 --- a/drivers/gpu/drm/imx/ipuv3-crtc.c +++ b/drivers/gpu/drm/imx/ipuv3-crtc.c @@ -212,7 +212,8 @@ static void ipu_crtc_handle_pageflip(struct ipu_crtc *ipu_crtc) spin_lock_irqsave(&drm->event_lock, flags); if (ipu_crtc->page_flip_event) - drm_send_vblank_event(drm, -1, ipu_crtc->page_flip_event); + drm_crtc_send_vblank_event(&ipu_crtc->base, + ipu_crtc->page_flip_event); ipu_crtc->page_flip_event = NULL; imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc); spin_unlock_irqrestore(&drm->event_lock, flags); @@ -269,7 +270,7 @@ static void ipu_crtc_commit(struct drm_crtc *crtc) ipu_fb_enable(ipu_crtc); } -static struct drm_crtc_helper_funcs ipu_helper_funcs = { +static const struct drm_crtc_helper_funcs ipu_helper_funcs = { .dpms = ipu_crtc_dpms, .mode_fixup = ipu_crtc_mode_fixup, .mode_set = ipu_crtc_mode_set, @@ -349,7 +350,6 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc, struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent); int dp = -EINVAL; int ret; - int id; ret = ipu_get_resources(ipu_crtc, pdata); if (ret) { @@ -358,18 +358,23 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc, return ret; } + if (pdata->dp >= 0) + dp = IPU_DP_FLOW_SYNC_BG; + ipu_crtc->plane[0] = ipu_plane_init(drm, ipu, pdata->dma[0], dp, 0, + DRM_PLANE_TYPE_PRIMARY); + if (IS_ERR(ipu_crtc->plane[0])) { + ret = PTR_ERR(ipu_crtc->plane[0]); + goto err_put_resources; + } + ret = imx_drm_add_crtc(drm, &ipu_crtc->base, &ipu_crtc->imx_crtc, - &ipu_crtc_helper_funcs, ipu_crtc->dev->of_node); + &ipu_crtc->plane[0]->base, &ipu_crtc_helper_funcs, + ipu_crtc->dev->of_node); if (ret) { dev_err(ipu_crtc->dev, "adding crtc failed with %d.\n", ret); goto err_put_resources; } - if (pdata->dp >= 0) - dp = IPU_DP_FLOW_SYNC_BG; - id = imx_drm_crtc_id(ipu_crtc->imx_crtc); - ipu_crtc->plane[0] = ipu_plane_init(ipu_crtc->base.dev, ipu, - pdata->dma[0], dp, BIT(id), true); ret = ipu_plane_get_resources(ipu_crtc->plane[0]); if (ret) { dev_err(ipu_crtc->dev, "getting plane 0 resources failed with %d.\n", @@ -379,10 +384,10 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc, /* If this crtc is using the DP, add an overlay plane */ if (pdata->dp >= 0 && pdata->dma[1] > 0) { - ipu_crtc->plane[1] = ipu_plane_init(ipu_crtc->base.dev, ipu, - pdata->dma[1], - IPU_DP_FLOW_SYNC_FG, - BIT(id), false); + ipu_crtc->plane[1] = ipu_plane_init(drm, ipu, pdata->dma[1], + IPU_DP_FLOW_SYNC_FG, + drm_crtc_mask(&ipu_crtc->base), + DRM_PLANE_TYPE_OVERLAY); if (IS_ERR(ipu_crtc->plane[1])) ipu_crtc->plane[1] = NULL; } @@ -407,28 +412,6 @@ err_put_resources: return ret; } -static struct device_node *ipu_drm_get_port_by_id(struct device_node *parent, - int port_id) -{ - struct device_node *port; - int id, ret; - - port = of_get_child_by_name(parent, "port"); - while (port) { - ret = of_property_read_u32(port, "reg", &id); - if (!ret && id == port_id) - return port; - - do { - port = of_get_next_child(parent, port); - if (!port) - return NULL; - } while (of_node_cmp(port->name, "port")); - } - - return NULL; -} - static int 
ipu_drm_bind(struct device *dev, struct device *master, void *data) { struct ipu_client_platformdata *pdata = dev->platform_data; @@ -470,23 +453,11 @@ static const struct component_ops ipu_crtc_ops = { static int ipu_drm_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ipu_client_platformdata *pdata = dev->platform_data; int ret; if (!dev->platform_data) return -EINVAL; - if (!dev->of_node) { - /* Associate crtc device with the corresponding DI port node */ - dev->of_node = ipu_drm_get_port_by_id(dev->parent->of_node, - pdata->di + 2); - if (!dev->of_node) { - dev_err(dev, "missing port@%d node in %s\n", - pdata->di + 2, dev->parent->of_node->full_name); - return -ENODEV; - } - } - ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); if (ret) return ret; diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 575f4c84388f..591ba2f1ae03 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -381,7 +381,7 @@ static struct drm_plane_funcs ipu_plane_funcs = { struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, int dma, int dp, unsigned int possible_crtcs, - bool priv) + enum drm_plane_type type) { struct ipu_plane *ipu_plane; int ret; @@ -399,10 +399,10 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, ipu_plane->dma = dma; ipu_plane->dp_flow = dp; - ret = drm_plane_init(dev, &ipu_plane->base, possible_crtcs, - &ipu_plane_funcs, ipu_plane_formats, - ARRAY_SIZE(ipu_plane_formats), - priv); + ret = drm_universal_plane_init(dev, &ipu_plane->base, possible_crtcs, + &ipu_plane_funcs, ipu_plane_formats, + ARRAY_SIZE(ipu_plane_formats), type, + NULL); if (ret) { DRM_ERROR("failed to initialize plane\n"); kfree(ipu_plane); diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h index 9b5eff18f5b8..3a443b413c60 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.h +++ b/drivers/gpu/drm/imx/ipuv3-plane.h @@ -34,7 +34,7 @@ struct ipu_plane { struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu, int dma, int dp, unsigned int possible_crtcs, - bool priv); + enum drm_plane_type type); /* Init IDMAC, DMFC, DP */ int ipu_plane_mode_set(struct ipu_plane *plane, struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index b4deb9cf9d71..b74bf8e334f5 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -54,7 +54,11 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector) if (imxpd->panel && imxpd->panel->funcs && imxpd->panel->funcs->get_modes) { + struct drm_display_info *di = &connector->display_info; + num_modes = imxpd->panel->funcs->get_modes(imxpd->panel); + if (!imxpd->bus_format && di->num_bus_formats) + imxpd->bus_format = di->bus_formats[0]; if (num_modes > 0) return num_modes; } @@ -144,23 +148,23 @@ static void imx_pd_encoder_disable(struct drm_encoder *encoder) drm_panel_unprepare(imxpd->panel); } -static struct drm_connector_funcs imx_pd_connector_funcs = { +static const struct drm_connector_funcs imx_pd_connector_funcs = { .dpms = drm_helper_connector_dpms, .fill_modes = drm_helper_probe_single_connector_modes, .detect = imx_pd_connector_detect, .destroy = imx_drm_connector_destroy, }; -static struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = { +static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = { .get_modes = 
imx_pd_connector_get_modes, .best_encoder = imx_pd_connector_best_encoder, }; -static struct drm_encoder_funcs imx_pd_encoder_funcs = { +static const struct drm_encoder_funcs imx_pd_encoder_funcs = { .destroy = imx_drm_encoder_destroy, }; -static struct drm_encoder_helper_funcs imx_pd_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs imx_pd_encoder_helper_funcs = { .dpms = imx_pd_encoder_dpms, .mode_fixup = imx_pd_encoder_mode_fixup, .prepare = imx_pd_encoder_prepare, @@ -188,7 +192,7 @@ static int imx_pd_register(struct drm_device *drm, drm_encoder_helper_add(&imxpd->encoder, &imx_pd_encoder_helper_funcs); drm_encoder_init(drm, &imxpd->encoder, &imx_pd_encoder_funcs, - DRM_MODE_ENCODER_NONE); + DRM_MODE_ENCODER_NONE, NULL); drm_connector_helper_add(&imxpd->connector, &imx_pd_connector_helper_funcs); diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index c99d3fe12881..19c18b7af28a 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1538,7 +1538,7 @@ static struct drm_encoder *mga_encoder_init(struct drm_device *dev) encoder->possible_crtcs = 0x1; drm_encoder_init(dev, encoder, &mga_encoder_encoder_funcs, - DRM_MODE_ENCODER_DAC); + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &mga_encoder_helper_funcs); return encoder; @@ -1684,13 +1684,13 @@ static void mga_connector_destroy(struct drm_connector *connector) kfree(connector); } -struct drm_connector_helper_funcs mga_vga_connector_helper_funcs = { +static const struct drm_connector_helper_funcs mga_vga_connector_helper_funcs = { .get_modes = mga_vga_get_modes, .mode_valid = mga_vga_mode_valid, .best_encoder = mga_connector_best_encoder, }; -struct drm_connector_funcs mga_vga_connector_funcs = { +static const struct drm_connector_funcs mga_vga_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = mga_vga_detect, .fill_modes = drm_helper_probe_single_connector_modes, diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 84d3ec98e6b9..215495c2780c 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -54,3 +54,11 @@ config DRM_MSM_DSI_20NM_PHY default y help Choose this option if the 20nm DSI PHY is used on the platform. + +config DRM_MSM_DSI_28NM_8960_PHY + bool "Enable DSI 28nm 8960 PHY driver in MSM DRM" + depends on DRM_MSM_DSI + default y + help + Choose this option if the 28nm DSI PHY 8960 variant is used on the + platform. 
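[Editorial aside, illustrative only: the C sketch below is not taken from any hunk in this series, and the foo_* identifiers, funcs tables and format list are hypothetical.] The imx and mgag200 hunks above all track the same two core DRM API changes: a CRTC's primary plane is now created explicitly, with an enum drm_plane_type, via drm_universal_plane_init(), and the plane/CRTC/encoder init functions grew a trailing name argument (NULL keeps the core's auto-generated names). Assuming the kernel 4.5-era signatures used throughout this diff, the resulting driver-side pattern looks roughly like:

#include <drm/drmP.h>
#include <drm/drm_crtc.h>

struct foo_pipe {
	struct drm_plane primary;
	struct drm_crtc crtc;
};

/* foo_plane_funcs, foo_crtc_funcs and foo_formats[] are assumed to exist. */
static int foo_pipe_init(struct drm_device *drm, struct foo_pipe *pipe)
{
	int ret;

	/*
	 * Create the primary plane before its CRTC. possible_crtcs can be
	 * 0 at this point: drm_crtc_init_with_planes() derives the mask
	 * from the CRTC it binds the plane to, which is also why the
	 * ipuv3 hunk above could drop its imx_drm_crtc_id()/BIT(id)
	 * bookkeeping.
	 */
	ret = drm_universal_plane_init(drm, &pipe->primary, 0,
				       &foo_plane_funcs, foo_formats,
				       ARRAY_SIZE(foo_formats),
				       DRM_PLANE_TYPE_PRIMARY, NULL);
	if (ret)
		return ret;

	/* No cursor plane; the trailing NULL keeps the default "crtc-N" name. */
	return drm_crtc_init_with_planes(drm, &pipe->crtc, &pipe->primary,
					 NULL, &foo_crtc_funcs, NULL);
}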
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 1c90290be716..065ad4138799 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -54,6 +54,7 @@ msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \ + mdp/mdp4/mdp4_dsi_encoder.o \ dsi/dsi_cfg.o \ dsi/dsi_host.o \ dsi/dsi_manager.o \ @@ -62,10 +63,12 @@ msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \ msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/phy/dsi_phy_28nm.o msm-$(CONFIG_DRM_MSM_DSI_20NM_PHY) += dsi/phy/dsi_phy_20nm.o +msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/phy/dsi_phy_28nm_8960.o ifeq ($(CONFIG_DRM_MSM_DSI_PLL),y) msm-y += dsi/pll/dsi_pll.o msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/pll/dsi_pll_28nm.o +msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/pll/dsi_pll_28nm_8960.o endif obj-$(CONFIG_DRM_MSM) += msm.o diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 1ea2df524fac..950d27d26b30 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -19,10 +19,6 @@ #include "adreno_gpu.h" -#if defined(DOWNSTREAM_CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF) -# include <mach/kgsl.h> -#endif - #define ANY_ID 0xff bool hang_debug = false; @@ -168,7 +164,6 @@ static void set_gpu_pdev(struct drm_device *dev, static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; -#ifdef CONFIG_OF struct device_node *child, *node = dev->of_node; u32 val; int ret; @@ -205,53 +200,6 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) return -ENXIO; } -#else - struct kgsl_device_platform_data *pdata = dev->platform_data; - uint32_t version = socinfo_get_version(); - if (cpu_is_apq8064ab()) { - config.fast_rate = 450000000; - config.slow_rate = 27000000; - config.bus_freq = 4; - config.rev = ADRENO_REV(3, 2, 1, 0); - } else if (cpu_is_apq8064()) { - config.fast_rate = 400000000; - config.slow_rate = 27000000; - config.bus_freq = 4; - - if (SOCINFO_VERSION_MAJOR(version) == 2) - config.rev = ADRENO_REV(3, 2, 0, 2); - else if ((SOCINFO_VERSION_MAJOR(version) == 1) && - (SOCINFO_VERSION_MINOR(version) == 1)) - config.rev = ADRENO_REV(3, 2, 0, 1); - else - config.rev = ADRENO_REV(3, 2, 0, 0); - - } else if (cpu_is_msm8960ab()) { - config.fast_rate = 400000000; - config.slow_rate = 320000000; - config.bus_freq = 4; - - if (SOCINFO_VERSION_MINOR(version) == 0) - config.rev = ADRENO_REV(3, 2, 1, 0); - else - config.rev = ADRENO_REV(3, 2, 1, 1); - - } else if (cpu_is_msm8930()) { - config.fast_rate = 400000000; - config.slow_rate = 27000000; - config.bus_freq = 3; - - if ((SOCINFO_VERSION_MAJOR(version) == 1) && - (SOCINFO_VERSION_MINOR(version) == 2)) - config.rev = ADRENO_REV(3, 0, 5, 2); - else - config.rev = ADRENO_REV(3, 0, 5, 0); - - } -# ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING - config.bus_scale_table = pdata->bus_scale_table; -# endif -#endif dev->platform_data = &config; set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev)); return 0; diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h index 5f5a3732cdf6..749fbb28ec3d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.h +++ b/drivers/gpu/drm/msm/dsi/dsi.h @@ -31,10 +31,12 @@ enum msm_dsi_phy_type { MSM_DSI_PHY_28NM_HPM, MSM_DSI_PHY_28NM_LP, MSM_DSI_PHY_20NM, + MSM_DSI_PHY_28NM_8960, MSM_DSI_PHY_MAX }; #define DSI_DEV_REGULATOR_MAX 8 +#define 
DSI_BUS_CLK_MAX 4 /* Regulators for DSI devices */ struct dsi_reg_entry { @@ -89,7 +91,7 @@ int msm_dsi_manager_phy_enable(int id, u32 *clk_pre, u32 *clk_post); void msm_dsi_manager_phy_disable(int id); int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg); -bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 iova, u32 len); +bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len); int msm_dsi_manager_register(struct msm_dsi *msm_dsi); void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi); @@ -143,7 +145,7 @@ int msm_dsi_host_cmd_tx(struct mipi_dsi_host *host, int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host, const struct mipi_dsi_msg *msg); void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, - u32 iova, u32 len); + u32 dma_base, u32 len); int msm_dsi_host_enable(struct mipi_dsi_host *host); int msm_dsi_host_disable(struct mipi_dsi_host *host); int msm_dsi_host_power_on(struct mipi_dsi_host *host); diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c index 5872d5e5934f..2a827d8093a2 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c @@ -13,9 +13,26 @@ #include "dsi_cfg.h" -/* DSI v2 has not been supported by now */ -static const struct msm_dsi_config dsi_v2_cfg = { +static const char * const dsi_v2_bus_clk_names[] = { + "core_mmss_clk", "iface_clk", "bus_clk", +}; + +static const struct msm_dsi_config apq8064_dsi_cfg = { .io_offset = 0, + .reg_cfg = { + .num = 3, + .regs = { + {"vdda", 1200000, 1200000, 100000, 100}, + {"avdd", 3000000, 3000000, 110000, 100}, + {"vddio", 1800000, 1800000, 100000, 100}, + }, + }, + .bus_clk_names = dsi_v2_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_v2_bus_clk_names), +}; + +static const char * const dsi_6g_bus_clk_names[] = { + "mdp_core_clk", "iface_clk", "bus_clk", "core_mmss_clk", }; static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { @@ -29,6 +46,12 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = { {"vddio", 1800000, 1800000, 100000, 100}, }, }, + .bus_clk_names = dsi_6g_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names), +}; + +static const char * const dsi_8916_bus_clk_names[] = { + "mdp_core_clk", "iface_clk", "bus_clk", }; static const struct msm_dsi_config msm8916_dsi_cfg = { @@ -42,6 +65,8 @@ static const struct msm_dsi_config msm8916_dsi_cfg = { {"vddio", 1800000, 1800000, 100000, 100}, }, }, + .bus_clk_names = dsi_8916_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_8916_bus_clk_names), }; static const struct msm_dsi_config msm8994_dsi_cfg = { @@ -57,11 +82,13 @@ static const struct msm_dsi_config msm8994_dsi_cfg = { {"lab_reg", -1, -1, -1, -1}, {"ibb_reg", -1, -1, -1, -1}, }, - } + }, + .bus_clk_names = dsi_6g_bus_clk_names, + .num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names), }; static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = { - {MSM_DSI_VER_MAJOR_V2, U32_MAX, &dsi_v2_cfg}, + {MSM_DSI_VER_MAJOR_V2, MSM_DSI_V2_VER_MINOR_8064, &apq8064_dsi_cfg}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_0, &msm8974_apq8084_dsi_cfg}, {MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_1, diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h index 4cf887240177..a68c836744a3 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h +++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h @@ -25,11 +25,15 @@ #define MSM_DSI_6G_VER_MINOR_V1_3 0x10030000 #define MSM_DSI_6G_VER_MINOR_V1_3_1 0x10030001 +#define MSM_DSI_V2_VER_MINOR_8064 0x0 + #define DSI_6G_REG_SHIFT 4 struct msm_dsi_config { u32 
io_offset; struct dsi_reg_config reg_cfg; + const char * const *bus_clk_names; + const int num_bus_clks; }; struct msm_dsi_cfg_handler { diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 4c49868efcda..48f9967b4a1b 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -24,26 +24,36 @@ #include <linux/of_graph.h> #include <linux/regulator/consumer.h> #include <linux/spinlock.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> #include <video/mipi_display.h> #include "dsi.h" #include "dsi.xml.h" +#include "sfpb.xml.h" #include "dsi_cfg.h" static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { u32 ver; - u32 ver_6g; if (!major || !minor) return -EINVAL; - /* From DSI6G(v3), addition of a 6G_HW_VERSION register at offset 0 + /* + * From DSI6G(v3), addition of a 6G_HW_VERSION register at offset 0 * makes all other registers 4-byte shifted down. + * + * In order to distinguish between DSI6G(v3) and beyond, and DSIv2 and + * older, we read the DSI_VERSION register without any shift (offset + * 0x1f0). In the case of DSIv2, this has to be a non-zero value. In + * the case of DSI6G, this has to be zero (the offset points to a + * scratch register which we never touch). */ - ver_6g = msm_readl(base + REG_DSI_6G_HW_VERSION); - if (ver_6g == 0) { - ver = msm_readl(base + REG_DSI_VERSION); + + ver = msm_readl(base + REG_DSI_VERSION); + if (ver) { + /* older dsi host, there is no register shift */ ver = FIELD(ver, DSI_VERSION_MAJOR); if (ver <= MSM_DSI_VER_MAJOR_V2) { /* old versions */ @@ -54,12 +64,17 @@ static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) return -EINVAL; } } else { + /* + * newer host, offset 0 has 6G_HW_VERSION, the rest of the + * registers are shifted down, read DSI_VERSION again with + * the shifted offset + */ ver = msm_readl(base + DSI_6G_REG_SHIFT + REG_DSI_VERSION); ver = FIELD(ver, DSI_VERSION_MAJOR); if (ver == MSM_DSI_VER_MAJOR_6G) { /* 6G version */ *major = ver; - *minor = ver_6g; + *minor = msm_readl(base + REG_DSI_6G_HW_VERSION); return 0; } else { return -EINVAL; @@ -91,10 +106,9 @@ struct msm_dsi_host { void __iomem *ctrl_base; struct regulator_bulk_data supplies[DSI_DEV_REGULATOR_MAX]; - struct clk *mdp_core_clk; - struct clk *ahb_clk; - struct clk *axi_clk; - struct clk *mmss_misc_ahb_clk; + + struct clk *bus_clks[DSI_BUS_CLK_MAX]; + struct clk *byte_clk; struct clk *esc_clk; struct clk *pixel_clk; @@ -102,6 +116,14 @@ struct msm_dsi_host { struct clk *pixel_clk_src; u32 byte_clk_rate; + u32 esc_clk_rate; + + /* DSI v2 specific clocks */ + struct clk *src_clk; + struct clk *esc_clk_src; + struct clk *dsi_clk_src; + + u32 src_clk_rate; struct gpio_desc *disp_en_gpio; struct gpio_desc *te_gpio; @@ -119,9 +141,19 @@ struct msm_dsi_host { struct work_struct err_work; struct workqueue_struct *workqueue; + /* DSI 6G TX buffer */ struct drm_gem_object *tx_gem_obj; + + /* DSI v2 TX buffer */ + void *tx_buf; + dma_addr_t tx_buf_paddr; + + int tx_size; + u8 *rx_buf; + struct regmap *sfpb; + struct drm_display_mode *mode; /* connected device info */ @@ -165,21 +197,31 @@ static const struct msm_dsi_cfg_handler *dsi_get_config( struct msm_dsi_host *msm_host) { const struct msm_dsi_cfg_handler *cfg_hnd = NULL; + struct device *dev = &msm_host->pdev->dev; struct regulator *gdsc_reg; + struct clk *ahb_clk; int ret; u32 major = 0, minor = 0; - gdsc_reg = regulator_get(&msm_host->pdev->dev, "gdsc"); + gdsc_reg = regulator_get(dev, "gdsc"); if
(IS_ERR(gdsc_reg)) { pr_err("%s: cannot get gdsc\n", __func__); goto exit; } + + ahb_clk = clk_get(dev, "iface_clk"); + if (IS_ERR(ahb_clk)) { + pr_err("%s: cannot get interface clock\n", __func__); + goto put_gdsc; + } + ret = regulator_enable(gdsc_reg); if (ret) { pr_err("%s: unable to enable gdsc\n", __func__); - goto put_gdsc; + goto put_clk; } - ret = clk_prepare_enable(msm_host->ahb_clk); + + ret = clk_prepare_enable(ahb_clk); if (ret) { pr_err("%s: unable to enable ahb_clk\n", __func__); goto disable_gdsc; @@ -196,9 +238,11 @@ static const struct msm_dsi_cfg_handler *dsi_get_config( DBG("%s: Version %x:%x\n", __func__, major, minor); disable_clks: - clk_disable_unprepare(msm_host->ahb_clk); + clk_disable_unprepare(ahb_clk); disable_gdsc: regulator_disable(gdsc_reg); +put_clk: + clk_put(ahb_clk); put_gdsc: regulator_put(gdsc_reg); exit: @@ -295,40 +339,23 @@ static int dsi_regulator_init(struct msm_dsi_host *msm_host) static int dsi_clk_init(struct msm_dsi_host *msm_host) { struct device *dev = &msm_host->pdev->dev; - int ret = 0; - - msm_host->mdp_core_clk = devm_clk_get(dev, "mdp_core_clk"); - if (IS_ERR(msm_host->mdp_core_clk)) { - ret = PTR_ERR(msm_host->mdp_core_clk); - pr_err("%s: Unable to get mdp core clk. ret=%d\n", - __func__, ret); - goto exit; - } - - msm_host->ahb_clk = devm_clk_get(dev, "iface_clk"); - if (IS_ERR(msm_host->ahb_clk)) { - ret = PTR_ERR(msm_host->ahb_clk); - pr_err("%s: Unable to get mdss ahb clk. ret=%d\n", - __func__, ret); - goto exit; - } - - msm_host->axi_clk = devm_clk_get(dev, "bus_clk"); - if (IS_ERR(msm_host->axi_clk)) { - ret = PTR_ERR(msm_host->axi_clk); - pr_err("%s: Unable to get axi bus clk. ret=%d\n", - __func__, ret); - goto exit; - } - - msm_host->mmss_misc_ahb_clk = devm_clk_get(dev, "core_mmss_clk"); - if (IS_ERR(msm_host->mmss_misc_ahb_clk)) { - ret = PTR_ERR(msm_host->mmss_misc_ahb_clk); - pr_err("%s: Unable to get mmss misc ahb clk. ret=%d\n", - __func__, ret); - goto exit; + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; + const struct msm_dsi_config *cfg = cfg_hnd->cfg; + int i, ret = 0; + + /* get bus clocks */ + for (i = 0; i < cfg->num_bus_clks; i++) { + msm_host->bus_clks[i] = devm_clk_get(dev, + cfg->bus_clk_names[i]); + if (IS_ERR(msm_host->bus_clks[i])) { + ret = PTR_ERR(msm_host->bus_clks[i]); + pr_err("%s: Unable to get %s, ret = %d\n", + __func__, cfg->bus_clk_names[i], ret); + goto exit; + } } + /* get link and source clocks */ msm_host->byte_clk = devm_clk_get(dev, "byte_clk"); if (IS_ERR(msm_host->byte_clk)) { ret = PTR_ERR(msm_host->byte_clk); @@ -356,80 +383,85 @@ static int dsi_clk_init(struct msm_dsi_host *msm_host) goto exit; } - msm_host->byte_clk_src = devm_clk_get(dev, "byte_clk_src"); - if (IS_ERR(msm_host->byte_clk_src)) { - ret = PTR_ERR(msm_host->byte_clk_src); + msm_host->byte_clk_src = clk_get_parent(msm_host->byte_clk); + if (!msm_host->byte_clk_src) { + ret = -ENODEV; pr_err("%s: can't find byte_clk_src. ret=%d\n", __func__, ret); - msm_host->byte_clk_src = NULL; goto exit; } - msm_host->pixel_clk_src = devm_clk_get(dev, "pixel_clk_src"); - if (IS_ERR(msm_host->pixel_clk_src)) { - ret = PTR_ERR(msm_host->pixel_clk_src); + msm_host->pixel_clk_src = clk_get_parent(msm_host->pixel_clk); + if (!msm_host->pixel_clk_src) { + ret = -ENODEV; pr_err("%s: can't find pixel_clk_src. 
ret=%d\n", __func__, ret); - msm_host->pixel_clk_src = NULL; goto exit; } + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { + msm_host->src_clk = devm_clk_get(dev, "src_clk"); + if (IS_ERR(msm_host->src_clk)) { + ret = PTR_ERR(msm_host->src_clk); + pr_err("%s: can't find dsi_src_clk. ret=%d\n", + __func__, ret); + msm_host->src_clk = NULL; + goto exit; + } + + msm_host->esc_clk_src = clk_get_parent(msm_host->esc_clk); + if (!msm_host->esc_clk_src) { + ret = -ENODEV; + pr_err("%s: can't get esc_clk_src. ret=%d\n", + __func__, ret); + goto exit; + } + + msm_host->dsi_clk_src = clk_get_parent(msm_host->src_clk); + if (!msm_host->dsi_clk_src) { + ret = -ENODEV; + pr_err("%s: can't get dsi_clk_src. ret=%d\n", + __func__, ret); + } + } exit: return ret; } static int dsi_bus_clk_enable(struct msm_dsi_host *msm_host) { - int ret; + const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg; + int i, ret; DBG("id=%d", msm_host->id); - ret = clk_prepare_enable(msm_host->mdp_core_clk); - if (ret) { - pr_err("%s: failed to enable mdp_core_clock, %d\n", - __func__, ret); - goto core_clk_err; - } - - ret = clk_prepare_enable(msm_host->ahb_clk); - if (ret) { - pr_err("%s: failed to enable ahb clock, %d\n", __func__, ret); - goto ahb_clk_err; - } - - ret = clk_prepare_enable(msm_host->axi_clk); - if (ret) { - pr_err("%s: failed to enable ahb clock, %d\n", __func__, ret); - goto axi_clk_err; - } - - ret = clk_prepare_enable(msm_host->mmss_misc_ahb_clk); - if (ret) { - pr_err("%s: failed to enable mmss misc ahb clk, %d\n", - __func__, ret); - goto misc_ahb_clk_err; + for (i = 0; i < cfg->num_bus_clks; i++) { + ret = clk_prepare_enable(msm_host->bus_clks[i]); + if (ret) { + pr_err("%s: failed to enable bus clock %d ret %d\n", + __func__, i, ret); + goto err; + } } return 0; +err: + while (--i >= 0) + clk_disable_unprepare(msm_host->bus_clks[i]); -misc_ahb_clk_err: - clk_disable_unprepare(msm_host->axi_clk); -axi_clk_err: - clk_disable_unprepare(msm_host->ahb_clk); -ahb_clk_err: - clk_disable_unprepare(msm_host->mdp_core_clk); -core_clk_err: return ret; } static void dsi_bus_clk_disable(struct msm_dsi_host *msm_host) { + const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg; + int i; + DBG(""); - clk_disable_unprepare(msm_host->mmss_misc_ahb_clk); - clk_disable_unprepare(msm_host->axi_clk); - clk_disable_unprepare(msm_host->ahb_clk); - clk_disable_unprepare(msm_host->mdp_core_clk); + + for (i = cfg->num_bus_clks - 1; i >= 0; i--) + clk_disable_unprepare(msm_host->bus_clks[i]); } -static int dsi_link_clk_enable(struct msm_dsi_host *msm_host) +static int dsi_link_clk_enable_6g(struct msm_dsi_host *msm_host) { int ret; @@ -476,11 +508,98 @@ error: return ret; } -static void dsi_link_clk_disable(struct msm_dsi_host *msm_host) +static int dsi_link_clk_enable_v2(struct msm_dsi_host *msm_host) { + int ret; + + DBG("Set clk rates: pclk=%d, byteclk=%d, esc_clk=%d, dsi_src_clk=%d", + msm_host->mode->clock, msm_host->byte_clk_rate, + msm_host->esc_clk_rate, msm_host->src_clk_rate); + + ret = clk_set_rate(msm_host->byte_clk, msm_host->byte_clk_rate); + if (ret) { + pr_err("%s: Failed to set rate byte clk, %d\n", __func__, ret); + goto error; + } + + ret = clk_set_rate(msm_host->esc_clk, msm_host->esc_clk_rate); + if (ret) { + pr_err("%s: Failed to set rate esc clk, %d\n", __func__, ret); + goto error; + } + + ret = clk_set_rate(msm_host->src_clk, msm_host->src_clk_rate); + if (ret) { + pr_err("%s: Failed to set rate src clk, %d\n", __func__, ret); + goto error; + } + + ret = clk_set_rate(msm_host->pixel_clk,
msm_host->mode->clock * 1000); + if (ret) { + pr_err("%s: Failed to set rate pixel clk, %d\n", __func__, ret); + goto error; + } + + ret = clk_prepare_enable(msm_host->byte_clk); + if (ret) { + pr_err("%s: Failed to enable dsi byte clk\n", __func__); + goto error; + } + + ret = clk_prepare_enable(msm_host->esc_clk); + if (ret) { + pr_err("%s: Failed to enable dsi esc clk\n", __func__); + goto esc_clk_err; + } + + ret = clk_prepare_enable(msm_host->src_clk); + if (ret) { + pr_err("%s: Failed to enable dsi src clk\n", __func__); + goto src_clk_err; + } + + ret = clk_prepare_enable(msm_host->pixel_clk); + if (ret) { + pr_err("%s: Failed to enable dsi pixel clk\n", __func__); + goto pixel_clk_err; + } + + return 0; + +pixel_clk_err: + clk_disable_unprepare(msm_host->src_clk); +src_clk_err: clk_disable_unprepare(msm_host->esc_clk); - clk_disable_unprepare(msm_host->pixel_clk); +esc_clk_err: clk_disable_unprepare(msm_host->byte_clk); +error: + return ret; +} + +static int dsi_link_clk_enable(struct msm_dsi_host *msm_host) +{ + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; + + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) + return dsi_link_clk_enable_6g(msm_host); + else + return dsi_link_clk_enable_v2(msm_host); +} + +static void dsi_link_clk_disable(struct msm_dsi_host *msm_host) +{ + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; + + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { + clk_disable_unprepare(msm_host->esc_clk); + clk_disable_unprepare(msm_host->pixel_clk); + clk_disable_unprepare(msm_host->byte_clk); + } else { + clk_disable_unprepare(msm_host->pixel_clk); + clk_disable_unprepare(msm_host->src_clk); + clk_disable_unprepare(msm_host->esc_clk); + clk_disable_unprepare(msm_host->byte_clk); + } } static int dsi_clk_ctrl(struct msm_dsi_host *msm_host, bool enable) @@ -515,6 +634,7 @@ unlock_ret: static int dsi_calc_clk_rate(struct msm_dsi_host *msm_host) { struct drm_display_mode *mode = msm_host->mode; + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; u8 lanes = msm_host->lanes; u32 bpp = dsi_get_bpp(msm_host->format); u32 pclk_rate; @@ -534,6 +654,47 @@ static int dsi_calc_clk_rate(struct msm_dsi_host *msm_host) DBG("pclk=%d, bclk=%d", pclk_rate, msm_host->byte_clk_rate); + msm_host->esc_clk_rate = clk_get_rate(msm_host->esc_clk); + + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { + unsigned int esc_mhz, esc_div; + unsigned long byte_mhz; + + msm_host->src_clk_rate = (pclk_rate * bpp) / 8; + + /* + * The esc clock is the byte clock followed by a 4-bit divider, + * so we need to find an escape clock frequency that is within + * the MIPI DSI spec range and within the maximum divider limit. + * We iterate over escape clock frequencies from 20 MHz down + * to 5 MHz and pick the first one our divider can support. + */ + + byte_mhz = msm_host->byte_clk_rate / 1000000; + + for (esc_mhz = 20; esc_mhz >= 5; esc_mhz--) { + esc_div = DIV_ROUND_UP(byte_mhz, esc_mhz); + + /* + * TODO: Ideally, we shouldn't know what sort of divider + * is available in mmss_cc; we're just assuming that + * it'll always be a 4-bit divider. Need to come up with + * a better way here.
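+ * + * For example: with byte_clk_rate = 52500000, byte_mhz comes out to 52, + * and the very first candidate, esc_mhz = 20, already gives + * esc_div = DIV_ROUND_UP(52, 20) = 3, which a 4-bit divider can carry, + * so the search stops there and esc_clk_rate below becomes + * 52500000 / 3 = 17500000, i.e. 17.5 MHz, inside the 5-20 MHz window.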
+ */ + if (esc_div >= 1 && esc_div <= 16) + break; + } + + if (esc_mhz < 5) + return -EINVAL; + + msm_host->esc_clk_rate = msm_host->byte_clk_rate / esc_div; + + DBG("esc=%d, src=%d", msm_host->esc_clk_rate, + msm_host->src_clk_rate); + } + return 0; } @@ -835,29 +996,46 @@ static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) static int dsi_tx_buf_alloc(struct msm_dsi_host *msm_host, int size) { struct drm_device *dev = msm_host->dev; + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; int ret; u32 iova; - mutex_lock(&dev->struct_mutex); - msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED); - if (IS_ERR(msm_host->tx_gem_obj)) { - ret = PTR_ERR(msm_host->tx_gem_obj); - pr_err("%s: failed to allocate gem, %d\n", __func__, ret); - msm_host->tx_gem_obj = NULL; + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { + mutex_lock(&dev->struct_mutex); + msm_host->tx_gem_obj = msm_gem_new(dev, size, MSM_BO_UNCACHED); + if (IS_ERR(msm_host->tx_gem_obj)) { + ret = PTR_ERR(msm_host->tx_gem_obj); + pr_err("%s: failed to allocate gem, %d\n", + __func__, ret); + msm_host->tx_gem_obj = NULL; + mutex_unlock(&dev->struct_mutex); + return ret; + } + + ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova); mutex_unlock(&dev->struct_mutex); - return ret; - } + if (ret) { + pr_err("%s: failed to get iova, %d\n", __func__, ret); + return ret; + } - ret = msm_gem_get_iova_locked(msm_host->tx_gem_obj, 0, &iova); - if (ret) { - pr_err("%s: failed to get iova, %d\n", __func__, ret); - return ret; - } - mutex_unlock(&dev->struct_mutex); + if (iova & 0x07) { + pr_err("%s: buf NOT 8 bytes aligned\n", __func__); + return -EINVAL; + } - if (iova & 0x07) { - pr_err("%s: buf NOT 8 bytes aligned\n", __func__); - return -EINVAL; + msm_host->tx_size = msm_host->tx_gem_obj->size; + } else { + msm_host->tx_buf = dma_alloc_coherent(dev->dev, size, + &msm_host->tx_buf_paddr, GFP_KERNEL); + if (!msm_host->tx_buf) { + ret = -ENOMEM; + pr_err("%s: failed to allocate tx buf, %d\n", + __func__, ret); + return ret; + } + + msm_host->tx_size = size; } return 0; @@ -874,14 +1052,19 @@ static void dsi_tx_buf_free(struct msm_dsi_host *msm_host) msm_host->tx_gem_obj = NULL; mutex_unlock(&dev->struct_mutex); } + + if (msm_host->tx_buf) + dma_free_coherent(dev->dev, msm_host->tx_size, msm_host->tx_buf, + msm_host->tx_buf_paddr); } /* * prepare cmd buffer to be txed */ -static int dsi_cmd_dma_add(struct drm_gem_object *tx_gem, - const struct mipi_dsi_msg *msg) +static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host, + const struct mipi_dsi_msg *msg) { + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; struct mipi_dsi_packet packet; int len; int ret; @@ -894,17 +1077,20 @@ static int dsi_cmd_dma_add(struct drm_gem_object *tx_gem, } len = (packet.size + 3) & (~0x3); - if (len > tx_gem->size) { + if (len > msm_host->tx_size) { pr_err("%s: packet size is too big\n", __func__); return -EINVAL; } - data = msm_gem_vaddr(tx_gem); - - if (IS_ERR(data)) { - ret = PTR_ERR(data); - pr_err("%s: get vaddr failed, %d\n", __func__, ret); - return ret; + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { + data = msm_gem_vaddr(msm_host->tx_gem_obj); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + pr_err("%s: get vaddr failed, %d\n", __func__, ret); + return ret; + } + } else { + data = msm_host->tx_buf; } /* MSM specific command format in memory */ @@ -970,17 +1156,21 @@ static int dsi_long_read_resp(u8 *buf, const struct mipi_dsi_msg *msg) return msg->rx_len; } - static int dsi_cmd_dma_tx(struct msm_dsi_host 
*msm_host, int len) { + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; int ret; - u32 iova; + u32 dma_base; bool triggered; - ret = msm_gem_get_iova(msm_host->tx_gem_obj, 0, &iova); - if (ret) { - pr_err("%s: failed to get iova: %d\n", __func__, ret); - return ret; + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) { + ret = msm_gem_get_iova(msm_host->tx_gem_obj, 0, &dma_base); + if (ret) { + pr_err("%s: failed to get iova: %d\n", __func__, ret); + return ret; + } + } else { + dma_base = msm_host->tx_buf_paddr; } reinit_completion(&msm_host->dma_comp); @@ -988,7 +1178,7 @@ static int dsi_cmd_dma_tx(struct msm_dsi_host *msm_host, int len) dsi_wait4video_eng_busy(msm_host); triggered = msm_dsi_manager_cmd_xfer_trigger( - msm_host->id, iova, len); + msm_host->id, dma_base, len); if (triggered) { ret = wait_for_completion_timeout(&msm_host->dma_comp, msecs_to_jiffies(200)); @@ -1060,7 +1250,7 @@ static int dsi_cmds2buf_tx(struct msm_dsi_host *msm_host, int bllp_len = msm_host->mode->hdisplay * dsi_get_bpp(msm_host->format) / 8; - len = dsi_cmd_dma_add(msm_host->tx_gem_obj, msg); + len = dsi_cmd_dma_add(msm_host, msg); if (!len) { pr_err("%s: failed to add cmd type = 0x%x\n", __func__, msg->type); @@ -1383,6 +1573,16 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host) msm_host->device_node = device_node; + if (of_property_read_bool(np, "syscon-sfpb")) { + msm_host->sfpb = syscon_regmap_lookup_by_phandle(np, + "syscon-sfpb"); + if (IS_ERR(msm_host->sfpb)) { + dev_err(dev, "%s: failed to get sfpb regmap\n", + __func__); + return PTR_ERR(msm_host->sfpb); + } + } + return 0; } @@ -1408,12 +1608,6 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) goto fail; } - ret = dsi_clk_init(msm_host); - if (ret) { - pr_err("%s: unable to initialize dsi clks\n", __func__); - goto fail; - } - msm_host->ctrl_base = msm_ioremap(pdev, "dsi_ctrl", "DSI CTRL"); if (IS_ERR(msm_host->ctrl_base)) { pr_err("%s: unable to map Dsi ctrl base\n", __func__); @@ -1437,6 +1631,12 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi) goto fail; } + ret = dsi_clk_init(msm_host); + if (ret) { + pr_err("%s: unable to initialize dsi clks\n", __func__); + goto fail; + } + msm_host->rx_buf = devm_kzalloc(&pdev->dev, SZ_4K, GFP_KERNEL); if (!msm_host->rx_buf) { pr_err("%s: alloc rx temp buf failed\n", __func__); @@ -1750,11 +1950,12 @@ int msm_dsi_host_cmd_rx(struct mipi_dsi_host *host, return ret; } -void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, u32 iova, u32 len) +void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host, u32 dma_base, + u32 len) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); - dsi_write(msm_host, REG_DSI_DMA_BASE, iova); + dsi_write(msm_host, REG_DSI_DMA_BASE, dma_base); dsi_write(msm_host, REG_DSI_DMA_LEN, len); dsi_write(msm_host, REG_DSI_TRIG_DMA, 1); @@ -1766,6 +1967,7 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host, struct msm_dsi_pll *src_pll) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + const struct msm_dsi_cfg_handler *cfg_hnd = msm_host->cfg_hnd; struct clk *byte_clk_provider, *pixel_clk_provider; int ret; @@ -1791,6 +1993,22 @@ int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host, goto exit; } + if (cfg_hnd->major == MSM_DSI_VER_MAJOR_V2) { + ret = clk_set_parent(msm_host->dsi_clk_src, pixel_clk_provider); + if (ret) { + pr_err("%s: can't set parent to dsi_clk_src. 
ret=%d\n", + __func__, ret); + goto exit; + } + + ret = clk_set_parent(msm_host->esc_clk_src, byte_clk_provider); + if (ret) { + pr_err("%s: can't set parent to esc_clk_src. ret=%d\n", + __func__, ret); + goto exit; + } + } + exit: return ret; } @@ -1828,6 +2046,20 @@ int msm_dsi_host_disable(struct mipi_dsi_host *host) return 0; } +static void msm_dsi_sfpb_config(struct msm_dsi_host *msm_host, bool enable) +{ + enum sfpb_ahb_arb_master_port_en en; + + if (!msm_host->sfpb) + return; + + en = enable ? SFPB_MASTER_PORT_ENABLE : SFPB_MASTER_PORT_DISABLE; + + regmap_update_bits(msm_host->sfpb, REG_SFPB_GPREG, + SFPB_GPREG_MASTER_PORT_EN__MASK, + SFPB_GPREG_MASTER_PORT_EN(en)); +} + int msm_dsi_host_power_on(struct mipi_dsi_host *host) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); @@ -1840,6 +2072,8 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host) goto unlock_ret; } + msm_dsi_sfpb_config(msm_host, true); + ret = dsi_calc_clk_rate(msm_host); if (ret) { pr_err("%s: unable to calc clk rate, %d\n", __func__, ret); @@ -1862,7 +2096,7 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host) dsi_phy_sw_reset(msm_host); ret = msm_dsi_manager_phy_enable(msm_host->id, msm_host->byte_clk_rate * 8, - clk_get_rate(msm_host->esc_clk), + msm_host->esc_clk_rate, &clk_pre, &clk_post); dsi_bus_clk_disable(msm_host); if (ret) { @@ -1927,6 +2161,8 @@ int msm_dsi_host_power_off(struct mipi_dsi_host *host) dsi_host_regulator_disable(msm_host); + msm_dsi_sfpb_config(msm_host, false); + DBG("-"); msm_host->power_on = false; diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 0455ff75074a..58ba7ec17f51 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -774,7 +774,7 @@ restore_host0: return ret; } -bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 iova, u32 len) +bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len) { struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id); struct msm_dsi *msm_dsi0 = dsi_mgr_get_dsi(DSI_0); @@ -784,9 +784,9 @@ bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 iova, u32 len) return false; if (IS_SYNC_NEEDED() && msm_dsi0) - msm_dsi_host_cmd_xfer_commit(msm_dsi0->host, iova, len); + msm_dsi_host_cmd_xfer_commit(msm_dsi0->host, dma_base, len); - msm_dsi_host_cmd_xfer_commit(host, iova, len); + msm_dsi_host_cmd_xfer_commit(host, dma_base, len); return true; } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index f1f955f571fa..91a95fb04a4a 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -277,6 +277,10 @@ static const struct of_device_id dsi_phy_dt_match[] = { { .compatible = "qcom,dsi-phy-20nm", .data = &dsi_phy_20nm_cfgs }, #endif +#ifdef CONFIG_DRM_MSM_DSI_28NM_8960_PHY + { .compatible = "qcom,dsi-phy-28nm-8960", + .data = &dsi_phy_28nm_8960_cfgs }, +#endif {} }; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index 0456b253239f..0d54ed00386d 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -43,6 +43,7 @@ struct msm_dsi_phy_cfg { extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs; extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs; +extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs; struct msm_dsi_dphy_timing { u32 clk_pre; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c 
b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c new file mode 100644 index 000000000000..197b039ca1f1 --- /dev/null +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "dsi_phy.h" +#include "dsi.xml.h" + +static void dsi_28nm_dphy_set_timing(struct msm_dsi_phy *phy, + struct msm_dsi_dphy_timing *timing) +{ + void __iomem *base = phy->base; + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_0, + DSI_28nm_8960_PHY_TIMING_CTRL_0_CLK_ZERO(timing->clk_zero)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_1, + DSI_28nm_8960_PHY_TIMING_CTRL_1_CLK_TRAIL(timing->clk_trail)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_2, + DSI_28nm_8960_PHY_TIMING_CTRL_2_CLK_PREPARE(timing->clk_prepare)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_3, 0x0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_4, + DSI_28nm_8960_PHY_TIMING_CTRL_4_HS_EXIT(timing->hs_exit)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_5, + DSI_28nm_8960_PHY_TIMING_CTRL_5_HS_ZERO(timing->hs_zero)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_6, + DSI_28nm_8960_PHY_TIMING_CTRL_6_HS_PREPARE(timing->hs_prepare)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_7, + DSI_28nm_8960_PHY_TIMING_CTRL_7_HS_TRAIL(timing->hs_trail)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_8, + DSI_28nm_8960_PHY_TIMING_CTRL_8_HS_RQST(timing->hs_rqst)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_9, + DSI_28nm_8960_PHY_TIMING_CTRL_9_TA_GO(timing->ta_go) | + DSI_28nm_8960_PHY_TIMING_CTRL_9_TA_SURE(timing->ta_sure)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_10, + DSI_28nm_8960_PHY_TIMING_CTRL_10_TA_GET(timing->ta_get)); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_TIMING_CTRL_11, + DSI_28nm_8960_PHY_TIMING_CTRL_11_TRIG3_CMD(0)); +} + +static void dsi_28nm_phy_regulator_init(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_0, 0x3); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_1, 1); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_2, 1); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_3, 0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_4, + 0x100); +} + +static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_0, 0x3); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_1, 0xa); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_2, 0x4); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_3, 0x0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CTRL_4, 0x20); +} + +static void dsi_28nm_phy_calibration(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->reg_base; + u32 status; + int i = 5000; + + dsi_phy_write(base + 
REG_DSI_28nm_8960_PHY_MISC_REGULATOR_CAL_PWR_CFG, + 0x3); + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_SW_CFG_2, 0x0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_1, 0x5a); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_3, 0x10); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_4, 0x1); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_CFG_0, 0x1); + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_TRIGGER, 0x1); + usleep_range(5000, 6000); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_MISC_CAL_HW_TRIGGER, 0x0); + + do { + status = dsi_phy_read(base + + REG_DSI_28nm_8960_PHY_MISC_CAL_STATUS); + + if (!(status & DSI_28nm_8960_PHY_MISC_CAL_STATUS_CAL_BUSY)) + break; + + udelay(1); + } while (--i > 0); +} + +static void dsi_28nm_phy_lane_config(struct msm_dsi_phy *phy) +{ + void __iomem *base = phy->base; + int i; + + for (i = 0; i < 4; i++) { + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_0(i), 0x80); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_1(i), 0x45); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_CFG_2(i), 0x00); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_DATAPATH(i), + 0x00); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_STR_0(i), + 0x01); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LN_TEST_STR_1(i), + 0x66); + } + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_0, 0x40); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_1, 0x67); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_CFG_2, 0x0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_DATAPATH, 0x0); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_STR0, 0x1); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LNCK_TEST_STR1, 0x88); +} + +static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, + const unsigned long bit_rate, const unsigned long esc_rate) +{ + struct msm_dsi_dphy_timing *timing = &phy->timing; + void __iomem *base = phy->base; + + DBG(""); + + if (msm_dsi_dphy_timing_calc(timing, bit_rate, esc_rate)) { + dev_err(&phy->pdev->dev, + "%s: D-PHY timing calculation failed\n", __func__); + return -EINVAL; + } + + dsi_28nm_phy_regulator_init(phy); + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_LDO_CTRL, 0x04); + + /* strength control */ + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_0, 0xff); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_1, 0x00); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_STRENGTH_2, 0x06); + + /* phy ctrl */ + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_0, 0x5f); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_1, 0x00); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_2, 0x00); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_CTRL_3, 0x10); + + dsi_28nm_phy_regulator_ctrl(phy); + + dsi_28nm_phy_calibration(phy); + + dsi_28nm_phy_lane_config(phy); + + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_4, 0x0f); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_1, 0x03); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_0, 0x03); + dsi_phy_write(base + REG_DSI_28nm_8960_PHY_BIST_CTRL_4, 0x0); + + dsi_28nm_dphy_set_timing(phy, timing); + + return 0; +} + +static void dsi_28nm_phy_disable(struct msm_dsi_phy *phy) +{ + dsi_phy_write(phy->base + REG_DSI_28nm_8960_PHY_CTRL_0, 0x0); + + /* + * Wait for the registers writes to complete in order to + * ensure that the phy is completely disabled + */ + wmb(); +} + +const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs = { + .type = MSM_DSI_PHY_28NM_8960, + .src_pll_truthtable = { {true, true}, {false, 
true} }, + .reg_cfg = { + .num = 1, + .regs = { + {"vddio", 1800000, 1800000, 100000, 100}, + }, + }, + .ops = { + .enable = dsi_28nm_phy_enable, + .disable = dsi_28nm_phy_disable, + }, +}; diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c index 5104fc9f9a53..5cd438f91afe 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c @@ -151,6 +151,9 @@ struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev, case MSM_DSI_PHY_28NM_LP: pll = msm_dsi_pll_28nm_init(pdev, type, id); break; + case MSM_DSI_PHY_28NM_8960: + pll = msm_dsi_pll_28nm_8960_init(pdev, id); + break; default: pll = ERR_PTR(-ENXIO); break; diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h index 063caa2c5740..80b6038334a6 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h @@ -93,6 +93,16 @@ static inline struct msm_dsi_pll *msm_dsi_pll_28nm_init( return ERR_PTR(-ENODEV); } #endif +#ifdef CONFIG_DRM_MSM_DSI_28NM_8960_PHY +struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev, + int id); +#else +static inline struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init( + struct platform_device *pdev, int id) +{ + return ERR_PTR(-ENODEV); +} +#endif #endif /* __DSI_PLL_H__ */ diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c new file mode 100644 index 000000000000..38c90e1eb002 --- /dev/null +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2012-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + +#include <linux/clk-provider.h> + +#include "dsi_pll.h" +#include "dsi.xml.h" + +/* + * DSI PLL 28nm (8960/A family) - clock diagram (eg: DSI1): + * + * + * +------+ + * dsi1vco_clk ----o-----| DIV1 |---dsi1pllbit (not exposed as clock) + * F * byte_clk | +------+ + * | bit clock divider (F / 8) + * | + * | +------+ + * o-----| DIV2 |---dsi0pllbyte---o---> To byte RCG + * | +------+ | (sets parent rate) + * | byte clock divider (F) | + * | | + * | o---> To esc RCG + * | (doesn't set parent rate) + * | + * | +------+ + * o-----| DIV3 |----dsi0pll------o---> To dsi RCG + * +------+ | (sets parent rate) + * dsi clock divider (F * magic) | + * | + * o---> To pixel rcg + * (doesn't set parent rate) + */ + +#define POLL_MAX_READS 8000 +#define POLL_TIMEOUT_US 1 + +#define NUM_PROVIDED_CLKS 2 + +#define VCO_REF_CLK_RATE 27000000 +#define VCO_MIN_RATE 600000000 +#define VCO_MAX_RATE 1200000000 + +#define DSI_BYTE_PLL_CLK 0 +#define DSI_PIXEL_PLL_CLK 1 + +#define VCO_PREF_DIV_RATIO 27 + +struct pll_28nm_cached_state { + unsigned long vco_rate; + u8 postdiv3; + u8 postdiv2; + u8 postdiv1; +}; + +struct clk_bytediv { + struct clk_hw hw; + void __iomem *reg; +}; + +struct dsi_pll_28nm { + struct msm_dsi_pll base; + + int id; + struct platform_device *pdev; + void __iomem *mmio; + + /* custom byte clock divider */ + struct clk_bytediv *bytediv; + + /* private clocks: */ + struct clk *clks[NUM_DSI_CLOCKS_MAX]; + u32 num_clks; + + /* clock-provider: */ + struct clk *provided_clks[NUM_PROVIDED_CLKS]; + struct clk_onecell_data clk_data; + + struct pll_28nm_cached_state cached_state; +}; + +#define to_pll_28nm(x) container_of(x, struct dsi_pll_28nm, base) + +static bool pll_28nm_poll_for_ready(struct dsi_pll_28nm *pll_28nm, + int nb_tries, int timeout_us) +{ + bool pll_locked = false; + u32 val; + + while (nb_tries--) { + val = pll_read(pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_RDY); + pll_locked = !!(val & DSI_28nm_8960_PHY_PLL_RDY_PLL_RDY); + + if (pll_locked) + break; + + udelay(timeout_us); + } + DBG("DSI PLL is %slocked", pll_locked ? 
"" : "*not* "); + + return pll_locked; +} + +/* + * Clock Callbacks + */ +static int dsi_pll_28nm_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct msm_dsi_pll *pll = hw_clk_to_pll(hw); + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + void __iomem *base = pll_28nm->mmio; + u32 val, temp, fb_divider; + + DBG("rate=%lu, parent's=%lu", rate, parent_rate); + + temp = rate / 10; + val = VCO_REF_CLK_RATE / 10; + fb_divider = (temp * VCO_PREF_DIV_RATIO) / val; + fb_divider = fb_divider / 2 - 1; + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_1, + fb_divider & 0xff); + + val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2); + + val |= (fb_divider >> 8) & 0x07; + + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2, + val); + + val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3); + + val |= (VCO_PREF_DIV_RATIO - 1) & 0x3f; + + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3, + val); + + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_6, + 0xf); + + val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8); + val |= 0x7 << 4; + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8, + val); + + return 0; +} + +static int dsi_pll_28nm_clk_is_enabled(struct clk_hw *hw) +{ + struct msm_dsi_pll *pll = hw_clk_to_pll(hw); + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + + return pll_28nm_poll_for_ready(pll_28nm, POLL_MAX_READS, + POLL_TIMEOUT_US); +} + +static unsigned long dsi_pll_28nm_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct msm_dsi_pll *pll = hw_clk_to_pll(hw); + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + void __iomem *base = pll_28nm->mmio; + unsigned long vco_rate; + u32 status, fb_divider, temp, ref_divider; + + VERB("parent_rate=%lu", parent_rate); + + status = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_0); + + if (status & DSI_28nm_8960_PHY_PLL_CTRL_0_ENABLE) { + fb_divider = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_1); + fb_divider &= 0xff; + temp = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_2) & 0x07; + fb_divider = (temp << 8) | fb_divider; + fb_divider += 1; + + ref_divider = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_3); + ref_divider &= 0x3f; + ref_divider += 1; + + /* multiply by 2 */ + vco_rate = (parent_rate / ref_divider) * fb_divider * 2; + } else { + vco_rate = 0; + } + + DBG("returning vco rate = %lu", vco_rate); + + return vco_rate; +} + +static const struct clk_ops clk_ops_dsi_pll_28nm_vco = { + .round_rate = msm_dsi_pll_helper_clk_round_rate, + .set_rate = dsi_pll_28nm_clk_set_rate, + .recalc_rate = dsi_pll_28nm_clk_recalc_rate, + .prepare = msm_dsi_pll_helper_clk_prepare, + .unprepare = msm_dsi_pll_helper_clk_unprepare, + .is_enabled = dsi_pll_28nm_clk_is_enabled, +}; + +/* + * Custom byte clock divier clk_ops + * + * This clock is the entry point to configuring the PLL. The user (dsi host) + * will set this clock's rate to the desired byte clock rate. The VCO lock + * frequency is a multiple of the byte clock rate. The multiplication factor + * (shown as F in the diagram above) is a function of the byte clock rate. 
+ * + * This custom divider clock ensures that its parent (VCO) is set to the + * desired rate, and that the byte clock postdivider (POSTDIV2) is configured + * accordingly + */ +#define to_clk_bytediv(_hw) container_of(_hw, struct clk_bytediv, hw) + +static unsigned long clk_bytediv_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_bytediv *bytediv = to_clk_bytediv(hw); + unsigned int div; + + div = pll_read(bytediv->reg) & 0xff; + + return parent_rate / (div + 1); +} + +/* find multiplication factor(wrt byte clock) at which the VCO should be set */ +static unsigned int get_vco_mul_factor(unsigned long byte_clk_rate) +{ + unsigned long bit_mhz; + + /* convert to bit clock in Mhz */ + bit_mhz = (byte_clk_rate * 8) / 1000000; + + if (bit_mhz < 125) + return 64; + else if (bit_mhz < 250) + return 32; + else if (bit_mhz < 600) + return 16; + else + return 8; +} + +static long clk_bytediv_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + unsigned long best_parent; + unsigned int factor; + + factor = get_vco_mul_factor(rate); + + best_parent = rate * factor; + *prate = clk_hw_round_rate(clk_hw_get_parent(hw), best_parent); + + return *prate / factor; +} + +static int clk_bytediv_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_bytediv *bytediv = to_clk_bytediv(hw); + u32 val; + unsigned int factor; + + factor = get_vco_mul_factor(rate); + + val = pll_read(bytediv->reg); + val |= (factor - 1) & 0xff; + pll_write(bytediv->reg, val); + + return 0; +} + +/* Our special byte clock divider ops */ +static const struct clk_ops clk_bytediv_ops = { + .round_rate = clk_bytediv_round_rate, + .set_rate = clk_bytediv_set_rate, + .recalc_rate = clk_bytediv_recalc_rate, +}; + +/* + * PLL Callbacks + */ +static int dsi_pll_28nm_enable_seq(struct msm_dsi_pll *pll) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + struct device *dev = &pll_28nm->pdev->dev; + void __iomem *base = pll_28nm->mmio; + bool locked; + unsigned int bit_div, byte_div; + int max_reads = 1000, timeout_us = 100; + u32 val; + + DBG("id=%d", pll_28nm->id); + + /* + * before enabling the PLL, configure the bit clock divider since we + * don't expose it as a clock to the outside world + * 1: read back the byte clock divider that should already be set + * 2: divide by 8 to get bit clock divider + * 3: write it to POSTDIV1 + */ + val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9); + byte_div = val + 1; + bit_div = byte_div / 8; + + val = pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8); + val &= ~0xf; + val |= (bit_div - 1); + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8, val); + + /* enable the PLL */ + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_0, + DSI_28nm_8960_PHY_PLL_CTRL_0_ENABLE); + + locked = pll_28nm_poll_for_ready(pll_28nm, max_reads, timeout_us); + + if (unlikely(!locked)) + dev_err(dev, "DSI PLL lock failed\n"); + else + DBG("DSI PLL lock success"); + + return locked ? 
0 : -EINVAL; +} + +static void dsi_pll_28nm_disable_seq(struct msm_dsi_pll *pll) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + + DBG("id=%d", pll_28nm->id); + pll_write(pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_CTRL_0, 0x00); +} + +static void dsi_pll_28nm_save_state(struct msm_dsi_pll *pll) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + struct pll_28nm_cached_state *cached_state = &pll_28nm->cached_state; + void __iomem *base = pll_28nm->mmio; + + cached_state->postdiv3 = + pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_10); + cached_state->postdiv2 = + pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9); + cached_state->postdiv1 = + pll_read(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8); + + cached_state->vco_rate = clk_hw_get_rate(&pll->clk_hw); +} + +static int dsi_pll_28nm_restore_state(struct msm_dsi_pll *pll) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + struct pll_28nm_cached_state *cached_state = &pll_28nm->cached_state; + void __iomem *base = pll_28nm->mmio; + int ret; + + ret = dsi_pll_28nm_clk_set_rate(&pll->clk_hw, + cached_state->vco_rate, 0); + if (ret) { + dev_err(&pll_28nm->pdev->dev, + "restore vco rate failed. ret=%d\n", ret); + return ret; + } + + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_10, + cached_state->postdiv3); + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_9, + cached_state->postdiv2); + pll_write(base + REG_DSI_28nm_8960_PHY_PLL_CTRL_8, + cached_state->postdiv1); + + return 0; +} + +static int dsi_pll_28nm_get_provider(struct msm_dsi_pll *pll, + struct clk **byte_clk_provider, + struct clk **pixel_clk_provider) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + + if (byte_clk_provider) + *byte_clk_provider = pll_28nm->provided_clks[DSI_BYTE_PLL_CLK]; + if (pixel_clk_provider) + *pixel_clk_provider = + pll_28nm->provided_clks[DSI_PIXEL_PLL_CLK]; + + return 0; +} + +static void dsi_pll_28nm_destroy(struct msm_dsi_pll *pll) +{ + struct dsi_pll_28nm *pll_28nm = to_pll_28nm(pll); + + msm_dsi_pll_helper_unregister_clks(pll_28nm->pdev, + pll_28nm->clks, pll_28nm->num_clks); +} + +static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm) +{ + char *clk_name, *parent_name, *vco_name; + struct clk_init_data vco_init = { + .parent_names = (const char *[]){ "pxo" }, + .num_parents = 1, + .ops = &clk_ops_dsi_pll_28nm_vco, + }; + struct device *dev = &pll_28nm->pdev->dev; + struct clk **clks = pll_28nm->clks; + struct clk **provided_clks = pll_28nm->provided_clks; + struct clk_bytediv *bytediv; + struct clk_init_data bytediv_init = { }; + int ret, num = 0; + + DBG("%d", pll_28nm->id); + + bytediv = devm_kzalloc(dev, sizeof(*bytediv), GFP_KERNEL); + if (!bytediv) + return -ENOMEM; + + vco_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!vco_name) + return -ENOMEM; + + parent_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!parent_name) + return -ENOMEM; + + clk_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!clk_name) + return -ENOMEM; + + pll_28nm->bytediv = bytediv; + + snprintf(vco_name, 32, "dsi%dvco_clk", pll_28nm->id); + vco_init.name = vco_name; + + pll_28nm->base.clk_hw.init = &vco_init; + + clks[num++] = clk_register(dev, &pll_28nm->base.clk_hw); + + /* prepare and register bytediv */ + bytediv->hw.init = &bytediv_init; + bytediv->reg = pll_28nm->mmio + REG_DSI_28nm_8960_PHY_PLL_CTRL_9; + + snprintf(parent_name, 32, "dsi%dvco_clk", pll_28nm->id); + snprintf(clk_name, 32, "dsi%dpllbyte", pll_28nm->id); + + bytediv_init.name = clk_name; + bytediv_init.ops = &clk_bytediv_ops; + bytediv_init.flags = CLK_SET_RATE_PARENT; + 
bytediv_init.parent_names = (const char * const *) &parent_name; + bytediv_init.num_parents = 1; + + /* DIV2 */ + clks[num++] = provided_clks[DSI_BYTE_PLL_CLK] = + clk_register(dev, &bytediv->hw); + + snprintf(clk_name, 32, "dsi%dpll", pll_28nm->id); + /* DIV3 */ + clks[num++] = provided_clks[DSI_PIXEL_PLL_CLK] = + clk_register_divider(dev, clk_name, + parent_name, 0, pll_28nm->mmio + + REG_DSI_28nm_8960_PHY_PLL_CTRL_10, + 0, 8, 0, NULL); + + pll_28nm->num_clks = num; + + pll_28nm->clk_data.clk_num = NUM_PROVIDED_CLKS; + pll_28nm->clk_data.clks = provided_clks; + + ret = of_clk_add_provider(dev->of_node, + of_clk_src_onecell_get, &pll_28nm->clk_data); + if (ret) { + dev_err(dev, "failed to register clk provider: %d\n", ret); + return ret; + } + + return 0; +} + +struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(struct platform_device *pdev, + int id) +{ + struct dsi_pll_28nm *pll_28nm; + struct msm_dsi_pll *pll; + int ret; + + if (!pdev) + return ERR_PTR(-ENODEV); + + pll_28nm = devm_kzalloc(&pdev->dev, sizeof(*pll_28nm), GFP_KERNEL); + if (!pll_28nm) + return ERR_PTR(-ENOMEM); + + pll_28nm->pdev = pdev; + pll_28nm->id = id + 1; + + pll_28nm->mmio = msm_ioremap(pdev, "dsi_pll", "DSI_PLL"); + if (IS_ERR_OR_NULL(pll_28nm->mmio)) { + dev_err(&pdev->dev, "%s: failed to map pll base\n", __func__); + return ERR_PTR(-ENOMEM); + } + + pll = &pll_28nm->base; + pll->min_rate = VCO_MIN_RATE; + pll->max_rate = VCO_MAX_RATE; + pll->get_provider = dsi_pll_28nm_get_provider; + pll->destroy = dsi_pll_28nm_destroy; + pll->disable_seq = dsi_pll_28nm_disable_seq; + pll->save_state = dsi_pll_28nm_save_state; + pll->restore_state = dsi_pll_28nm_restore_state; + + pll->en_seq_cnt = 1; + pll->enable_seqs[0] = dsi_pll_28nm_enable_seq; + + ret = pll_28nm_register(pll_28nm); + if (ret) { + dev_err(&pdev->dev, "failed to register PLL: %d\n", ret); + return ERR_PTR(ret); + } + + return pll; +} diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 1f4a95eeb348..9a0989c0b4de 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -17,6 +17,8 @@ */ #include <linux/of_irq.h> +#include <linux/of_gpio.h> + #include "hdmi.h" void hdmi_set_mode(struct hdmi *hdmi, bool power_on) @@ -322,8 +324,6 @@ fail: * The hdmi device: */ -#include <linux/of_gpio.h> - #define HDMI_CFG(item, entry) \ .item ## _names = item ##_names_ ## entry, \ .item ## _cnt = ARRAY_SIZE(item ## _names_ ## entry) @@ -388,17 +388,6 @@ static struct hdmi_platform_config hdmi_tx_8996_config = { .hpd_freq = hpd_clk_freq_8x74, }; -static const struct of_device_id dt_match[] = { - { .compatible = "qcom,hdmi-tx-8996", .data = &hdmi_tx_8996_config }, - { .compatible = "qcom,hdmi-tx-8994", .data = &hdmi_tx_8994_config }, - { .compatible = "qcom,hdmi-tx-8084", .data = &hdmi_tx_8084_config }, - { .compatible = "qcom,hdmi-tx-8974", .data = &hdmi_tx_8974_config }, - { .compatible = "qcom,hdmi-tx-8960", .data = &hdmi_tx_8960_config }, - { .compatible = "qcom,hdmi-tx-8660", .data = &hdmi_tx_8660_config }, - {} -}; - -#ifdef CONFIG_OF static int get_gpio(struct device *dev, struct device_node *of_node, const char *name) { int gpio = of_get_named_gpio(of_node, name, 0); @@ -413,7 +402,6 @@ static int get_gpio(struct device *dev, struct device_node *of_node, const char } return gpio; } -#endif static int hdmi_bind(struct device *dev, struct device *master, void *data) { @@ -421,16 +409,12 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data) struct msm_drm_private *priv = drm->dev_private; 
static struct hdmi_platform_config *hdmi_cfg; struct hdmi *hdmi; -#ifdef CONFIG_OF struct device_node *of_node = dev->of_node; - const struct of_device_id *match; - match = of_match_node(dt_match, of_node); - if (match && match->data) { - hdmi_cfg = (struct hdmi_platform_config *)match->data; - DBG("hdmi phy: %s", match->compatible); - } else { - dev_err(dev, "unknown phy: %s\n", of_node->name); + hdmi_cfg = (struct hdmi_platform_config *) + of_device_get_match_data(dev); + if (!hdmi_cfg) { + dev_err(dev, "unknown hdmi_cfg: %s\n", of_node->name); return -ENXIO; } @@ -443,55 +427,6 @@ static int hdmi_bind(struct device *dev, struct device *master, void *data) hdmi_cfg->mux_sel_gpio = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-sel"); hdmi_cfg->mux_lpm_gpio = get_gpio(dev, of_node, "qcom,hdmi-tx-mux-lpm"); -#else - static struct hdmi_platform_config config = {}; - static const char *hpd_clk_names[] = { - "core_clk", "master_iface_clk", "slave_iface_clk", - }; - if (cpu_is_apq8064()) { - static const char *hpd_reg_names[] = {"8921_hdmi_mvs"}; - config.phy_init = hdmi_phy_8960_init; - config.hpd_reg_names = hpd_reg_names; - config.hpd_reg_cnt = ARRAY_SIZE(hpd_reg_names); - config.hpd_clk_names = hpd_clk_names; - config.hpd_clk_cnt = ARRAY_SIZE(hpd_clk_names); - config.ddc_clk_gpio = 70; - config.ddc_data_gpio = 71; - config.hpd_gpio = 72; - config.mux_en_gpio = -1; - config.mux_sel_gpio = -1; - } else if (cpu_is_msm8960() || cpu_is_msm8960ab()) { - static const char *hpd_reg_names[] = {"8921_hdmi_mvs"}; - config.phy_init = hdmi_phy_8960_init; - config.hpd_reg_names = hpd_reg_names; - config.hpd_reg_cnt = ARRAY_SIZE(hpd_reg_names); - config.hpd_clk_names = hpd_clk_names; - config.hpd_clk_cnt = ARRAY_SIZE(hpd_clk_names); - config.ddc_clk_gpio = 100; - config.ddc_data_gpio = 101; - config.hpd_gpio = 102; - config.mux_en_gpio = -1; - config.mux_sel_gpio = -1; - } else if (cpu_is_msm8x60()) { - static const char *hpd_reg_names[] = { - "8901_hdmi_mvs", "8901_mpp0" - }; - config.phy_init = hdmi_phy_8x60_init; - config.hpd_reg_names = hpd_reg_names; - config.hpd_reg_cnt = ARRAY_SIZE(hpd_reg_names); - config.hpd_clk_names = hpd_clk_names; - config.hpd_clk_cnt = ARRAY_SIZE(hpd_clk_names); - config.ddc_clk_gpio = 170; - config.ddc_data_gpio = 171; - config.hpd_gpio = 172; - config.mux_en_gpio = -1; - config.mux_sel_gpio = -1; - } - config.mmio_name = "hdmi_msm_hdmi_addr"; - config.qfprom_mmio_name = "hdmi_msm_qfprom_addr"; - - hdmi_cfg = &config; -#endif dev->platform_data = hdmi_cfg; hdmi = hdmi_init(to_platform_device(dev)); @@ -529,6 +464,16 @@ static int hdmi_dev_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id dt_match[] = { + { .compatible = "qcom,hdmi-tx-8996", .data = &hdmi_tx_8996_config }, + { .compatible = "qcom,hdmi-tx-8994", .data = &hdmi_tx_8994_config }, + { .compatible = "qcom,hdmi-tx-8084", .data = &hdmi_tx_8084_config }, + { .compatible = "qcom,hdmi-tx-8974", .data = &hdmi_tx_8974_config }, + { .compatible = "qcom,hdmi-tx-8960", .data = &hdmi_tx_8960_config }, + { .compatible = "qcom,hdmi-tx-8660", .data = &hdmi_tx_8660_config }, + {} +}; + static struct platform_driver hdmi_driver = { .probe = hdmi_dev_probe, .remove = hdmi_dev_remove, diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c index 6ac9aa165768..28df397c3b04 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c @@ -678,7 +678,8 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev, 
drm_flip_work_init(&mdp4_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs); + drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp4_crtc_funcs, + NULL); drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs); plane->crtc = crtc; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c new file mode 100644 index 000000000000..2f57e9453b67 --- /dev/null +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dsi_encoder.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2014, Inforce Computing. All rights reserved. + * + * Author: Vinay Simha <vinaysimha@inforcecomputing.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mdp4_kms.h" + +#include "drm_crtc.h" +#include "drm_crtc_helper.h" + +struct mdp4_dsi_encoder { + struct drm_encoder base; + struct drm_panel *panel; + bool enabled; +}; +#define to_mdp4_dsi_encoder(x) container_of(x, struct mdp4_dsi_encoder, base) + +static struct mdp4_kms *get_kms(struct drm_encoder *encoder) +{ + struct msm_drm_private *priv = encoder->dev->dev_private; + return to_mdp4_kms(to_mdp_kms(priv->kms)); +} + +static void mdp4_dsi_encoder_destroy(struct drm_encoder *encoder) +{ + struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder); + + drm_encoder_cleanup(encoder); + kfree(mdp4_dsi_encoder); +} + +static const struct drm_encoder_funcs mdp4_dsi_encoder_funcs = { + .destroy = mdp4_dsi_encoder_destroy, +}; + +static bool mdp4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static void mdp4_dsi_encoder_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct mdp4_kms *mdp4_kms = get_kms(encoder); + uint32_t dsi_hsync_skew, vsync_period, vsync_len, ctrl_pol; + uint32_t display_v_start, display_v_end; + uint32_t hsync_start_x, hsync_end_x; + + mode = adjusted_mode; + + DBG("set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x", + mode->base.id, mode->name, + mode->vrefresh, mode->clock, + mode->hdisplay, mode->hsync_start, + mode->hsync_end, mode->htotal, + mode->vdisplay, mode->vsync_start, + mode->vsync_end, mode->vtotal, + mode->type, mode->flags); + + ctrl_pol = 0; + if (mode->flags & DRM_MODE_FLAG_NHSYNC) + ctrl_pol |= MDP4_DSI_CTRL_POLARITY_HSYNC_LOW; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) + ctrl_pol |= MDP4_DSI_CTRL_POLARITY_VSYNC_LOW; + /* probably need to get DATA_EN polarity from panel.. */ + + dsi_hsync_skew = 0; /* get this from panel? 
 */ + + hsync_start_x = (mode->htotal - mode->hsync_start); + hsync_end_x = mode->htotal - (mode->hsync_start - mode->hdisplay) - 1; + + vsync_period = mode->vtotal * mode->htotal; + vsync_len = (mode->vsync_end - mode->vsync_start) * mode->htotal; + display_v_start = (mode->vtotal - mode->vsync_start) * mode->htotal + dsi_hsync_skew; + display_v_end = vsync_period - ((mode->vsync_start - mode->vdisplay) * mode->htotal) + dsi_hsync_skew - 1; + + mdp4_write(mdp4_kms, REG_MDP4_DSI_HSYNC_CTRL, + MDP4_DSI_HSYNC_CTRL_PULSEW(mode->hsync_end - mode->hsync_start) | + MDP4_DSI_HSYNC_CTRL_PERIOD(mode->htotal)); + mdp4_write(mdp4_kms, REG_MDP4_DSI_VSYNC_PERIOD, vsync_period); + mdp4_write(mdp4_kms, REG_MDP4_DSI_VSYNC_LEN, vsync_len); + mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_HCTRL, + MDP4_DSI_DISPLAY_HCTRL_START(hsync_start_x) | + MDP4_DSI_DISPLAY_HCTRL_END(hsync_end_x)); + mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_VSTART, display_v_start); + mdp4_write(mdp4_kms, REG_MDP4_DSI_DISPLAY_VEND, display_v_end); + + mdp4_write(mdp4_kms, REG_MDP4_DSI_CTRL_POLARITY, ctrl_pol); + mdp4_write(mdp4_kms, REG_MDP4_DSI_UNDERFLOW_CLR, + MDP4_DSI_UNDERFLOW_CLR_ENABLE_RECOVERY | + MDP4_DSI_UNDERFLOW_CLR_COLOR(0xff)); + mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_HCTL, + MDP4_DSI_ACTIVE_HCTL_START(0) | + MDP4_DSI_ACTIVE_HCTL_END(0)); + mdp4_write(mdp4_kms, REG_MDP4_DSI_HSYNC_SKEW, dsi_hsync_skew); + mdp4_write(mdp4_kms, REG_MDP4_DSI_BORDER_CLR, 0); + mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_VSTART, 0); + mdp4_write(mdp4_kms, REG_MDP4_DSI_ACTIVE_VEND, 0); +} + +static void mdp4_dsi_encoder_disable(struct drm_encoder *encoder) +{ + struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder); + struct mdp4_kms *mdp4_kms = get_kms(encoder); + + if (!mdp4_dsi_encoder->enabled) + return; + + mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 0); + + /* + * Wait for a vsync so we know the ENABLE=0 has latched before + * the (connector) source of the vsyncs gets disabled, + * otherwise we end up in a funny state if we re-enable + * before the disable latches, with the result that some of + * the settings for the new modeset (like the new + * scanout buffer) don't latch properly.. 
+ */ + mdp_irq_wait(&mdp4_kms->base, MDP4_IRQ_PRIMARY_VSYNC); + + mdp4_dsi_encoder->enabled = false; +} + +static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder) +{ + struct mdp4_dsi_encoder *mdp4_dsi_encoder = to_mdp4_dsi_encoder(encoder); + struct mdp4_kms *mdp4_kms = get_kms(encoder); + + if (mdp4_dsi_encoder->enabled) + return; + + mdp4_crtc_set_config(encoder->crtc, + MDP4_DMA_CONFIG_PACK_ALIGN_MSB | + MDP4_DMA_CONFIG_DEFLKR_EN | + MDP4_DMA_CONFIG_DITHER_EN | + MDP4_DMA_CONFIG_R_BPC(BPC8) | + MDP4_DMA_CONFIG_G_BPC(BPC8) | + MDP4_DMA_CONFIG_B_BPC(BPC8) | + MDP4_DMA_CONFIG_PACK(0x21)); + + mdp4_crtc_set_intf(encoder->crtc, INTF_DSI_VIDEO, 0); + + mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 1); + + mdp4_dsi_encoder->enabled = true; +} + +static const struct drm_encoder_helper_funcs mdp4_dsi_encoder_helper_funcs = { + .mode_fixup = mdp4_dsi_encoder_mode_fixup, + .mode_set = mdp4_dsi_encoder_mode_set, + .disable = mdp4_dsi_encoder_disable, + .enable = mdp4_dsi_encoder_enable, +}; + +/* initialize encoder */ +struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) +{ + struct drm_encoder *encoder = NULL; + struct mdp4_dsi_encoder *mdp4_dsi_encoder; + int ret; + + mdp4_dsi_encoder = kzalloc(sizeof(*mdp4_dsi_encoder), GFP_KERNEL); + if (!mdp4_dsi_encoder) { + ret = -ENOMEM; + goto fail; + } + + encoder = &mdp4_dsi_encoder->base; + + drm_encoder_init(dev, encoder, &mdp4_dsi_encoder_funcs, + DRM_MODE_ENCODER_DSI, NULL); + drm_encoder_helper_add(encoder, &mdp4_dsi_encoder_helper_funcs); + + return encoder; + +fail: + if (encoder) + mdp4_dsi_encoder_destroy(encoder); + + return ERR_PTR(ret); +} diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c index 89614c6a6c1b..a21df54cb50f 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c @@ -262,7 +262,7 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev) encoder = &mdp4_dtv_encoder->base; drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs); mdp4_dtv_encoder->src_clk = devm_clk_get(dev->dev, "src_clk"); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c index 5ed38cf548a1..a521207db8a1 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c @@ -29,7 +29,7 @@ void mdp4_set_irqmask(struct mdp_kms *mdp_kms, uint32_t irqmask, static void mdp4_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus) { - DRM_ERROR("errors: %08x\n", irqstatus); + DRM_ERROR_RATELIMITED("errors: %08x\n", irqstatus); } void mdp4_irq_preinstall(struct msm_kms *kms) diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c index 077f7521a971..5a8e3d6bcbff 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c @@ -169,7 +169,14 @@ static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate, struct drm_encoder *encoder) { /* if we had >1 encoder, we'd need something more clever: */ - return mdp4_dtv_round_pixclk(encoder, rate); + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_TMDS: + return mdp4_dtv_round_pixclk(encoder, rate); + case DRM_MODE_ENCODER_LVDS: + case DRM_MODE_ENCODER_DSI: + default: + return rate; + } } static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file) @@ -240,19 +247,18 @@ int 
mdp4_enable(struct mdp4_kms *mdp4_kms) return 0; } -#ifdef CONFIG_OF -static struct drm_panel *detect_panel(struct drm_device *dev) +static struct device_node *mdp4_detect_lcdc_panel(struct drm_device *dev) { struct device_node *endpoint, *panel_node; struct device_node *np = dev->dev->of_node; - struct drm_panel *panel = NULL; endpoint = of_graph_get_next_endpoint(np, NULL); if (!endpoint) { - dev_err(dev->dev, "no valid endpoint\n"); - return ERR_PTR(-ENODEV); + DBG("no endpoint in MDP4 to fetch LVDS panel\n"); + return NULL; } + /* don't proceed if we have an endpoint but no panel_node tied to it */ panel_node = of_graph_get_remote_port_parent(endpoint); if (!panel_node) { dev_err(dev->dev, "no valid panel node\n"); @@ -262,132 +268,185 @@ static struct drm_panel *detect_panel(struct drm_device *dev) of_node_put(endpoint); - panel = of_drm_find_panel(panel_node); - if (!panel) { - of_node_put(panel_node); - return ERR_PTR(-EPROBE_DEFER); - } - - return panel; + return panel_node; } -#else -static struct drm_panel *detect_panel(struct drm_device *dev) -{ - // ??? maybe use a module param to specify which panel is attached? -} -#endif -static int modeset_init(struct mdp4_kms *mdp4_kms) +static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms, + int intf_type) { struct drm_device *dev = mdp4_kms->dev; struct msm_drm_private *priv = dev->dev_private; - struct drm_plane *plane; - struct drm_crtc *crtc; struct drm_encoder *encoder; struct drm_connector *connector; - struct drm_panel *panel; + struct device_node *panel_node; + struct drm_encoder *dsi_encs[MSM_DSI_ENCODER_NUM]; + int i, dsi_id; int ret; - /* construct non-private planes: */ - plane = mdp4_plane_init(dev, VG1, false); - if (IS_ERR(plane)) { - dev_err(dev->dev, "failed to construct plane for VG1\n"); - ret = PTR_ERR(plane); - goto fail; - } - priv->planes[priv->num_planes++] = plane; + switch (intf_type) { + case DRM_MODE_ENCODER_LVDS: + /* + * bail out early if: + * - there is no panel node (no need to initialize lcdc + * encoder and lvds connector), or + * - panel node is a bad pointer + */ + panel_node = mdp4_detect_lcdc_panel(dev); + if (IS_ERR_OR_NULL(panel_node)) + return PTR_ERR(panel_node); + + encoder = mdp4_lcdc_encoder_init(dev, panel_node); + if (IS_ERR(encoder)) { + dev_err(dev->dev, "failed to construct LCDC encoder\n"); + return PTR_ERR(encoder); + } - plane = mdp4_plane_init(dev, VG2, false); - if (IS_ERR(plane)) { - dev_err(dev->dev, "failed to construct plane for VG2\n"); - ret = PTR_ERR(plane); - goto fail; - } - priv->planes[priv->num_planes++] = plane; + /* LCDC can be hooked to DMA_P (TODO: Add DMA_S later?) 
*/ + encoder->possible_crtcs = 1 << DMA_P; - /* - * Setup the LCDC/LVDS path: RGB2 -> DMA_P -> LCDC -> LVDS: - */ + connector = mdp4_lvds_connector_init(dev, panel_node, encoder); + if (IS_ERR(connector)) { + dev_err(dev->dev, "failed to initialize LVDS connector\n"); + return PTR_ERR(connector); + } - panel = detect_panel(dev); - if (IS_ERR(panel)) { - ret = PTR_ERR(panel); - dev_err(dev->dev, "failed to detect LVDS panel: %d\n", ret); - goto fail; - } + priv->encoders[priv->num_encoders++] = encoder; + priv->connectors[priv->num_connectors++] = connector; - plane = mdp4_plane_init(dev, RGB2, true); - if (IS_ERR(plane)) { - dev_err(dev->dev, "failed to construct plane for RGB2\n"); - ret = PTR_ERR(plane); - goto fail; - } + break; + case DRM_MODE_ENCODER_TMDS: + encoder = mdp4_dtv_encoder_init(dev); + if (IS_ERR(encoder)) { + dev_err(dev->dev, "failed to construct DTV encoder\n"); + return PTR_ERR(encoder); + } - crtc = mdp4_crtc_init(dev, plane, priv->num_crtcs, 0, DMA_P); - if (IS_ERR(crtc)) { - dev_err(dev->dev, "failed to construct crtc for DMA_P\n"); - ret = PTR_ERR(crtc); - goto fail; - } + /* DTV can be hooked to DMA_E: */ + encoder->possible_crtcs = 1 << 1; - encoder = mdp4_lcdc_encoder_init(dev, panel); - if (IS_ERR(encoder)) { - dev_err(dev->dev, "failed to construct LCDC encoder\n"); - ret = PTR_ERR(encoder); - goto fail; - } + if (priv->hdmi) { + /* Construct bridge/connector for HDMI: */ + ret = hdmi_modeset_init(priv->hdmi, dev, encoder); + if (ret) { + dev_err(dev->dev, "failed to initialize HDMI: %d\n", ret); + return ret; + } + } - /* LCDC can be hooked to DMA_P: */ - encoder->possible_crtcs = 1 << priv->num_crtcs; + priv->encoders[priv->num_encoders++] = encoder; - priv->crtcs[priv->num_crtcs++] = crtc; - priv->encoders[priv->num_encoders++] = encoder; + break; + case DRM_MODE_ENCODER_DSI: + /* only DSI1 supported for now */ + dsi_id = 0; - connector = mdp4_lvds_connector_init(dev, panel, encoder); - if (IS_ERR(connector)) { - ret = PTR_ERR(connector); - dev_err(dev->dev, "failed to initialize LVDS connector: %d\n", ret); - goto fail; - } + if (!priv->dsi[dsi_id]) + break; - priv->connectors[priv->num_connectors++] = connector; + for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) { + dsi_encs[i] = mdp4_dsi_encoder_init(dev); + if (IS_ERR(dsi_encs[i])) { + ret = PTR_ERR(dsi_encs[i]); + dev_err(dev->dev, + "failed to construct DSI encoder: %d\n", + ret); + return ret; + } - /* - * Setup DTV/HDMI path: RGB1 -> DMA_E -> DTV -> HDMI: - */ + /* TODO: Add DMA_S later? 
*/ + dsi_encs[i]->possible_crtcs = 1 << DMA_P; + priv->encoders[priv->num_encoders++] = dsi_encs[i]; + } - plane = mdp4_plane_init(dev, RGB1, true); - if (IS_ERR(plane)) { - dev_err(dev->dev, "failed to construct plane for RGB1\n"); - ret = PTR_ERR(plane); - goto fail; - } + ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, dsi_encs); + if (ret) { + dev_err(dev->dev, "failed to initialize DSI: %d\n", + ret); + return ret; + } - crtc = mdp4_crtc_init(dev, plane, priv->num_crtcs, 1, DMA_E); - if (IS_ERR(crtc)) { - dev_err(dev->dev, "failed to construct crtc for DMA_E\n"); - ret = PTR_ERR(crtc); - goto fail; + break; + default: + dev_err(dev->dev, "Invalid or unsupported interface\n"); + return -EINVAL; } - encoder = mdp4_dtv_encoder_init(dev); - if (IS_ERR(encoder)) { - dev_err(dev->dev, "failed to construct DTV encoder\n"); - ret = PTR_ERR(encoder); - goto fail; + return 0; +} + +static int modeset_init(struct mdp4_kms *mdp4_kms) +{ + struct drm_device *dev = mdp4_kms->dev; + struct msm_drm_private *priv = dev->dev_private; + struct drm_plane *plane; + struct drm_crtc *crtc; + int i, ret; + static const enum mdp4_pipe rgb_planes[] = { + RGB1, RGB2, + }; + static const enum mdp4_pipe vg_planes[] = { + VG1, VG2, + }; + static const enum mdp4_dma mdp4_crtcs[] = { + DMA_P, DMA_E, + }; + static const char * const mdp4_crtc_names[] = { + "DMA_P", "DMA_E", + }; + static const int mdp4_intfs[] = { + DRM_MODE_ENCODER_LVDS, + DRM_MODE_ENCODER_DSI, + DRM_MODE_ENCODER_TMDS, + }; + + /* construct non-private planes: */ + for (i = 0; i < ARRAY_SIZE(vg_planes); i++) { + plane = mdp4_plane_init(dev, vg_planes[i], false); + if (IS_ERR(plane)) { + dev_err(dev->dev, + "failed to construct plane for VG%d\n", i + 1); + ret = PTR_ERR(plane); + goto fail; + } + priv->planes[priv->num_planes++] = plane; } - /* DTV can be hooked to DMA_E: */ - encoder->possible_crtcs = 1 << priv->num_crtcs; + for (i = 0; i < ARRAY_SIZE(mdp4_crtcs); i++) { + plane = mdp4_plane_init(dev, rgb_planes[i], true); + if (IS_ERR(plane)) { + dev_err(dev->dev, + "failed to construct plane for RGB%d\n", i + 1); + ret = PTR_ERR(plane); + goto fail; + } + + crtc = mdp4_crtc_init(dev, plane, priv->num_crtcs, i, + mdp4_crtcs[i]); + if (IS_ERR(crtc)) { + dev_err(dev->dev, "failed to construct crtc for %s\n", + mdp4_crtc_names[i]); + ret = PTR_ERR(crtc); + goto fail; + } + + priv->crtcs[priv->num_crtcs++] = crtc; + } - priv->crtcs[priv->num_crtcs++] = crtc; - priv->encoders[priv->num_encoders++] = encoder; + /* + * we currently set up two relatively fixed paths: + * + * LCDC/LVDS path: RGB1 -> DMA_P -> LCDC -> LVDS + * or + * DSI path: RGB1 -> DMA_P -> DSI1 -> DSI Panel + * + * DTV/HDMI path: RGB2 -> DMA_E -> DTV -> HDMI + */ - if (priv->hdmi) { - /* Construct bridge/connector for HDMI: */ - ret = hdmi_modeset_init(priv->hdmi, dev, encoder); + for (i = 0; i < ARRAY_SIZE(mdp4_intfs); i++) { + ret = mdp4_modeset_init_intf(mdp4_kms, mdp4_intfs[i]); if (ret) { - dev_err(dev->dev, "failed to initialize HDMI: %d\n", ret); + dev_err(dev->dev, "failed to initialize intf: %d, %d\n", + i, ret); goto fail; } } @@ -558,17 +617,10 @@ fail: static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev) { static struct mdp4_platform_config config = {}; -#ifdef CONFIG_OF - /* TODO */ + + /* TODO: Chips that aren't apq8064 have a 200 Mhz max_clk */ config.max_clk = 266667000; config.iommu = iommu_domain_alloc(&platform_bus_type); -#else - if (cpu_is_apq8064()) - config.max_clk = 266667000; - else - config.max_clk = 200000000; - - config.iommu = 
msm_get_iommu_domain(DISPLAY_READ_DOMAIN); -#endif + return &config; } diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h index 8a7f6e1e2bca..d2c96ef431f4 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h @@ -157,7 +157,7 @@ static inline uint32_t mixercfg(uint32_t mixer_cfg, int mixer, COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1); break; default: - WARN_ON("invalid pipe"); + WARN(1, "invalid pipe"); break; } @@ -212,10 +212,19 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev); long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate); struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, - struct drm_panel *panel); + struct device_node *panel_node); struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev, - struct drm_panel *panel, struct drm_encoder *encoder); + struct device_node *panel_node, struct drm_encoder *encoder); + +#ifdef CONFIG_DRM_MSM_DSI +struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev); +#else +static inline struct drm_encoder *mdp4_dsi_encoder_init(struct drm_device *dev) +{ + return ERR_PTR(-ENODEV); +} +#endif #ifdef CONFIG_COMMON_CLK struct clk *mpd4_lvds_pll_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c index 4cd6e721aa0a..cd63fedb67cc 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c @@ -23,6 +23,7 @@ struct mdp4_lcdc_encoder { struct drm_encoder base; + struct device_node *panel_node; struct drm_panel *panel; struct clk *lcdc_clk; unsigned long int pixclock; @@ -338,7 +339,7 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) struct mdp4_lcdc_encoder *mdp4_lcdc_encoder = to_mdp4_lcdc_encoder(encoder); struct mdp4_kms *mdp4_kms = get_kms(encoder); - struct drm_panel *panel = mdp4_lcdc_encoder->panel; + struct drm_panel *panel; int i, ret; if (WARN_ON(!mdp4_lcdc_encoder->enabled)) @@ -346,6 +347,7 @@ static void mdp4_lcdc_encoder_disable(struct drm_encoder *encoder) mdp4_write(mdp4_kms, REG_MDP4_LCDC_ENABLE, 0); + panel = of_drm_find_panel(mdp4_lcdc_encoder->panel_node); if (panel) { drm_panel_disable(panel); drm_panel_unprepare(panel); @@ -381,7 +383,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) to_mdp4_lcdc_encoder(encoder); unsigned long pc = mdp4_lcdc_encoder->pixclock; struct mdp4_kms *mdp4_kms = get_kms(encoder); - struct drm_panel *panel = mdp4_lcdc_encoder->panel; + struct drm_panel *panel; int i, ret; if (WARN_ON(mdp4_lcdc_encoder->enabled)) @@ -414,6 +416,7 @@ static void mdp4_lcdc_encoder_enable(struct drm_encoder *encoder) if (ret) dev_err(dev->dev, "failed to enable lcdc_clk: %d\n", ret); + panel = of_drm_find_panel(mdp4_lcdc_encoder->panel_node); if (panel) { drm_panel_prepare(panel); drm_panel_enable(panel); @@ -442,7 +445,7 @@ long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate) /* initialize encoder */ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, - struct drm_panel *panel) + struct device_node *panel_node) { struct drm_encoder *encoder = NULL; struct mdp4_lcdc_encoder *mdp4_lcdc_encoder; @@ -455,12 +458,12 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev, goto fail; } - mdp4_lcdc_encoder->panel = panel; + mdp4_lcdc_encoder->panel_node = panel_node; encoder = &mdp4_lcdc_encoder->base; 
drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs); /* TODO: do we need different pll in other cases? */ diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c index 921185133d38..e73e1742b250 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c @@ -23,6 +23,7 @@ struct mdp4_lvds_connector { struct drm_connector base; struct drm_encoder *encoder; + struct device_node *panel_node; struct drm_panel *panel; }; #define to_mdp4_lvds_connector(x) container_of(x, struct mdp4_lvds_connector, base) @@ -33,6 +34,10 @@ static enum drm_connector_status mdp4_lvds_connector_detect( struct mdp4_lvds_connector *mdp4_lvds_connector = to_mdp4_lvds_connector(connector); + if (!mdp4_lvds_connector->panel) + mdp4_lvds_connector->panel = + of_drm_find_panel(mdp4_lvds_connector->panel_node); + return mdp4_lvds_connector->panel ? connector_status_connected : connector_status_disconnected; @@ -42,10 +47,6 @@ static void mdp4_lvds_connector_destroy(struct drm_connector *connector) { struct mdp4_lvds_connector *mdp4_lvds_connector = to_mdp4_lvds_connector(connector); - struct drm_panel *panel = mdp4_lvds_connector->panel; - - if (panel) - drm_panel_detach(panel); drm_connector_unregister(connector); drm_connector_cleanup(connector); @@ -60,9 +61,14 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector) struct drm_panel *panel = mdp4_lvds_connector->panel; int ret = 0; - if (panel) + if (panel) { + drm_panel_attach(panel, connector); + ret = panel->funcs->get_modes(panel); + drm_panel_detach(panel); + } + return ret; } @@ -111,7 +117,7 @@ static const struct drm_connector_helper_funcs mdp4_lvds_connector_helper_funcs /* initialize connector */ struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev, - struct drm_panel *panel, struct drm_encoder *encoder) + struct device_node *panel_node, struct drm_encoder *encoder) { struct drm_connector *connector = NULL; struct mdp4_lvds_connector *mdp4_lvds_connector; @@ -124,7 +130,7 @@ struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev, } mdp4_lvds_connector->encoder = encoder; - mdp4_lvds_connector->panel = panel; + mdp4_lvds_connector->panel_node = panel_node; connector = &mdp4_lvds_connector->base; @@ -141,9 +147,6 @@ struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev, drm_mode_connector_attach_encoder(connector, encoder); - if (panel) - drm_panel_attach(panel, connector); - return connector; fail: diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c index 30d57e74c42f..9f96dfe67769 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c @@ -397,7 +397,8 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev, type = private_plane ? 
DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; ret = drm_universal_plane_init(dev, plane, 0xff, &mdp4_plane_funcs, - mdp4_plane->formats, mdp4_plane->nformats, type); + mdp4_plane->formats, mdp4_plane->nformats, + type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index bb1225aa2f75..57f73f0c120d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -553,9 +553,7 @@ fail: static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev) { static struct mdp5_cfg_platform config = {}; -#ifdef CONFIG_OF - /* TODO */ -#endif + config.iommu = iommu_domain_alloc(&platform_bus_type); return &config; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c index 8e6c9b598a57..1aa21dba663d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c @@ -326,7 +326,7 @@ struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev, mdp5_cmd_enc->ctl = ctl; drm_encoder_init(dev, encoder, &mdp5_cmd_encoder_funcs, - DRM_MODE_ENCODER_DSI); + DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(encoder, &mdp5_cmd_encoder_helper_funcs); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 7f9f4ac88029..20cee5ce4071 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -797,7 +797,8 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, snprintf(mdp5_crtc->name, sizeof(mdp5_crtc->name), "%s:%d", pipe2name(mdp5_plane_pipe(plane)), id); - drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs); + drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs, + NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, "unref cursor", unref_cursor_worker); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index c9e32b08a7a0..0d737cad03a6 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -293,6 +293,24 @@ static const struct drm_encoder_helper_funcs mdp5_encoder_helper_funcs = { .enable = mdp5_encoder_enable, }; +int mdp5_encoder_get_linecount(struct drm_encoder *encoder) +{ + struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder); + struct mdp5_kms *mdp5_kms = get_kms(encoder); + int intf = mdp5_encoder->intf.num; + + return mdp5_read(mdp5_kms, REG_MDP5_INTF_LINE_COUNT(intf)); +} + +u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder) +{ + struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder); + struct mdp5_kms *mdp5_kms = get_kms(encoder); + int intf = mdp5_encoder->intf.num; + + return mdp5_read(mdp5_kms, REG_MDP5_INTF_FRAME_COUNT(intf)); +} + int mdp5_encoder_set_split_display(struct drm_encoder *encoder, struct drm_encoder *slave_encoder) { @@ -354,7 +372,7 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, spin_lock_init(&mdp5_encoder->intf_lock); - drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type); + drm_encoder_init(dev, encoder, &mdp5_encoder_funcs, enc_type, NULL); drm_encoder_helper_add(encoder, &mdp5_encoder_helper_funcs); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index b0d4b53b97f4..73bc3e312fd4 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -31,7 +31,7 @@ void 
mdp5_set_irqmask(struct mdp_kms *mdp_kms, uint32_t irqmask, static void mdp5_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus) { - DRM_ERROR("errors: %08x\n", irqstatus); + DRM_ERROR_RATELIMITED("errors: %08x\n", irqstatus); } void mdp5_irq_preinstall(struct msm_kms *kms) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index b532faa8026d..e115318402bd 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -468,6 +468,127 @@ static int get_clk(struct platform_device *pdev, struct clk **clkp, return 0; } +static struct drm_encoder *get_encoder_from_crtc(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct drm_encoder *encoder; + + drm_for_each_encoder(encoder, dev) + if (encoder->crtc == crtc) + return encoder; + + return NULL; +} + +static int mdp5_get_scanoutpos(struct drm_device *dev, unsigned int pipe, + unsigned int flags, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int line, vsw, vbp, vactive_start, vactive_end, vfp_end; + int ret = 0; + + crtc = priv->crtcs[pipe]; + if (!crtc) { + DRM_ERROR("Invalid crtc %d\n", pipe); + return 0; + } + + encoder = get_encoder_from_crtc(crtc); + if (!encoder) { + DRM_ERROR("no encoder found for crtc %d\n", pipe); + return 0; + } + + ret |= DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_ACCURATE; + + vsw = mode->crtc_vsync_end - mode->crtc_vsync_start; + vbp = mode->crtc_vtotal - mode->crtc_vsync_end; + + /* + * the line counter is 1 at the start of the VSYNC pulse and VTOTAL at + * the end of VFP. Translate the porch values relative to the line + * counter positions. 
+ */ + + vactive_start = vsw + vbp + 1; + + vactive_end = vactive_start + mode->crtc_vdisplay; + + /* last scan line before VSYNC */ + vfp_end = mode->crtc_vtotal; + + if (stime) + *stime = ktime_get(); + + line = mdp5_encoder_get_linecount(encoder); + + if (line < vactive_start) { + line -= vactive_start; + ret |= DRM_SCANOUTPOS_IN_VBLANK; + } else if (line > vactive_end) { + line = line - vfp_end - vactive_start; + ret |= DRM_SCANOUTPOS_IN_VBLANK; + } else { + line -= vactive_start; + } + + *vpos = line; + *hpos = 0; + + if (etime) + *etime = ktime_get(); + + return ret; +} + +static int mdp5_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, + int *max_error, + struct timeval *vblank_time, + unsigned flags) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_crtc *crtc; + + if (pipe < 0 || pipe >= priv->num_crtcs) { + DRM_ERROR("Invalid crtc %d\n", pipe); + return -EINVAL; + } + + crtc = priv->crtcs[pipe]; + if (!crtc) { + DRM_ERROR("Invalid crtc %d\n", pipe); + return -EINVAL; + } + + return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, + vblank_time, flags, + &crtc->mode); +} + +static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe) +{ + struct msm_drm_private *priv = dev->dev_private; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + + if (pipe < 0 || pipe >= priv->num_crtcs) + return 0; + + crtc = priv->crtcs[pipe]; + if (!crtc) + return 0; + + encoder = get_encoder_from_crtc(crtc); + if (!encoder) + return 0; + + return mdp5_encoder_get_framecount(encoder); +} + struct msm_kms *mdp5_kms_init(struct drm_device *dev) { struct platform_device *pdev = dev->platformdev; @@ -590,6 +711,8 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) !config->hw->intf.base[i]) continue; mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0); + + mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3); } mdp5_disable(mdp5_kms); mdelay(16); @@ -635,6 +758,12 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev) dev->mode_config.max_width = config->hw->lm.max_width; dev->mode_config.max_height = config->hw->lm.max_height; + dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp; + dev->driver->get_scanout_position = mdp5_get_scanoutpos; + dev->driver->get_vblank_counter = mdp5_get_vblank_counter; + dev->max_vblank_count = 0xffffffff; + dev->vblank_disable_immediate = true; + return kms; fail: diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h index 84f65d415598..00730ba08a60 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h @@ -222,6 +222,8 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev, struct mdp5_interface *intf, struct mdp5_ctl *ctl); int mdp5_encoder_set_split_display(struct drm_encoder *encoder, struct drm_encoder *slave_encoder); +int mdp5_encoder_get_linecount(struct drm_encoder *encoder); +u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder); #ifdef CONFIG_DRM_MSM_DSI struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 81cd49045ffc..432c09836b0e 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -904,7 +904,7 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, type = private_plane ? 
DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs, mdp5_plane->formats, mdp5_plane->nformats, - type); + type, NULL); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index b88ce514eb8e..9a30807b900b 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -237,20 +237,9 @@ static int msm_unload(struct drm_device *dev) static int get_mdp_ver(struct platform_device *pdev) { -#ifdef CONFIG_OF - static const struct of_device_id match_types[] = { { - .compatible = "qcom,mdss_mdp", - .data = (void *)5, - }, { - /* end node */ - } }; struct device *dev = &pdev->dev; - const struct of_device_id *match; - match = of_match_node(match_types, dev->of_node); - if (match) - return (int)(unsigned long)match->data; -#endif - return 4; + + return (int) (unsigned long) of_device_get_match_data(dev); } #include <linux/of_address.h> @@ -258,10 +247,10 @@ static int get_mdp_ver(struct platform_device *pdev) static int msm_init_vram(struct drm_device *dev) { struct msm_drm_private *priv = dev->dev_private; + struct device_node *node; unsigned long size = 0; int ret = 0; -#ifdef CONFIG_OF /* In the device-tree world, we could have a 'memory-region' * phandle, which gives us a link to our "vram". Allocating * is all nicely abstracted behind the dma api, but we need @@ -278,7 +267,6 @@ static int msm_init_vram(struct drm_device *dev) * as corruption on screen before we have a chance to * load and do initial modeset) */ - struct device_node *node; node = of_parse_phandle(dev->dev->of_node, "memory-region", 0); if (node) { @@ -288,14 +276,12 @@ static int msm_init_vram(struct drm_device *dev) return ret; size = r.end - r.start; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); - } else -#endif - /* if we have no IOMMU, then we need to use carveout allocator. - * Grab the entire CMA chunk carved out in early startup in - * mach-msm: - */ - if (!iommu_present(&platform_bus_type)) { + /* if we have no IOMMU, then we need to use carveout allocator. + * Grab the entire CMA chunk carved out in early startup in + * mach-msm: + */ + } else if (!iommu_present(&platform_bus_type)) { DRM_INFO("using %s VRAM carveout\n", vram); size = memparse(vram, NULL); } @@ -1035,9 +1021,9 @@ static const struct dev_pm_ops msm_pm_ops = { * Componentized driver support: */ -#ifdef CONFIG_OF -/* NOTE: the CONFIG_OF case duplicates the same code as exynos or imx - * (or probably any other).. so probably some room for some helpers +/* + * NOTE: duplication of the same code as exynos or imx (or probably any other). + * so probably some room for some helpers */ static int compare_of(struct device *dev, void *data) { @@ -1062,12 +1048,6 @@ static int add_components(struct device *dev, struct component_match **matchptr, return 0; } -#else -static int compare_dev(struct device *dev, void *data) -{ - return dev == data; -} -#endif static int msm_drm_bind(struct device *dev) { @@ -1091,35 +1071,9 @@ static const struct component_master_ops msm_drm_ops = { static int msm_pdev_probe(struct platform_device *pdev) { struct component_match *match = NULL; -#ifdef CONFIG_OF + add_components(&pdev->dev, &match, "connectors"); add_components(&pdev->dev, &match, "gpus"); -#else - /* For non-DT case, it kinda sucks. We don't actually have a way - * to know whether or not we are waiting for certain devices (or if - * they are simply not present). 
But for non-DT we only need to - * care about apq8064/apq8060/etc (all mdp4/a3xx): - */ - static const char *devnames[] = { - "hdmi_msm.0", "kgsl-3d0.0", - }; - int i; - - DBG("Adding components.."); - - for (i = 0; i < ARRAY_SIZE(devnames); i++) { - struct device *dev; - - dev = bus_find_device_by_name(&platform_bus_type, - NULL, devnames[i]); - if (!dev) { - dev_info(&pdev->dev, "still waiting for %s\n", devnames[i]); - return -EPROBE_DEFER; - } - - component_match_add(&pdev->dev, &match, compare_dev, dev); - } -#endif pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); return component_master_add_with_match(&pdev->dev, &msm_drm_ops, match); @@ -1138,8 +1092,10 @@ static const struct platform_device_id msm_id[] = { }; static const struct of_device_id dt_match[] = { - { .compatible = "qcom,mdp" }, /* mdp4 */ - { .compatible = "qcom,mdss_mdp" }, /* mdp5 */ + { .compatible = "qcom,mdp4", .data = (void *) 4 }, /* mdp4 */ + { .compatible = "qcom,mdp5", .data = (void *) 5 }, /* mdp5 */ + /* to support downstream DT files */ + { .compatible = "qcom,mdss_mdp", .data = (void *) 5 }, /* mdp5 */ {} }; MODULE_DEVICE_TABLE(of, dt_match); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 9a713b7a009d..c1e7bba2fdb7 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -31,14 +31,9 @@ #include <linux/iommu.h> #include <linux/types.h> #include <linux/of_graph.h> +#include <linux/of_device.h> #include <asm/sizes.h> -#ifndef CONFIG_OF -#include <mach/board.h> -#include <mach/socinfo.h> -#include <mach/iommu_domains.h> -#endif - #include <drm/drmP.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 3f6ec077b51d..d95af6eba602 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -121,7 +121,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, /* note: if fb creation failed, we can't rely on fb destroy * to unref the bo: */ - drm_gem_object_unreference(fbdev->bo); + drm_gem_object_unreference_unlocked(fbdev->bo); ret = PTR_ERR(fb); goto fail; } diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index 3d96b49fe662..6f04397d43a7 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -1081,8 +1081,6 @@ nouveau_crtc_set_config(struct drm_mode_set *set) } static const struct drm_crtc_funcs nv04_crtc_funcs = { - .save = nv_crtc_save, - .restore = nv_crtc_restore, .cursor_set = nv04_crtc_cursor_set, .cursor_move = nv04_crtc_cursor_move, .gamma_set = nv_crtc_gamma_set, @@ -1123,6 +1121,9 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num) nv_crtc->index = crtc_num; nv_crtc->last_dpms = NV_DPMS_CLEARED; + nv_crtc->save = nv_crtc_save; + nv_crtc->restore = nv_crtc_restore; + drm_crtc_init(dev, &nv_crtc->base, &nv04_crtc_funcs); drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs); drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256); diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c index 78cb033bc015..b48eec395f07 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/dac.c +++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c @@ -504,8 +504,6 @@ static void nv04_dac_destroy(struct drm_encoder *encoder) static const struct drm_encoder_helper_funcs nv04_dac_helper_funcs = { .dpms = nv04_dac_dpms, - .save = nv04_dac_save, - .restore = nv04_dac_restore, .mode_fixup = nv04_dac_mode_fixup, .prepare = 
nv04_dac_prepare, .commit = nv04_dac_commit, @@ -515,8 +513,6 @@ static const struct drm_encoder_helper_funcs nv04_dac_helper_funcs = { static const struct drm_encoder_helper_funcs nv17_dac_helper_funcs = { .dpms = nv04_dac_dpms, - .save = nv04_dac_save, - .restore = nv04_dac_restore, .mode_fixup = nv04_dac_mode_fixup, .prepare = nv04_dac_prepare, .commit = nv04_dac_commit, @@ -545,12 +541,16 @@ nv04_dac_create(struct drm_connector *connector, struct dcb_output *entry) nv_encoder->dcb = entry; nv_encoder->or = ffs(entry->or) - 1; + nv_encoder->enc_save = nv04_dac_save; + nv_encoder->enc_restore = nv04_dac_restore; + if (nv_gf4_disp_arch(dev)) helper = &nv17_dac_helper_funcs; else helper = &nv04_dac_helper_funcs; - drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC); + drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC, + NULL); drm_encoder_helper_add(encoder, helper); encoder->possible_crtcs = entry->heads; diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c index 429ab5e3025a..05bfd151d1d8 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c @@ -652,8 +652,6 @@ static void nv04_tmds_slave_init(struct drm_encoder *encoder) static const struct drm_encoder_helper_funcs nv04_lvds_helper_funcs = { .dpms = nv04_lvds_dpms, - .save = nv04_dfp_save, - .restore = nv04_dfp_restore, .mode_fixup = nv04_dfp_mode_fixup, .prepare = nv04_dfp_prepare, .commit = nv04_dfp_commit, @@ -663,8 +661,6 @@ static const struct drm_encoder_helper_funcs nv04_lvds_helper_funcs = { static const struct drm_encoder_helper_funcs nv04_tmds_helper_funcs = { .dpms = nv04_tmds_dpms, - .save = nv04_dfp_save, - .restore = nv04_dfp_restore, .mode_fixup = nv04_dfp_mode_fixup, .prepare = nv04_dfp_prepare, .commit = nv04_dfp_commit, @@ -701,12 +697,15 @@ nv04_dfp_create(struct drm_connector *connector, struct dcb_output *entry) if (!nv_encoder) return -ENOMEM; + nv_encoder->enc_save = nv04_dfp_save; + nv_encoder->enc_restore = nv04_dfp_restore; + encoder = to_drm_encoder(nv_encoder); nv_encoder->dcb = entry; nv_encoder->or = ffs(entry->or) - 1; - drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type); + drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type, NULL); drm_encoder_helper_add(encoder, helper); encoder->possible_crtcs = entry->heads; diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 9e650081c357..b4a6bc433ef5 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -39,7 +39,8 @@ nv04_display_create(struct drm_device *dev) struct dcb_table *dcb = &drm->vbios.dcb; struct drm_connector *connector, *ct; struct drm_encoder *encoder; - struct drm_crtc *crtc; + struct nouveau_encoder *nv_encoder; + struct nouveau_crtc *crtc; struct nv04_display *disp; int i, ret; @@ -107,14 +108,11 @@ nv04_display_create(struct drm_device *dev) } /* Save previous state */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - crtc->funcs->save(crtc); - - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - const struct drm_encoder_helper_funcs *func = encoder->helper_private; + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) + crtc->save(&crtc->base); - func->save(encoder); - } + list_for_each_entry(nv_encoder, &dev->mode_config.encoder_list, base.base.head) + nv_encoder->enc_save(&nv_encoder->base.base); nouveau_overlay_init(dev); @@ -126,8 +124,9 @@ 
nv04_display_destroy(struct drm_device *dev) { struct nv04_display *disp = nv04_display(dev); struct nouveau_drm *drm = nouveau_drm(dev); - struct drm_encoder *encoder; + struct nouveau_encoder *encoder; struct drm_crtc *crtc; + struct nouveau_crtc *nv_crtc; /* Turn every CRTC off. */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -139,14 +138,11 @@ nv04_display_destroy(struct drm_device *dev) } /* Restore state */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - const struct drm_encoder_helper_funcs *func = encoder->helper_private; + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_restore(&encoder->base.base); - func->restore(encoder); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - crtc->funcs->restore(crtc); + list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head) + nv_crtc->restore(&nv_crtc->base); nouveau_hw_save_vga_fonts(dev, 0); @@ -159,8 +155,8 @@ nv04_display_destroy(struct drm_device *dev) int nv04_display_init(struct drm_device *dev) { - struct drm_encoder *encoder; - struct drm_crtc *crtc; + struct nouveau_encoder *encoder; + struct nouveau_crtc *crtc; /* meh.. modeset apparently doesn't setup all the regs and depends * on pre-existing state, for now load the state of the card *before* @@ -170,14 +166,11 @@ nv04_display_init(struct drm_device *dev) * save/restore "pre-load" state, but more general so we can save * on suspend too. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - const struct drm_encoder_helper_funcs *func = encoder->helper_private; - - func->restore(encoder); - } + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) + crtc->save(&crtc->base); - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - crtc->funcs->restore(crtc); + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_save(&encoder->base.base); return 0; } diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c index 5345eb5378a8..54e9fb9eb5c0 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c +++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c @@ -192,8 +192,6 @@ static const struct drm_encoder_funcs nv04_tv_funcs = { static const struct drm_encoder_helper_funcs nv04_tv_helper_funcs = { .dpms = nv04_tv_dpms, - .save = drm_i2c_encoder_save, - .restore = drm_i2c_encoder_restore, .mode_fixup = drm_i2c_encoder_mode_fixup, .prepare = nv04_tv_prepare, .commit = nv04_tv_commit, @@ -225,9 +223,13 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry) /* Initialize the common members */ encoder = to_drm_encoder(nv_encoder); - drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC); + drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC, + NULL); drm_encoder_helper_add(encoder, &nv04_tv_helper_funcs); + nv_encoder->enc_save = drm_i2c_encoder_save; + nv_encoder->enc_restore = drm_i2c_encoder_restore; + encoder->possible_crtcs = entry->heads; encoder->possible_clones = 0; nv_encoder->dcb = entry; diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c index b734195d80a0..163317d26de9 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c +++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c @@ -769,10 +769,8 @@ static void nv17_tv_destroy(struct drm_encoder *encoder) kfree(tv_enc); } -static struct drm_encoder_helper_funcs nv17_tv_helper_funcs = { +static const struct 
drm_encoder_helper_funcs nv17_tv_helper_funcs = { .dpms = nv17_tv_dpms, - .save = nv17_tv_save, - .restore = nv17_tv_restore, .mode_fixup = nv17_tv_mode_fixup, .prepare = nv17_tv_prepare, .commit = nv17_tv_commit, @@ -780,14 +778,14 @@ static struct drm_encoder_helper_funcs nv17_tv_helper_funcs = { .detect = nv17_tv_detect, }; -static struct drm_encoder_slave_funcs nv17_tv_slave_funcs = { +static const struct drm_encoder_slave_funcs nv17_tv_slave_funcs = { .get_modes = nv17_tv_get_modes, .mode_valid = nv17_tv_mode_valid, .create_resources = nv17_tv_create_resources, .set_property = nv17_tv_set_property, }; -static struct drm_encoder_funcs nv17_tv_funcs = { +static const struct drm_encoder_funcs nv17_tv_funcs = { .destroy = nv17_tv_destroy, }; @@ -816,10 +814,14 @@ nv17_tv_create(struct drm_connector *connector, struct dcb_output *entry) tv_enc->base.dcb = entry; tv_enc->base.or = ffs(entry->or) - 1; - drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC); + drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC, + NULL); drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs); to_encoder_slave(encoder)->slave_funcs = &nv17_tv_slave_funcs; + tv_enc->base.enc_save = nv17_tv_save; + tv_enc->base.enc_restore = nv17_tv_restore; + encoder->possible_crtcs = entry->heads; encoder->possible_clones = 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 2e7cbe933533..5dd1d0111cac 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -898,8 +898,6 @@ nouveau_connector_helper_funcs = { static const struct drm_connector_funcs nouveau_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = NULL, - .restore = NULL, .detect = nouveau_connector_detect, .destroy = nouveau_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, @@ -910,8 +908,6 @@ nouveau_connector_funcs = { static const struct drm_connector_funcs nouveau_connector_funcs_lvds = { .dpms = drm_helper_connector_dpms, - .save = NULL, - .restore = NULL, .detect = nouveau_connector_detect_lvds, .destroy = nouveau_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, @@ -944,8 +940,6 @@ nouveau_connector_dp_dpms(struct drm_connector *connector, int mode) static const struct drm_connector_funcs nouveau_connector_funcs_dp = { .dpms = nouveau_connector_dp_dpms, - .save = NULL, - .restore = NULL, .detect = nouveau_connector_detect, .destroy = nouveau_connector_destroy, .fill_modes = drm_helper_probe_single_connector_modes, diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h index f19cb1c5fc5a..863f10b8d818 100644 --- a/drivers/gpu/drm/nouveau/nouveau_crtc.h +++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h @@ -73,6 +73,9 @@ struct nouveau_crtc { int (*set_dither)(struct nouveau_crtc *crtc, bool update); int (*set_scale)(struct nouveau_crtc *crtc, bool update); int (*set_color_vibrance)(struct nouveau_crtc *crtc, bool update); + + void (*save)(struct drm_crtc *crtc); + void (*restore)(struct drm_crtc *crtc); }; static inline struct nouveau_crtc *nouveau_crtc(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index ea9d3bc91266..18676b8c1721 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -829,7 +829,6 @@ nouveau_finish_page_flip(struct nouveau_channel *chan, struct drm_device *dev = drm->dev; struct 
nouveau_page_flip_state *s; unsigned long flags; - int crtcid = -1; spin_lock_irqsave(&dev->event_lock, flags); @@ -841,15 +840,19 @@ nouveau_finish_page_flip(struct nouveau_channel *chan, s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head); if (s->event) { - /* Vblank timestamps/counts are only correct on >= NV-50 */ - if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) - crtcid = s->crtc; + if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) { + drm_arm_vblank_event(dev, s->crtc, s->event); + } else { + drm_send_vblank_event(dev, s->crtc, s->event); - drm_send_vblank_event(dev, crtcid, s->event); + /* Give up ownership of vblank for page-flipped crtc */ + drm_vblank_put(dev, s->crtc); + } + } + else { + /* Give up ownership of vblank for page-flipped crtc */ + drm_vblank_put(dev, s->crtc); } - - /* Give up ownership of vblank for page-flipped crtc */ - drm_vblank_put(dev, s->crtc); list_del(&s->head); if (ps) diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 1d3ee5179ab8..b3a563c44bcd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1046,10 +1046,6 @@ nouveau_platform_device_create(const struct nvkm_device_tegra_func *func, goto err_free; } - err = drm_dev_set_unique(drm, "%s", dev_name(&pdev->dev)); - if (err < 0) - goto err_free; - drm->platformdev = pdev; platform_set_drvdata(pdev, drm); diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h index b37da95105b0..ee6a6d3fc80f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_encoder.h +++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h @@ -63,6 +63,9 @@ struct nouveau_encoder { u32 datarate; } dp; }; + + void (*enc_save)(struct drm_encoder *encoder); + void (*enc_restore)(struct drm_encoder *encoder); }; struct nouveau_encoder * @@ -80,7 +83,7 @@ static inline struct drm_encoder *to_drm_encoder(struct nouveau_encoder *enc) return &enc->base.base; } -static inline struct drm_encoder_slave_funcs * +static inline const struct drm_encoder_slave_funcs * get_slave_funcs(struct drm_encoder *enc) { return to_encoder_slave(enc)->slave_funcs; diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index c053c50b346a..44e1952582aa 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -28,6 +28,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_dp_helper.h> +#include <drm/drm_fb_helper.h> #include <nvif/class.h> @@ -1717,7 +1718,7 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type); + drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, NULL); drm_encoder_helper_add(encoder, &nv50_dac_hfunc); drm_mode_connector_attach_encoder(connector, encoder); @@ -2125,7 +2126,7 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type); + drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, NULL); drm_encoder_helper_add(encoder, &nv50_sor_hfunc); drm_mode_connector_attach_encoder(connector, encoder); @@ -2305,7 +2306,7 @@ nv50_pior_create(struct drm_connector *connector, struct 
dcb_output *dcbe) encoder = to_drm_encoder(nv_encoder); encoder->possible_crtcs = dcbe->heads; encoder->possible_clones = 0; - drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type); + drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, NULL); drm_encoder_helper_add(encoder, &nv50_pior_hfunc); drm_mode_connector_attach_encoder(connector, encoder); diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c index ad09590e8a46..2ed0754ed19e 100644 --- a/drivers/gpu/drm/omapdrm/omap_crtc.c +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c @@ -524,7 +524,7 @@ struct drm_crtc *omap_crtc_init(struct drm_device *dev, omap_crtc->mgr = omap_dss_get_overlay_manager(channel); ret = drm_crtc_init_with_planes(dev, crtc, plane, NULL, - &omap_crtc_funcs); + &omap_crtc_funcs, NULL); if (ret < 0) { kfree(omap_crtc); return NULL; diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c index 7d9b32a0eb43..0c104ad7ef66 100644 --- a/drivers/gpu/drm/omapdrm/omap_encoder.c +++ b/drivers/gpu/drm/omapdrm/omap_encoder.c @@ -178,7 +178,7 @@ struct drm_encoder *omap_encoder_init(struct drm_device *dev, encoder = &omap_encoder->base; drm_encoder_init(dev, encoder, &omap_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &omap_encoder_helper_funcs); return encoder; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 3054bda72688..d5ecabd6c14c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -366,7 +366,7 @@ struct drm_plane *omap_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, (1 << priv->num_crtcs) - 1, &omap_plane_funcs, omap_plane->formats, - omap_plane->nformats, type); + omap_plane->nformats, type, NULL); if (ret < 0) goto error; diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.c b/drivers/gpu/drm/omapdrm/tcm-sita.c index efb609510540..6df1f2a1bc52 100644 --- a/drivers/gpu/drm/omapdrm/tcm-sita.c +++ b/drivers/gpu/drm/omapdrm/tcm-sita.c @@ -87,14 +87,11 @@ struct tcm *sita_init(u16 width, u16 height, struct tcm_pt *attr) if (width == 0 || height == 0) return NULL; - tcm = kmalloc(sizeof(*tcm), GFP_KERNEL); - pvt = kmalloc(sizeof(*pvt), GFP_KERNEL); + tcm = kzalloc(sizeof(*tcm), GFP_KERNEL); + pvt = kzalloc(sizeof(*pvt), GFP_KERNEL); if (!tcm || !pvt) goto error; - memset(tcm, 0, sizeof(*tcm)); - memset(pvt, 0, sizeof(*pvt)); - /* Updating the pointers to SiTA implementation APIs */ tcm->height = height; tcm->width = width; diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 7d4704b1292b..1500ab99f548 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -31,6 +31,16 @@ config DRM_PANEL_LG_LG4573 Say Y here if you want to enable support for LG4573 RGB panel. To compile this driver as a module, choose M here. +config DRM_PANEL_PANASONIC_VVX10F034N00 + tristate "Panasonic VVX10F034N00 1920x1200 video mode panel" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for Panasonic VVX10F034N00 + WUXGA (1920x1200) Novatek NT1397-based DSI panel as found in some + Xperia Z2 tablets + config DRM_PANEL_SAMSUNG_S6E8AA0 tristate "Samsung S6E8AA0 DSI video mode panel" depends on OF @@ -51,4 +61,13 @@ config DRM_PANEL_SHARP_LQ101R1SX01 To compile this driver as a module, choose M here: the module will be called panel-sharp-lq101r1sx01. 
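Both panel drivers added below follow the standard drm_panel contract: prepare()/unprepare() handle power and initialization, while enable()/disable() only gate the backlight around the video stream, with boolean guards keeping the hooks idempotent. A minimal skeleton of that shape, assuming a hypothetical my_panel wrapper with the usual container_of() accessor (a sketch, not either driver verbatim):

static int my_panel_prepare(struct drm_panel *panel)
{
	struct my_panel *p = to_my_panel(panel);
	int ret;

	if (p->prepared)
		return 0;

	/* Power rails come up before any DSI/DCS traffic. */
	ret = regulator_enable(p->supply);
	if (ret < 0)
		return ret;

	p->prepared = true;
	return 0;
}

static int my_panel_enable(struct drm_panel *panel)
{
	struct my_panel *p = to_my_panel(panel);

	if (p->enabled)
		return 0;

	if (p->backlight) {
		p->backlight->props.power = FB_BLANK_UNBLANK;
		backlight_update_status(p->backlight);
	}

	p->enabled = true;
	return 0;
}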
+config DRM_PANEL_SHARP_LS043T1LE01 + tristate "Sharp LS043T1LE01 qHD video mode panel" + depends on OF + depends on DRM_MIPI_DSI + depends on BACKLIGHT_CLASS_DEVICE + help + Say Y here if you want to enable support for Sharp LS043T1LE01 qHD + (540x960) DSI panel as found on the Qualcomm APQ8074 Dragonboard + endmenu diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile index d0f016dd7ddb..f277eed933d6 100644 --- a/drivers/gpu/drm/panel/Makefile +++ b/drivers/gpu/drm/panel/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_DRM_PANEL_SIMPLE) += panel-simple.o obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o +obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += panel-panasonic-vvx10f034n00.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o +obj-$(CONFIG_DRM_PANEL_SHARP_LS043T1LE01) += panel-sharp-ls043t1le01.o diff --git a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c new file mode 100644 index 000000000000..7f915f706fa6 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c @@ -0,0 +1,334 @@ +/* + * Copyright (C) 2015 Red Hat + * Copyright (C) 2015 Sony Mobile Communications Inc. + * Author: Werner Johansson <werner.johansson@sonymobile.com> + * + * Based on AUO panel driver by Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/backlight.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> + +#include <video/mipi_display.h> + +/* + * When power is turned off to this panel a minimum off time of 500ms has to be + * observed before powering back on as there's no external reset pin. 
Keep + * track of earliest wakeup time and delay subsequent prepare call accordingly + */ +#define MIN_POFF_MS (500) + +struct wuxga_nt_panel { + struct drm_panel base; + struct mipi_dsi_device *dsi; + + struct backlight_device *backlight; + struct regulator *supply; + + bool prepared; + bool enabled; + + ktime_t earliest_wake; + + const struct drm_display_mode *mode; +}; + +static inline struct wuxga_nt_panel *to_wuxga_nt_panel(struct drm_panel *panel) +{ + return container_of(panel, struct wuxga_nt_panel, base); +} + +static int wuxga_nt_panel_on(struct wuxga_nt_panel *wuxga_nt) +{ + struct mipi_dsi_device *dsi = wuxga_nt->dsi; + int ret; + + ret = mipi_dsi_turn_on_peripheral(dsi); + if (ret < 0) + return ret; + + return 0; +} + +static int wuxga_nt_panel_disable(struct drm_panel *panel) +{ + struct wuxga_nt_panel *wuxga_nt = to_wuxga_nt_panel(panel); + + if (!wuxga_nt->enabled) + return 0; + + mipi_dsi_shutdown_peripheral(wuxga_nt->dsi); + + if (wuxga_nt->backlight) { + wuxga_nt->backlight->props.power = FB_BLANK_POWERDOWN; + wuxga_nt->backlight->props.state |= BL_CORE_FBBLANK; + backlight_update_status(wuxga_nt->backlight); + } + + wuxga_nt->enabled = false; + + return 0; +} + +static int wuxga_nt_panel_unprepare(struct drm_panel *panel) +{ + struct wuxga_nt_panel *wuxga_nt = to_wuxga_nt_panel(panel); + + if (!wuxga_nt->prepared) + return 0; + + regulator_disable(wuxga_nt->supply); + wuxga_nt->earliest_wake = ktime_add_ms(ktime_get_real(), MIN_POFF_MS); + wuxga_nt->prepared = false; + + return 0; +} + +static int wuxga_nt_panel_prepare(struct drm_panel *panel) +{ + struct wuxga_nt_panel *wuxga_nt = to_wuxga_nt_panel(panel); + int ret; + s64 enablewait; + + if (wuxga_nt->prepared) + return 0; + + /* + * If the user re-enabled the panel before the required off-time then + * we need to wait the remaining period before re-enabling regulator + */ + enablewait = ktime_ms_delta(wuxga_nt->earliest_wake, ktime_get_real()); + + /* Sanity check, this should never happen */ + if (enablewait > MIN_POFF_MS) + enablewait = MIN_POFF_MS; + + if (enablewait > 0) + msleep(enablewait); + + ret = regulator_enable(wuxga_nt->supply); + if (ret < 0) + return ret; + + /* + * A minimum delay of 250ms is required after power-up until commands + * can be sent + */ + msleep(250); + + ret = wuxga_nt_panel_on(wuxga_nt); + if (ret < 0) { + dev_err(panel->dev, "failed to set panel on: %d\n", ret); + goto poweroff; + } + + wuxga_nt->prepared = true; + + return 0; + +poweroff: + regulator_disable(wuxga_nt->supply); + + return ret; +} + +static int wuxga_nt_panel_enable(struct drm_panel *panel) +{ + struct wuxga_nt_panel *wuxga_nt = to_wuxga_nt_panel(panel); + + if (wuxga_nt->enabled) + return 0; + + if (wuxga_nt->backlight) { + wuxga_nt->backlight->props.power = FB_BLANK_UNBLANK; + wuxga_nt->backlight->props.state &= ~BL_CORE_FBBLANK; + backlight_update_status(wuxga_nt->backlight); + } + + wuxga_nt->enabled = true; + + return 0; +} + +static const struct drm_display_mode default_mode = { + .clock = 164402, + .hdisplay = 1920, + .hsync_start = 1920 + 152, + .hsync_end = 1920 + 152 + 52, + .htotal = 1920 + 152 + 52 + 20, + .vdisplay = 1200, + .vsync_start = 1200 + 24, + .vsync_end = 1200 + 24 + 6, + .vtotal = 1200 + 24 + 6 + 48, + .vrefresh = 60, +}; + +static int wuxga_nt_panel_get_modes(struct drm_panel *panel) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(panel->drm, &default_mode); + if (!mode) { + dev_err(panel->drm->dev, "failed to add mode %ux%ux@%u\n", + default_mode.hdisplay, 
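The unprepare()/prepare() pair above enforces the 500 ms minimum off-time without blocking unprepare() itself: power-off only records the earliest permissible wake-up, and the next prepare() sleeps for whatever remains. The same logic pulled out into a helper; wait_for_min_off_time() is a hypothetical refactoring of the inline code, using MIN_POFF_MS as defined above:

static void wait_for_min_off_time(struct wuxga_nt_panel *p)
{
	s64 remaining = ktime_ms_delta(p->earliest_wake, ktime_get_real());

	/* Guard against clock adjustments: we never have to wait
	 * longer than the full off-period. */
	if (remaining > MIN_POFF_MS)
		remaining = MIN_POFF_MS;

	if (remaining > 0)
		msleep(remaining);
}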
default_mode.vdisplay, + default_mode.vrefresh); + return -ENOMEM; + } + + drm_mode_set_name(mode); + + drm_mode_probed_add(panel->connector, mode); + + panel->connector->display_info.width_mm = 217; + panel->connector->display_info.height_mm = 136; + + return 1; +} + +static const struct drm_panel_funcs wuxga_nt_panel_funcs = { + .disable = wuxga_nt_panel_disable, + .unprepare = wuxga_nt_panel_unprepare, + .prepare = wuxga_nt_panel_prepare, + .enable = wuxga_nt_panel_enable, + .get_modes = wuxga_nt_panel_get_modes, +}; + +static const struct of_device_id wuxga_nt_of_match[] = { + { .compatible = "panasonic,vvx10f034n00", }, + { } +}; +MODULE_DEVICE_TABLE(of, wuxga_nt_of_match); + +static int wuxga_nt_panel_add(struct wuxga_nt_panel *wuxga_nt) +{ + struct device *dev = &wuxga_nt->dsi->dev; + struct device_node *np; + int ret; + + wuxga_nt->mode = &default_mode; + + wuxga_nt->supply = devm_regulator_get(dev, "power"); + if (IS_ERR(wuxga_nt->supply)) + return PTR_ERR(wuxga_nt->supply); + + np = of_parse_phandle(dev->of_node, "backlight", 0); + if (np) { + wuxga_nt->backlight = of_find_backlight_by_node(np); + of_node_put(np); + + if (!wuxga_nt->backlight) + return -EPROBE_DEFER; + } + + drm_panel_init(&wuxga_nt->base); + wuxga_nt->base.funcs = &wuxga_nt_panel_funcs; + wuxga_nt->base.dev = &wuxga_nt->dsi->dev; + + ret = drm_panel_add(&wuxga_nt->base); + if (ret < 0) + goto put_backlight; + + return 0; + +put_backlight: + if (wuxga_nt->backlight) + put_device(&wuxga_nt->backlight->dev); + + return ret; +} + +static void wuxga_nt_panel_del(struct wuxga_nt_panel *wuxga_nt) +{ + if (wuxga_nt->base.dev) + drm_panel_remove(&wuxga_nt->base); + + if (wuxga_nt->backlight) + put_device(&wuxga_nt->backlight->dev); +} + +static int wuxga_nt_panel_probe(struct mipi_dsi_device *dsi) +{ + struct wuxga_nt_panel *wuxga_nt; + int ret; + + dsi->lanes = 4; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | + MIPI_DSI_MODE_VIDEO_HSE | + MIPI_DSI_CLOCK_NON_CONTINUOUS | + MIPI_DSI_MODE_LPM; + + wuxga_nt = devm_kzalloc(&dsi->dev, sizeof(*wuxga_nt), GFP_KERNEL); + if (!wuxga_nt) + return -ENOMEM; + + mipi_dsi_set_drvdata(dsi, wuxga_nt); + + wuxga_nt->dsi = dsi; + + ret = wuxga_nt_panel_add(wuxga_nt); + if (ret < 0) + return ret; + + return mipi_dsi_attach(dsi); +} + +static int wuxga_nt_panel_remove(struct mipi_dsi_device *dsi) +{ + struct wuxga_nt_panel *wuxga_nt = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = wuxga_nt_panel_disable(&wuxga_nt->base); + if (ret < 0) + dev_err(&dsi->dev, "failed to disable panel: %d\n", ret); + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", ret); + + drm_panel_detach(&wuxga_nt->base); + wuxga_nt_panel_del(wuxga_nt); + + return 0; +} + +static void wuxga_nt_panel_shutdown(struct mipi_dsi_device *dsi) +{ + struct wuxga_nt_panel *wuxga_nt = mipi_dsi_get_drvdata(dsi); + + wuxga_nt_panel_disable(&wuxga_nt->base); +} + +static struct mipi_dsi_driver wuxga_nt_panel_driver = { + .driver = { + .name = "panel-panasonic-vvx10f034n00", + .of_match_table = wuxga_nt_of_match, + }, + .probe = wuxga_nt_panel_probe, + .remove = wuxga_nt_panel_remove, + .shutdown = wuxga_nt_panel_shutdown, +}; +module_mipi_dsi_driver(wuxga_nt_panel_driver); + +MODULE_AUTHOR("Werner Johansson <werner.johansson@sonymobile.com>"); +MODULE_DESCRIPTION("Panasonic VVX10F034N00 Novatek NT1397-based WUXGA (1920x1200) video mode panel driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c 
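wuxga_nt_panel_add() above shows the usual optional-backlight lookup for DT panels: resolve the "backlight" phandle, and if the node exists but no backlight device has bound to it yet, defer the probe instead of silently running without one. The pattern as a self-contained helper (a sketch; lookup_optional_backlight() is hypothetical):

static int lookup_optional_backlight(struct device *dev,
				     struct backlight_device **out)
{
	struct device_node *np;

	np = of_parse_phandle(dev->of_node, "backlight", 0);
	if (!np)
		return 0;	/* property absent: backlight is optional */

	*out = of_find_backlight_by_node(np);
	of_node_put(np);

	/* Node present but its driver is not bound yet. */
	return *out ? 0 : -EPROBE_DEFER;
}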
b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c new file mode 100644 index 000000000000..3aeb0bda4947 --- /dev/null +++ b/drivers/gpu/drm/panel/panel-sharp-ls043t1le01.c @@ -0,0 +1,387 @@ +/* + * Copyright (C) 2015 Red Hat + * Copyright (C) 2015 Sony Mobile Communications Inc. + * Author: Werner Johansson <werner.johansson@sonymobile.com> + * + * Based on AUO panel driver by Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/backlight.h> +#include <linux/gpio/consumer.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/regulator/consumer.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> + +#include <video/mipi_display.h> + +struct sharp_nt_panel { + struct drm_panel base; + struct mipi_dsi_device *dsi; + + struct backlight_device *backlight; + struct regulator *supply; + struct gpio_desc *reset_gpio; + + bool prepared; + bool enabled; + + const struct drm_display_mode *mode; +}; + +static inline struct sharp_nt_panel *to_sharp_nt_panel(struct drm_panel *panel) +{ + return container_of(panel, struct sharp_nt_panel, base); +} + +static int sharp_nt_panel_init(struct sharp_nt_panel *sharp_nt) +{ + struct mipi_dsi_device *dsi = sharp_nt->dsi; + int ret; + + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + + ret = mipi_dsi_dcs_exit_sleep_mode(dsi); + if (ret < 0) + return ret; + + msleep(120); + + /* Novatek two-lane operation */ + ret = mipi_dsi_dcs_write(dsi, 0xae, (u8[]){ 0x03 }, 1); + if (ret < 0) + return ret; + + /* Set both MCU and RGB I/F to 24bpp */ + ret = mipi_dsi_dcs_set_pixel_format(dsi, MIPI_DCS_PIXEL_FMT_24BIT | + (MIPI_DCS_PIXEL_FMT_24BIT << 4)); + if (ret < 0) + return ret; + + return 0; +} + +static int sharp_nt_panel_on(struct sharp_nt_panel *sharp_nt) +{ + struct mipi_dsi_device *dsi = sharp_nt->dsi; + int ret; + + dsi->mode_flags |= MIPI_DSI_MODE_LPM; + + ret = mipi_dsi_dcs_set_display_on(dsi); + if (ret < 0) + return ret; + + return 0; +} + +static int sharp_nt_panel_off(struct sharp_nt_panel *sharp_nt) +{ + struct mipi_dsi_device *dsi = sharp_nt->dsi; + int ret; + + dsi->mode_flags &= ~MIPI_DSI_MODE_LPM; + + ret = mipi_dsi_dcs_set_display_off(dsi); + if (ret < 0) + return ret; + + ret = mipi_dsi_dcs_enter_sleep_mode(dsi); + if (ret < 0) + return ret; + + return 0; +} + + +static int sharp_nt_panel_disable(struct drm_panel *panel) +{ + struct sharp_nt_panel *sharp_nt = to_sharp_nt_panel(panel); + + if (!sharp_nt->enabled) + return 0; + + if (sharp_nt->backlight) { + sharp_nt->backlight->props.power = FB_BLANK_POWERDOWN; + backlight_update_status(sharp_nt->backlight); + } + + sharp_nt->enabled = false; + + return 0; +} + +static int sharp_nt_panel_unprepare(struct drm_panel *panel) +{ + struct sharp_nt_panel *sharp_nt = to_sharp_nt_panel(panel); + int ret; + + if (!sharp_nt->prepared) + return 0; + + ret = sharp_nt_panel_off(sharp_nt); + if (ret < 0) { + dev_err(panel->dev, "failed to set panel off: %d\n", ret); 
+ return ret; + } + + regulator_disable(sharp_nt->supply); + if (sharp_nt->reset_gpio) + gpiod_set_value(sharp_nt->reset_gpio, 0); + + sharp_nt->prepared = false; + + return 0; +} + +static int sharp_nt_panel_prepare(struct drm_panel *panel) +{ + struct sharp_nt_panel *sharp_nt = to_sharp_nt_panel(panel); + int ret; + + if (sharp_nt->prepared) + return 0; + + ret = regulator_enable(sharp_nt->supply); + if (ret < 0) + return ret; + + msleep(20); + + if (sharp_nt->reset_gpio) { + gpiod_set_value(sharp_nt->reset_gpio, 1); + msleep(1); + gpiod_set_value(sharp_nt->reset_gpio, 0); + msleep(1); + gpiod_set_value(sharp_nt->reset_gpio, 1); + msleep(10); + } + + ret = sharp_nt_panel_init(sharp_nt); + if (ret < 0) { + dev_err(panel->dev, "failed to init panel: %d\n", ret); + goto poweroff; + } + + ret = sharp_nt_panel_on(sharp_nt); + if (ret < 0) { + dev_err(panel->dev, "failed to set panel on: %d\n", ret); + goto poweroff; + } + + sharp_nt->prepared = true; + + return 0; + +poweroff: + regulator_disable(sharp_nt->supply); + if (sharp_nt->reset_gpio) + gpiod_set_value(sharp_nt->reset_gpio, 0); + return ret; +} + +static int sharp_nt_panel_enable(struct drm_panel *panel) +{ + struct sharp_nt_panel *sharp_nt = to_sharp_nt_panel(panel); + + if (sharp_nt->enabled) + return 0; + + if (sharp_nt->backlight) { + sharp_nt->backlight->props.power = FB_BLANK_UNBLANK; + backlight_update_status(sharp_nt->backlight); + } + + sharp_nt->enabled = true; + + return 0; +} + +static const struct drm_display_mode default_mode = { + .clock = 41118, + .hdisplay = 540, + .hsync_start = 540 + 48, + .hsync_end = 540 + 48 + 80, + .htotal = 540 + 48 + 80 + 32, + .vdisplay = 960, + .vsync_start = 960 + 3, + .vsync_end = 960 + 3 + 15, + .vtotal = 960 + 3 + 15 + 1, + .vrefresh = 60, +}; + +static int sharp_nt_panel_get_modes(struct drm_panel *panel) +{ + struct drm_display_mode *mode; + + mode = drm_mode_duplicate(panel->drm, &default_mode); + if (!mode) { + dev_err(panel->drm->dev, "failed to add mode %ux%ux@%u\n", + default_mode.hdisplay, default_mode.vdisplay, + default_mode.vrefresh); + return -ENOMEM; + } + + drm_mode_set_name(mode); + + drm_mode_probed_add(panel->connector, mode); + + panel->connector->display_info.width_mm = 54; + panel->connector->display_info.height_mm = 95; + + return 1; +} + +static const struct drm_panel_funcs sharp_nt_panel_funcs = { + .disable = sharp_nt_panel_disable, + .unprepare = sharp_nt_panel_unprepare, + .prepare = sharp_nt_panel_prepare, + .enable = sharp_nt_panel_enable, + .get_modes = sharp_nt_panel_get_modes, +}; + +static int sharp_nt_panel_add(struct sharp_nt_panel *sharp_nt) +{ + struct device *dev = &sharp_nt->dsi->dev; + struct device_node *np; + int ret; + + sharp_nt->mode = &default_mode; + + sharp_nt->supply = devm_regulator_get(dev, "avdd"); + if (IS_ERR(sharp_nt->supply)) + return PTR_ERR(sharp_nt->supply); + + sharp_nt->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + if (IS_ERR(sharp_nt->reset_gpio)) { + dev_err(dev, "cannot get reset-gpios %ld\n", + PTR_ERR(sharp_nt->reset_gpio)); + sharp_nt->reset_gpio = NULL; + } else { + gpiod_set_value(sharp_nt->reset_gpio, 0); + } + + np = of_parse_phandle(dev->of_node, "backlight", 0); + if (np) { + sharp_nt->backlight = of_find_backlight_by_node(np); + of_node_put(np); + + if (!sharp_nt->backlight) + return -EPROBE_DEFER; + } + + drm_panel_init(&sharp_nt->base); + sharp_nt->base.funcs = &sharp_nt_panel_funcs; + sharp_nt->base.dev = &sharp_nt->dsi->dev; + + ret = drm_panel_add(&sharp_nt->base); + if (ret < 0) + goto 
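sharp_nt_panel_prepare() above pulses the reset GPIO with msleep(1) between the edges. For delays under roughly 20 ms the kernel's timer documentation recommends usleep_range() over msleep(), which can overshoot badly for small values, so an equivalent pulse could also be written as follows (a sketch, not part of the patch; the upper bounds are chosen loosely):

	if (p->reset_gpio) {
		gpiod_set_value(p->reset_gpio, 1);
		usleep_range(1000, 2000);	/* >= 1 ms high */
		gpiod_set_value(p->reset_gpio, 0);
		usleep_range(1000, 2000);	/* >= 1 ms low */
		gpiod_set_value(p->reset_gpio, 1);
		usleep_range(10000, 11000);	/* settle before first DCS command */
	}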
put_backlight; + + return 0; + +put_backlight: + if (sharp_nt->backlight) + put_device(&sharp_nt->backlight->dev); + + return ret; +} + +static void sharp_nt_panel_del(struct sharp_nt_panel *sharp_nt) +{ + if (sharp_nt->base.dev) + drm_panel_remove(&sharp_nt->base); + + if (sharp_nt->backlight) + put_device(&sharp_nt->backlight->dev); +} + +static int sharp_nt_panel_probe(struct mipi_dsi_device *dsi) +{ + struct sharp_nt_panel *sharp_nt; + int ret; + + dsi->lanes = 2; + dsi->format = MIPI_DSI_FMT_RGB888; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | + MIPI_DSI_MODE_VIDEO_HSE | + MIPI_DSI_CLOCK_NON_CONTINUOUS | + MIPI_DSI_MODE_EOT_PACKET; + + sharp_nt = devm_kzalloc(&dsi->dev, sizeof(*sharp_nt), GFP_KERNEL); + if (!sharp_nt) + return -ENOMEM; + + mipi_dsi_set_drvdata(dsi, sharp_nt); + + sharp_nt->dsi = dsi; + + ret = sharp_nt_panel_add(sharp_nt); + if (ret < 0) + return ret; + + return mipi_dsi_attach(dsi); +} + +static int sharp_nt_panel_remove(struct mipi_dsi_device *dsi) +{ + struct sharp_nt_panel *sharp_nt = mipi_dsi_get_drvdata(dsi); + int ret; + + ret = sharp_nt_panel_disable(&sharp_nt->base); + if (ret < 0) + dev_err(&dsi->dev, "failed to disable panel: %d\n", ret); + + ret = mipi_dsi_detach(dsi); + if (ret < 0) + dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", ret); + + drm_panel_detach(&sharp_nt->base); + sharp_nt_panel_del(sharp_nt); + + return 0; +} + +static void sharp_nt_panel_shutdown(struct mipi_dsi_device *dsi) +{ + struct sharp_nt_panel *sharp_nt = mipi_dsi_get_drvdata(dsi); + + sharp_nt_panel_disable(&sharp_nt->base); +} + +static const struct of_device_id sharp_nt_of_match[] = { + { .compatible = "sharp,ls043t1le01-qhd", }, + { } +}; +MODULE_DEVICE_TABLE(of, sharp_nt_of_match); + +static struct mipi_dsi_driver sharp_nt_panel_driver = { + .driver = { + .name = "panel-sharp-ls043t1le01-qhd", + .of_match_table = sharp_nt_of_match, + }, + .probe = sharp_nt_panel_probe, + .remove = sharp_nt_panel_remove, + .shutdown = sharp_nt_panel_shutdown, +}; +module_mipi_dsi_driver(sharp_nt_panel_driver); + +MODULE_AUTHOR("Werner Johansson <werner.johansson@sonymobile.com>"); +MODULE_DESCRIPTION("Sharp LS043T1LE01 NT35565-based qHD (540x960) video mode panel driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index f97b73ec4713..f88a631c43ab 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -44,6 +44,10 @@ struct panel_desc { unsigned int bpc; + /** + * @width: width (in millimeters) of the panel's active display area + * @height: height (in millimeters) of the panel's active display area + */ struct { unsigned int width; unsigned int height; @@ -832,6 +836,34 @@ static const struct panel_desc innolux_g121i1_l01 = { }, }; +static const struct drm_display_mode innolux_g121x1_l03_mode = { + .clock = 65000, + .hdisplay = 1024, + .hsync_start = 1024 + 0, + .hsync_end = 1024 + 1, + .htotal = 1024 + 0 + 1 + 320, + .vdisplay = 768, + .vsync_start = 768 + 38, + .vsync_end = 768 + 38 + 1, + .vtotal = 768 + 38 + 1 + 0, + .vrefresh = 60, +}; + +static const struct panel_desc innolux_g121x1_l03 = { + .modes = &innolux_g121x1_l03_mode, + .num_modes = 1, + .bpc = 6, + .size = { + .width = 246, + .height = 185, + }, + .delay = { + .enable = 200, + .unprepare = 200, + .disable = 400, + }, +}; + static const struct drm_display_mode innolux_n116bge_mode = { .clock = 76420, .hdisplay = 1366, @@ -902,6 +934,30 @@ static const struct panel_desc innolux_zj070na_01p = { }, }; +static 
const struct display_timing kyo_tcg121xglp_timing = { + .pixelclock = { 52000000, 65000000, 71000000 }, + .hactive = { 1024, 1024, 1024 }, + .hfront_porch = { 2, 2, 2 }, + .hback_porch = { 2, 2, 2 }, + .hsync_len = { 86, 124, 244 }, + .vactive = { 768, 768, 768 }, + .vfront_porch = { 2, 2, 2 }, + .vback_porch = { 2, 2, 2 }, + .vsync_len = { 6, 34, 73 }, + .flags = DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc kyo_tcg121xglp = { + .timings = &kyo_tcg121xglp_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 246, + .height = 184, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, +}; + static const struct drm_display_mode lg_lb070wv8_mode = { .clock = 33246, .hdisplay = 800, @@ -1027,6 +1083,30 @@ static const struct panel_desc ortustech_com43h4m85ulc = { .bus_format = MEDIA_BUS_FMT_RGB888_1X24, }; +static const struct drm_display_mode qd43003c0_40_mode = { + .clock = 9000, + .hdisplay = 480, + .hsync_start = 480 + 8, + .hsync_end = 480 + 8 + 4, + .htotal = 480 + 8 + 4 + 39, + .vdisplay = 272, + .vsync_start = 272 + 4, + .vsync_end = 272 + 4 + 10, + .vtotal = 272 + 4 + 10 + 2, + .vrefresh = 60, +}; + +static const struct panel_desc qd43003c0_40 = { + .modes = &qd43003c0_40_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 95, + .height = 53, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, +}; + static const struct drm_display_mode samsung_ltn101nt05_mode = { .clock = 54030, .hdisplay = 1024, @@ -1158,6 +1238,9 @@ static const struct of_device_id platform_of_match[] = { .compatible ="innolux,g121i1-l01", .data = &innolux_g121i1_l01 }, { + .compatible = "innolux,g121x1-l03", + .data = &innolux_g121x1_l03, + }, { .compatible = "innolux,n116bge", .data = &innolux_n116bge, }, { @@ -1167,6 +1250,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "innolux,zj070na-01p", .data = &innolux_zj070na_01p, }, { + .compatible = "kyo,tcg121xglp", + .data = &kyo_tcg121xglp, + }, { .compatible = "lg,lb070wv8", .data = &lg_lb070wv8, }, { @@ -1182,6 +1268,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "ortustech,com43h4m85ulc", .data = &ortustech_com43h4m85ulc, }, { + .compatible = "qiaodian,qd43003c0-40", + .data = &qd43003c0_40, + }, { .compatible = "samsung,ltn101nt05", .data = &samsung_ltn101nt05, }, { @@ -1263,6 +1352,36 @@ static const struct panel_desc_dsi auo_b080uan01 = { .lanes = 4, }; +static const struct drm_display_mode boe_tv080wum_nl0_mode = { + .clock = 160000, + .hdisplay = 1200, + .hsync_start = 1200 + 120, + .hsync_end = 1200 + 120 + 20, + .htotal = 1200 + 120 + 20 + 21, + .vdisplay = 1920, + .vsync_start = 1920 + 21, + .vsync_end = 1920 + 21 + 3, + .vtotal = 1920 + 21 + 3 + 18, + .vrefresh = 60, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, +}; + +static const struct panel_desc_dsi boe_tv080wum_nl0 = { + .desc = { + .modes = &boe_tv080wum_nl0_mode, + .num_modes = 1, + .size = { + .width = 107, + .height = 172, + }, + }, + .flags = MIPI_DSI_MODE_VIDEO | + MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_VIDEO_SYNC_PULSE, + .format = MIPI_DSI_FMT_RGB888, + .lanes = 4, +}; + static const struct drm_display_mode lg_ld070wx3_sl01_mode = { .clock = 71000, .hdisplay = 800, @@ -1348,11 +1467,15 @@ static const struct panel_desc_dsi panasonic_vvx10f004b00 = { .lanes = 4, }; + static const struct of_device_id dsi_of_match[] = { { .compatible = "auo,b080uan01", .data = &auo_b080uan01 }, { + .compatible = "boe,tv080wum-nl0", + .data = &boe_tv080wum_nl0 + }, { .compatible = "lg,ld070wx3-sl01", .data = 
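The panel-simple additions above show both description styles the driver supports: a fixed drm_display_mode (innolux_g121x1_l03, qd43003c0_40, boe_tv080wum_nl0) or a display_timing with min/typ/max ranges (kyo_tcg121xglp), each paired with the physical panel size and an of_device_id entry. A hypothetical entry in the same shape, with made-up numbers and compatible string:

static const struct drm_display_mode example_mode = {
	.clock = 33300,
	.hdisplay = 800,
	.hsync_start = 800 + 40,
	.hsync_end = 800 + 40 + 128,
	.htotal = 800 + 40 + 128 + 88,
	.vdisplay = 480,
	.vsync_start = 480 + 10,
	.vsync_end = 480 + 10 + 2,
	.vtotal = 480 + 10 + 2 + 33,
	.vrefresh = 60,
};

static const struct panel_desc example_panel = {
	.modes = &example_mode,
	.num_modes = 1,
	.bpc = 8,
	.size = {		/* active area in millimeters */
		.width = 152,
		.height = 91,
	},
	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
};

/* wired up with: { .compatible = "vendor,example-panel", .data = &example_panel }, */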
&lg_ld070wx3_sl01 }, { diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index cddba079197f..86276519b2ef 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -876,16 +876,6 @@ static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = { .best_encoder = qxl_best_encoder, }; -static void qxl_conn_save(struct drm_connector *connector) -{ - DRM_DEBUG("\n"); -} - -static void qxl_conn_restore(struct drm_connector *connector) -{ - DRM_DEBUG("\n"); -} - static enum drm_connector_status qxl_conn_detect( struct drm_connector *connector, bool force) @@ -932,10 +922,8 @@ static void qxl_conn_destroy(struct drm_connector *connector) static const struct drm_connector_funcs qxl_connector_funcs = { .dpms = drm_helper_connector_dpms, - .save = qxl_conn_save, - .restore = qxl_conn_restore, .detect = qxl_conn_detect, - .fill_modes = drm_helper_probe_single_connector_modes_nomerge, + .fill_modes = drm_helper_probe_single_connector_modes, .set_property = qxl_conn_set_property, .destroy = qxl_conn_destroy, }; @@ -980,7 +968,7 @@ static int qdev_output_init(struct drm_device *dev, int num_output) &qxl_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); drm_encoder_init(dev, &qxl_output->enc, &qxl_enc_funcs, - DRM_MODE_ENCODER_VIRTUAL); + DRM_MODE_ENCODER_VIRTUAL, NULL); /* we get HPD via client monitors config */ connector->polled = DRM_CONNECTOR_POLL_HPD; diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index b28370e014c6..5e1d7899dd72 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -32,7 +32,7 @@ static void qxl_ttm_bo_destroy(struct ttm_buffer_object *tbo) struct qxl_bo *bo; struct qxl_device *qdev; - bo = container_of(tbo, struct qxl_bo, tbo); + bo = to_qxl_bo(tbo); qdev = (struct qxl_device *)bo->gem_base.dev->dev_private; qxl_surface_evict(qdev, bo, false); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 0cbc4c987164..953412766416 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -201,7 +201,7 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - qbo = container_of(bo, struct qxl_bo, tbo); + qbo = to_qxl_bo(bo); qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU, false); *placement = qbo->placement; } @@ -365,7 +365,7 @@ static void qxl_bo_move_notify(struct ttm_buffer_object *bo, if (!qxl_ttm_bo_is_qxl_bo(bo)) return; - qbo = container_of(bo, struct qxl_bo, tbo); + qbo = to_qxl_bo(bo); qdev = qbo->gem_base.dev->dev_private; if (bo->mem.mem_type == TTM_PL_PRIV0 && qbo->surface_id) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index dac78ad24b31..801dd60ac192 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -25,6 +25,7 @@ */ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/radeon_drm.h> #include <drm/drm_fixed.h> #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index bb292143997e..01b20e14a247 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -2767,23 +2767,27 @@ radeon_add_atom_encoder(struct drm_device *dev, case ENCODER_OBJECT_ID_INTERNAL_LVTM1: if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { radeon_encoder->rmx_type = RMX_FULL; - 
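The qxl hunks above replace open-coded container_of() casts with the driver's to_qxl_bo() accessor, which keeps the embedding relationship in one place. The helper is presumably the usual one-liner (a sketch of its expected definition):

static inline struct qxl_bo *to_qxl_bo(struct ttm_buffer_object *tbo)
{
	/* struct qxl_bo embeds its ttm_buffer_object as member 'tbo'. */
	return container_of(tbo, struct qxl_bo, tbo);
}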
drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_LVDS, NULL); radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder); } else { - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder); } drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs); break; case ENCODER_OBJECT_ID_INTERNAL_DAC1: - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_DAC, NULL); radeon_encoder->enc_priv = radeon_atombios_set_dac_info(radeon_encoder); drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs); break; case ENCODER_OBJECT_ID_INTERNAL_DAC2: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2: - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TVDAC); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_TVDAC, NULL); radeon_encoder->enc_priv = radeon_atombios_set_dac_info(radeon_encoder); drm_encoder_helper_add(encoder, &radeon_atom_dac_helper_funcs); break; @@ -2797,13 +2801,16 @@ radeon_add_atom_encoder(struct drm_device *dev, case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { radeon_encoder->rmx_type = RMX_FULL; - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_LVDS, NULL); radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder); } else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) { - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_DAC, NULL); radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder); } else { - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder); } drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs); @@ -2820,11 +2827,14 @@ radeon_add_atom_encoder(struct drm_device *dev, /* these are handled by the primary encoders */ radeon_encoder->is_ext_encoder = true; if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_LVDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_LVDS, NULL); else if (radeon_encoder->devices & (ATOM_DEVICE_CRT_SUPPORT)) - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_DAC); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_DAC, NULL); else - drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &radeon_atom_ext_helper_funcs); break; } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index f16b60b6a30f..1e3a80165309 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8472,7 +8472,7 @@ restart_ih: if (queue_dp) 
schedule_work(&rdev->dp_work); if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (queue_reset) { rdev->needs_reset = true; wake_up_all(&rdev->fence_queue); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index e533db11f70a..2ad462896896 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5347,7 +5347,7 @@ restart_ih: if (queue_dp) schedule_work(&rdev->dp_work); if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (queue_hdmi) schedule_work(&rdev->audio_work); if (queue_thermal && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index aff1e4d8098d..9e7e2bf03b81 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -806,7 +806,7 @@ int r100_irq_process(struct radeon_device *rdev) status = r100_irq_ack(rdev); } if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (rdev->msi_enabled) { switch (rdev->family) { case CHIP_RS400: diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 4ea5b10ff5f4..cc2fdf0be37a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4276,7 +4276,7 @@ restart_ih: WREG32(IH_RB_RPTR, rptr); } if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (queue_hdmi) schedule_work(&rdev->audio_work); if (queue_thermal && rdev->pm.dpm_enabled) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a9955e85009a..5ae6db98aa4d 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2415,7 +2415,7 @@ struct radeon_device { struct r600_ih ih; /* r6/700 interrupt ring */ struct radeon_rlc rlc; struct radeon_mec mec; - struct work_struct hotplug_work; + struct delayed_work hotplug_work; struct work_struct dp_work; struct work_struct audio_work; int num_crtc; /* number of crtcs */ diff --git a/drivers/gpu/drm/radeon/radeon_agp.c b/drivers/gpu/drm/radeon/radeon_agp.c index fe994aac3b04..c77d349c561c 100644 --- a/drivers/gpu/drm/radeon/radeon_agp.c +++ b/drivers/gpu/drm/radeon/radeon_agp.c @@ -54,6 +54,9 @@ static struct radeon_agpmode_quirk radeon_agpmode_quirk_list[] = { /* Intel 82855PM host bridge / Mobility 9600 M10 RV350 Needs AGPMode 1 (lp #195051) */ { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4e50, PCI_VENDOR_ID_IBM, 0x0550, 1}, + /* Intel 82855PM host bridge / RV250/M9 GL [Mobility FireGL 9000/Radeon 9000] needs AGPMode 1 (Thinkpad T40p) */ + { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c66, + PCI_VENDOR_ID_IBM, 0x054d, 1}, /* Intel 82855PM host bridge / Mobility M7 needs AGPMode 1 */ { PCI_VENDOR_ID_INTEL, 0x3340, PCI_VENDOR_ID_ATI, 0x4c57, PCI_VENDOR_ID_IBM, 0x0530, 1}, diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 5a2cafb4f1bc..340f3f549f29 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -1234,13 +1234,32 @@ radeon_dvi_detect(struct drm_connector *connector, bool force) if (r < 0) return connector_status_disconnected; + if (radeon_connector->detected_hpd_without_ddc) { + force = true; + radeon_connector->detected_hpd_without_ddc = false; + } + if (!force && radeon_check_hpd_status_unchanged(connector)) { ret = connector->status; goto exit; } - if 
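Converting rdev->hotplug_work from a work_struct to a delayed_work, as the radeon hunks above and below do, lets the same handler be kicked either immediately (delay 0, equivalent to the old schedule_work()) or after a debounce interval, as the DVI detect path uses for its one-second DDC retry. A self-contained sketch of the conversion pattern with a stand-in context struct:

#include <linux/workqueue.h>

struct hotplug_ctx {
	struct delayed_work hotplug_work;
};

static void hotplug_work_func(struct work_struct *work)
{
	/* With delayed_work the handler recovers its container via the
	 * embedded .work member; the radeon_hotplug_work_func() change
	 * below does exactly this. */
	struct hotplug_ctx *ctx =
		container_of(work, struct hotplug_ctx, hotplug_work.work);
	(void)ctx;
}

static void hotplug_init(struct hotplug_ctx *ctx)
{
	INIT_DELAYED_WORK(&ctx->hotplug_work, hotplug_work_func);
}

static void hotplug_kick(struct hotplug_ctx *ctx, bool debounce)
{
	schedule_delayed_work(&ctx->hotplug_work,
			      debounce ? msecs_to_jiffies(1000) : 0);
}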
(radeon_connector->ddc_bus) + if (radeon_connector->ddc_bus) { dret = radeon_ddc_probe(radeon_connector, false); + + /* Sometimes the pins required for the DDC probe on DVI + * connectors don't make contact at the same time that the ones + * for HPD do. If the DDC probe fails even though we had an HPD + * signal, try again later */ + if (!dret && !force && + connector->status != connector_status_connected) { + DRM_DEBUG_KMS("hpd detected without ddc, retrying in 1 second\n"); + radeon_connector->detected_hpd_without_ddc = true; + schedule_delayed_work(&rdev->hotplug_work, + msecs_to_jiffies(1000)); + goto exit; + } + } if (dret) { radeon_connector->detected_by_load = false; radeon_connector_free_edid(connector); diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index b431c9c2b247..c236f6fec245 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -647,7 +647,7 @@ radeon_dp_create_fake_mst_encoder(struct radeon_connector *connector) } drm_encoder_init(dev, &radeon_encoder->base, &radeon_dp_mst_enc_funcs, - DRM_MODE_ENCODER_DPMST); + DRM_MODE_ENCODER_DPMST, NULL); drm_encoder_helper_add(encoder, &radeon_mst_helper_funcs); mst_enc = radeon_encoder->enc_priv; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 171d3e43c30c..979f3bf65f2c 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -74,7 +74,7 @@ irqreturn_t radeon_driver_irq_handler_kms(int irq, void *arg) static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, - hotplug_work); + hotplug_work.work); struct drm_device *dev = rdev->ddev; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; @@ -302,7 +302,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev) } } - INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); + INIT_DELAYED_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->dp_work, radeon_dp_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); @@ -310,7 +310,7 @@ int radeon_irq_kms_init(struct radeon_device *rdev) r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq); if (r) { rdev->irq.installed = false; - flush_work(&rdev->hotplug_work); + flush_delayed_work(&rdev->hotplug_work); return r; } @@ -333,7 +333,7 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) rdev->irq.installed = false; if (rdev->msi_enabled) pci_disable_msi(rdev->pdev); - flush_work(&rdev->hotplug_work); + flush_delayed_work(&rdev->hotplug_work); } } diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 678b4386540d..32b338ff436b 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -25,6 +25,7 @@ */ #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> #include <drm/radeon_drm.h> #include <drm/drm_fixed.h> #include "radeon.h" diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index 30de43366eae..88dc973fb209 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -1772,7 +1772,8 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum, uint32_ switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_LVDS: 
encoder->possible_crtcs = 0x1; - drm_encoder_init(dev, encoder, &radeon_legacy_lvds_enc_funcs, DRM_MODE_ENCODER_LVDS); + drm_encoder_init(dev, encoder, &radeon_legacy_lvds_enc_funcs, + DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(encoder, &radeon_legacy_lvds_helper_funcs); if (rdev->is_atom_bios) radeon_encoder->enc_priv = radeon_atombios_get_lvds_info(radeon_encoder); @@ -1781,12 +1782,14 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum, uint32_ radeon_encoder->rmx_type = RMX_FULL; break; case ENCODER_OBJECT_ID_INTERNAL_TMDS1: - drm_encoder_init(dev, encoder, &radeon_legacy_tmds_int_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &radeon_legacy_tmds_int_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &radeon_legacy_tmds_int_helper_funcs); radeon_encoder->enc_priv = radeon_legacy_get_tmds_info(radeon_encoder); break; case ENCODER_OBJECT_ID_INTERNAL_DAC1: - drm_encoder_init(dev, encoder, &radeon_legacy_primary_dac_enc_funcs, DRM_MODE_ENCODER_DAC); + drm_encoder_init(dev, encoder, &radeon_legacy_primary_dac_enc_funcs, + DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(encoder, &radeon_legacy_primary_dac_helper_funcs); if (rdev->is_atom_bios) radeon_encoder->enc_priv = radeon_atombios_get_primary_dac_info(radeon_encoder); @@ -1794,7 +1797,8 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum, uint32_ radeon_encoder->enc_priv = radeon_combios_get_primary_dac_info(radeon_encoder); break; case ENCODER_OBJECT_ID_INTERNAL_DAC2: - drm_encoder_init(dev, encoder, &radeon_legacy_tv_dac_enc_funcs, DRM_MODE_ENCODER_TVDAC); + drm_encoder_init(dev, encoder, &radeon_legacy_tv_dac_enc_funcs, + DRM_MODE_ENCODER_TVDAC, NULL); drm_encoder_helper_add(encoder, &radeon_legacy_tv_dac_helper_funcs); if (rdev->is_atom_bios) radeon_encoder->enc_priv = radeon_atombios_get_tv_dac_info(radeon_encoder); @@ -1802,7 +1806,8 @@ radeon_add_legacy_encoder(struct drm_device *dev, uint32_t encoder_enum, uint32_ radeon_encoder->enc_priv = radeon_combios_get_tv_dac_info(radeon_encoder); break; case ENCODER_OBJECT_ID_INTERNAL_DVO1: - drm_encoder_init(dev, encoder, &radeon_legacy_tmds_ext_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &radeon_legacy_tmds_ext_enc_funcs, + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(encoder, &radeon_legacy_tmds_ext_helper_funcs); if (!rdev->is_atom_bios) radeon_encoder->enc_priv = radeon_legacy_get_ext_tmds_info(radeon_encoder); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 2c8331078529..bb75201a24ba 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -554,6 +554,7 @@ struct radeon_connector { void *con_priv; bool dac_load_detect; bool detected_by_load; /* if the connection status was determined by load */ + bool detected_hpd_without_ddc; /* if an HPD signal was detected on DVI, but ddc probing failed */ uint16_t connector_object_id; struct radeon_hpd hpd; struct radeon_router router; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 97a904835759..6244f4e44e9a 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -813,7 +813,7 @@ int rs600_irq_process(struct radeon_device *rdev) status = rs600_irq_ack(rdev); } if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (queue_hdmi) schedule_work(&rdev->audio_work); if (rdev->msi_enabled) { diff --git 
a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 3d5d41240b64..f878d6962da5 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6851,7 +6851,7 @@ restart_ih: if (queue_dp) schedule_work(&rdev->dp_work); if (queue_hotplug) - schedule_work(&rdev->hotplug_work); + schedule_delayed_work(&rdev->hotplug_work, 0); if (queue_thermal && rdev->pm.dpm_enabled) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 48cb19949ca3..88a4b706be16 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -613,7 +613,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, &rgrp->planes[index % 2].plane, - NULL, &crtc_funcs); + NULL, &crtc_funcs, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c index d0ae1e8009c6..c08700757feb 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c @@ -173,7 +173,7 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, goto done; } else { ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs, - encoder_type); + encoder_type, NULL); if (ret < 0) goto done; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c index 96f2eb43713c..a37b6e2fe51a 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c @@ -28,7 +28,7 @@ static int rcar_du_hdmi_connector_get_modes(struct drm_connector *connector) { struct rcar_du_connector *con = to_rcar_connector(connector); struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (sfuncs->get_modes == NULL) return 0; @@ -41,7 +41,7 @@ static int rcar_du_hdmi_connector_mode_valid(struct drm_connector *connector, { struct rcar_du_connector *con = to_rcar_connector(connector); struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (sfuncs->mode_valid == NULL) return MODE_OK; @@ -66,7 +66,7 @@ rcar_du_hdmi_connector_detect(struct drm_connector *connector, bool force) { struct rcar_du_connector *con = to_rcar_connector(connector); struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (sfuncs->detect == NULL) return connector_status_unknown; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c index 81da8419282b..2567efcbee36 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c @@ -35,7 +35,7 @@ struct rcar_du_hdmienc { static void rcar_du_hdmienc_disable(struct drm_encoder *encoder) { struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (sfuncs->dpms) sfuncs->dpms(encoder, DRM_MODE_DPMS_OFF); @@ -50,7 +50,7 @@ static void 
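A recurring cleanup in this series, visible in the nouveau, rockchip and rcar-du hunks, is constifying the various DRM function tables now that the core entry points accept const pointers. Besides documenting that the tables are never patched at runtime, this lets the linker place them in read-only memory. Schematically:

/* Before: writable table, placed in .data. */
static struct drm_encoder_funcs enc_funcs_rw = {
	.destroy = drm_encoder_cleanup,
};

/* After: read-only table, placed in .rodata. */
static const struct drm_encoder_funcs enc_funcs_ro = {
	.destroy = drm_encoder_cleanup,
};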
rcar_du_hdmienc_disable(struct drm_encoder *encoder) static void rcar_du_hdmienc_enable(struct drm_encoder *encoder) { struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (hdmienc->renc->lvds) rcar_du_lvdsenc_enable(hdmienc->renc->lvds, encoder->crtc, @@ -67,7 +67,7 @@ static int rcar_du_hdmienc_atomic_check(struct drm_encoder *encoder, struct drm_connector_state *conn_state) { struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode; const struct drm_display_mode *mode = &crtc_state->mode; @@ -89,7 +89,7 @@ static void rcar_du_hdmienc_mode_set(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode) { struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder); - struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); + const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder); if (sfuncs->mode_set) sfuncs->mode_set(encoder, mode, adjusted_mode); @@ -151,7 +151,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu, goto error; ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); if (ret < 0) goto error; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index ffa583712cd9..c3ed9522c0e1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -410,7 +410,8 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) ret = drm_universal_plane_init(rcdu->ddev, &plane->plane, crtcs, &rcar_du_plane_funcs, formats, - ARRAY_SIZE(formats), type); + ARRAY_SIZE(formats), type, + NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 80d6fc8a5cee..bddcabd7a370 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -173,7 +173,7 @@ dw_hdmi_rockchip_mode_valid(struct drm_connector *connector, return (valid) ? 
MODE_OK : MODE_BAD; } -static struct drm_encoder_funcs dw_hdmi_rockchip_encoder_funcs = { +static const struct drm_encoder_funcs dw_hdmi_rockchip_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -218,7 +218,7 @@ static void dw_hdmi_rockchip_encoder_prepare(struct drm_encoder *encoder) ROCKCHIP_OUT_MODE_AAAA); } -static struct drm_encoder_helper_funcs dw_hdmi_rockchip_encoder_helper_funcs = { +static const struct drm_encoder_helper_funcs dw_hdmi_rockchip_encoder_helper_funcs = { .mode_fixup = dw_hdmi_rockchip_encoder_mode_fixup, .mode_set = dw_hdmi_rockchip_encoder_mode_set, .prepare = dw_hdmi_rockchip_encoder_prepare, @@ -295,7 +295,7 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); drm_encoder_init(drm, encoder, &dw_hdmi_rockchip_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); return dw_hdmi_bind(dev, master, data, encoder, iores, irq, plat_data); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index f22e1e1ee64a..afbb7407c44f 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -450,10 +450,6 @@ static int rockchip_drm_bind(struct device *dev) if (!drm) return -ENOMEM; - ret = drm_dev_set_unique(drm, "%s", dev_name(dev)); - if (ret) - goto err_free; - ret = drm_dev_register(drm, 0); if (ret) goto err_free; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index b8ac5911c102..621f25c463bd 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -66,7 +66,7 @@ static int rockchip_drm_fb_create_handle(struct drm_framebuffer *fb, rockchip_fb->obj[0], handle); } -static struct drm_framebuffer_funcs rockchip_drm_fb_funcs = { +static const struct drm_framebuffer_funcs rockchip_drm_fb_funcs = { .destroy = rockchip_drm_fb_destroy, .create_handle = rockchip_drm_fb_create_handle, }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 8caea0a33dd8..d908321b94ce 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -67,6 +67,7 @@ static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj, * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap(). 
*/ vma->vm_flags &= ~VM_PFNMAP; + vma->vm_pgoff = 0; ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr, obj->size, &rk_obj->dma_attrs); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 5d8ae5e49c44..dd8e0860ad4e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -374,6 +374,7 @@ static const struct of_device_id vop_driver_dt_match[] = { .data = &rk3288_vop }, {}, }; +MODULE_DEVICE_TABLE(of, vop_driver_dt_match); static inline void vop_writel(struct vop *vop, uint32_t offset, uint32_t v) { @@ -959,8 +960,8 @@ static int vop_update_plane_event(struct drm_plane *plane, val = (dest.y2 - dest.y1 - 1) << 16; val |= (dest.x2 - dest.x1 - 1) & 0xffff; VOP_WIN_SET(vop, win, dsp_info, val); - val = (dsp_sty - 1) << 16; - val |= (dsp_stx - 1) & 0xffff; + val = dsp_sty << 16; + val |= dsp_stx & 0xffff; VOP_WIN_SET(vop, win, dsp_st, val); VOP_WIN_SET(vop, win, rb_swap, rb_swap); @@ -1289,7 +1290,7 @@ static void vop_win_state_complete(struct vop_win *vop_win, if (state->event) { spin_lock_irqsave(&drm->event_lock, flags); - drm_send_vblank_event(drm, -1, state->event); + drm_crtc_send_vblank_event(crtc, state->event); spin_unlock_irqrestore(&drm->event_lock, flags); } @@ -1477,7 +1478,7 @@ static int vop_create_crtc(struct vop *vop) 0, &vop_plane_funcs, win_data->phy->data_formats, win_data->phy->nformats, - win_data->type); + win_data->type, NULL); if (ret) { DRM_ERROR("failed to initialize plane\n"); goto err_cleanup_planes; @@ -1491,7 +1492,7 @@ static int vop_create_crtc(struct vop *vop) } ret = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor, - &vop_crtc_funcs); + &vop_crtc_funcs, NULL); if (ret) return ret; @@ -1514,7 +1515,7 @@ static int vop_create_crtc(struct vop *vop) &vop_plane_funcs, win_data->phy->data_formats, win_data->phy->nformats, - win_data->type); + win_data->type, NULL); if (ret) { DRM_ERROR("failed to initialize overlay plane\n"); goto err_cleanup_crtc; @@ -1575,32 +1576,25 @@ static int vop_initial(struct vop *vop) return PTR_ERR(vop->dclk); } - ret = clk_prepare(vop->hclk); - if (ret < 0) { - dev_err(vop->dev, "failed to prepare hclk\n"); - return ret; - } - ret = clk_prepare(vop->dclk); if (ret < 0) { dev_err(vop->dev, "failed to prepare dclk\n"); - goto err_unprepare_hclk; + return ret; } - ret = clk_prepare(vop->aclk); + /* Enable both the hclk and aclk to setup the vop */ + ret = clk_prepare_enable(vop->hclk); if (ret < 0) { - dev_err(vop->dev, "failed to prepare aclk\n"); + dev_err(vop->dev, "failed to prepare/enable hclk\n"); goto err_unprepare_dclk; } - /* - * enable hclk, so that we can config vop register. - */ - ret = clk_enable(vop->hclk); + ret = clk_prepare_enable(vop->aclk); if (ret < 0) { - dev_err(vop->dev, "failed to prepare aclk\n"); - goto err_unprepare_aclk; + dev_err(vop->dev, "failed to prepare/enable aclk\n"); + goto err_disable_hclk; } + /* * do hclk_reset, reset all vop registers. 
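The rockchip GEM change above clears vma->vm_pgoff before calling dma_mmap_attrs(): drm_gem_mmap() locates the object through a fake offset that is still sitting in vm_pgoff, and dma_mmap_attrs() would otherwise interpret it as a real offset into the buffer. The complete mmap path in context (a sketch following the patched rockchip_drm_gem_object_mmap(); to_rockchip_obj() is the driver's container_of() accessor):

static int gem_object_mmap(struct drm_gem_object *obj,
			   struct vm_area_struct *vma)
{
	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
	struct drm_device *drm = obj->dev;

	/* CMA-backed memory is ordinary memory, not a PFN mapping. */
	vma->vm_flags &= ~VM_PFNMAP;
	/* Drop the GEM fake offset used only for object lookup. */
	vma->vm_pgoff = 0;

	return dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr,
			      rk_obj->dma_addr, obj->size,
			      &rk_obj->dma_attrs);
}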
*/ @@ -1608,7 +1602,7 @@ static int vop_initial(struct vop *vop) if (IS_ERR(ahb_rst)) { dev_err(vop->dev, "failed to get ahb reset\n"); ret = PTR_ERR(ahb_rst); - goto err_disable_hclk; + goto err_disable_aclk; } reset_control_assert(ahb_rst); usleep_range(10, 20); @@ -1634,26 +1628,25 @@ static int vop_initial(struct vop *vop) if (IS_ERR(vop->dclk_rst)) { dev_err(vop->dev, "failed to get dclk reset\n"); ret = PTR_ERR(vop->dclk_rst); - goto err_unprepare_aclk; + goto err_disable_aclk; } reset_control_assert(vop->dclk_rst); usleep_range(10, 20); reset_control_deassert(vop->dclk_rst); clk_disable(vop->hclk); + clk_disable(vop->aclk); vop->is_enabled = false; return 0; +err_disable_aclk: + clk_disable_unprepare(vop->aclk); err_disable_hclk: - clk_disable(vop->hclk); -err_unprepare_aclk: - clk_unprepare(vop->aclk); + clk_disable_unprepare(vop->hclk); err_unprepare_dclk: clk_unprepare(vop->dclk); -err_unprepare_hclk: - clk_unprepare(vop->hclk); return ret; } diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c index e9272b0a8592..b80802f55143 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c @@ -613,7 +613,7 @@ int shmob_drm_encoder_create(struct shmob_drm_device *sdev) encoder->possible_crtcs = 1; ret = drm_encoder_init(sdev->ddev, encoder, &encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c index 3ae09dcd4fd8..de11c7cfb02f 100644 --- a/drivers/gpu/drm/sti/sti_crtc.c +++ b/drivers/gpu/drm/sti/sti_crtc.c @@ -367,7 +367,7 @@ int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer, int res; res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor, - &sti_crtc_funcs); + &sti_crtc_funcs, NULL); if (res) { DRM_ERROR("Can't initialze CRTC\n"); return -EINVAL; diff --git a/drivers/gpu/drm/sti/sti_cursor.c b/drivers/gpu/drm/sti/sti_cursor.c index dd1032195051..807863106b8d 100644 --- a/drivers/gpu/drm/sti/sti_cursor.c +++ b/drivers/gpu/drm/sti/sti_cursor.c @@ -272,7 +272,7 @@ struct drm_plane *sti_cursor_create(struct drm_device *drm_dev, &sti_plane_helpers_funcs, cursor_supported_formats, ARRAY_SIZE(cursor_supported_formats), - DRM_PLANE_TYPE_CURSOR); + DRM_PLANE_TYPE_CURSOR, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); goto err_plane; diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c index c85dc7d6b005..f9a1d92c9d95 100644 --- a/drivers/gpu/drm/sti/sti_gdp.c +++ b/drivers/gpu/drm/sti/sti_gdp.c @@ -630,7 +630,7 @@ struct drm_plane *sti_gdp_create(struct drm_device *drm_dev, &sti_plane_helpers_funcs, gdp_supported_formats, ARRAY_SIZE(gdp_supported_formats), - type); + type, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); goto err; diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index d735daccd458..49cce833f2c8 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -543,8 +543,6 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector) count++; } - drm_mode_sort(&connector->modes); - return count; } diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c index ea0690bc77d5..43861b52261d 100644 --- a/drivers/gpu/drm/sti/sti_hqvdp.c +++ b/drivers/gpu/drm/sti/sti_hqvdp.c @@ -973,7 +973,7 @@ static struct drm_plane *sti_hqvdp_create(struct drm_device *drm_dev, &sti_plane_helpers_funcs, 
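The vop_initial() rework above folds the separate clk_prepare()/clk_enable() steps into clk_prepare_enable() and mirrors every error path with clk_disable_unprepare(), which also removes a mislabeled error message (the old hclk failure printed "failed to prepare aclk") and simplifies the unwind chain. Reduced to the clock handling alone (vop_clocks_on() is a hypothetical extraction):

static int vop_clocks_on(struct vop *vop)
{
	int ret;

	/* Both hclk and aclk must be live to touch vop registers. */
	ret = clk_prepare_enable(vop->hclk);
	if (ret < 0)
		return ret;

	ret = clk_prepare_enable(vop->aclk);
	if (ret < 0)
		goto err_disable_hclk;

	return 0;

err_disable_hclk:
	clk_disable_unprepare(vop->hclk);
	return ret;
}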
hqvdp_supported_formats, ARRAY_SIZE(hqvdp_supported_formats), - DRM_PLANE_TYPE_OVERLAY); + DRM_PLANE_TYPE_OVERLAY, NULL); if (res) { DRM_ERROR("Failed to initialize universal plane\n"); return NULL; diff --git a/drivers/gpu/drm/sti/sti_tvout.c b/drivers/gpu/drm/sti/sti_tvout.c index c8a4c5dae2b6..f2afcf5438b8 100644 --- a/drivers/gpu/drm/sti/sti_tvout.c +++ b/drivers/gpu/drm/sti/sti_tvout.c @@ -512,7 +512,8 @@ sti_tvout_create_dvo_encoder(struct drm_device *dev, drm_encoder->possible_clones = 1 << 0; drm_encoder_init(dev, drm_encoder, - &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_LVDS); + &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_LVDS, + NULL); drm_encoder_helper_add(drm_encoder, &sti_dvo_encoder_helper_funcs); @@ -564,7 +565,7 @@ static struct drm_encoder *sti_tvout_create_hda_encoder(struct drm_device *dev, drm_encoder->possible_clones = 1 << 0; drm_encoder_init(dev, drm_encoder, - &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_DAC); + &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_DAC, NULL); drm_encoder_helper_add(drm_encoder, &sti_hda_encoder_helper_funcs); @@ -613,7 +614,7 @@ static struct drm_encoder *sti_tvout_create_hdmi_encoder(struct drm_device *dev, drm_encoder->possible_clones = 1 << 1; drm_encoder_init(dev, drm_encoder, - &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_TMDS); + &sti_tvout_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(drm_encoder, &sti_hdmi_encoder_helper_funcs); diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index e9f24a85a103..dde6f208c347 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -660,7 +660,8 @@ static struct drm_plane *tegra_dc_primary_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, possible_crtcs, &tegra_primary_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_PRIMARY); + num_formats, DRM_PLANE_TYPE_PRIMARY, + NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); @@ -827,7 +828,8 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe, &tegra_cursor_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_CURSOR); + num_formats, DRM_PLANE_TYPE_CURSOR, + NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); @@ -890,7 +892,8 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm, err = drm_universal_plane_init(drm, &plane->base, 1 << dc->pipe, &tegra_overlay_plane_funcs, formats, - num_formats, DRM_PLANE_TYPE_OVERLAY); + num_formats, DRM_PLANE_TYPE_OVERLAY, + NULL); if (err < 0) { kfree(plane); return ERR_PTR(err); @@ -1732,7 +1735,7 @@ static int tegra_dc_init(struct host1x_client *client) } err = drm_crtc_init_with_planes(drm, &dc->base, primary, cursor, - &tegra_crtc_funcs); + &tegra_crtc_funcs, NULL); if (err < 0) goto cleanup; @@ -1952,8 +1955,10 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc) * cases where only a single display controller is used. 
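 *
 * Aside: for_each_matching_node() takes a reference on each node it
 * visits and drops it as the loop advances, so breaking out early leaks
 * the reference on the current node unless it is dropped by hand, which
 * is what the added of_node_put() below does. The shape of the fix,
 * with illustrative names:
 *
 *	for_each_matching_node(np, match_table) {
 *		if (np == wanted) {
 *			of_node_put(np);	/* balance the iterator's reference */
 *			break;
 *		}
 *	}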
*/ for_each_matching_node(np, tegra_dc_of_match) { - if (np == dc->dev->of_node) + if (np == dc->dev->of_node) { + of_node_put(np); break; + } value++; } diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c index 6aecb6647313..b24a0f14821a 100644 --- a/drivers/gpu/drm/tegra/dpaux.c +++ b/drivers/gpu/drm/tegra/dpaux.c @@ -436,7 +436,7 @@ struct platform_driver tegra_dpaux_driver = { .remove = tegra_dpaux_remove, }; -struct tegra_dpaux *tegra_dpaux_find_by_of_node(struct device_node *np) +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np) { struct tegra_dpaux *dpaux; @@ -445,7 +445,7 @@ struct tegra_dpaux *tegra_dpaux_find_by_of_node(struct device_node *np) list_for_each_entry(dpaux, &dpaux_list, list) if (np == dpaux->dev->of_node) { mutex_unlock(&dpaux_lock); - return dpaux; + return &dpaux->aux; } mutex_unlock(&dpaux_lock); @@ -453,8 +453,9 @@ struct tegra_dpaux *tegra_dpaux_find_by_of_node(struct device_node *np) return NULL; } -int tegra_dpaux_attach(struct tegra_dpaux *dpaux, struct tegra_output *output) +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output) { + struct tegra_dpaux *dpaux = to_dpaux(aux); unsigned long timeout; int err; @@ -470,7 +471,7 @@ int tegra_dpaux_attach(struct tegra_dpaux *dpaux, struct tegra_output *output) while (time_before(jiffies, timeout)) { enum drm_connector_status status; - status = tegra_dpaux_detect(dpaux); + status = drm_dp_aux_detect(aux); if (status == connector_status_connected) { enable_irq(dpaux->irq); return 0; @@ -482,8 +483,9 @@ int tegra_dpaux_attach(struct tegra_dpaux *dpaux, struct tegra_output *output) return -ETIMEDOUT; } -int tegra_dpaux_detach(struct tegra_dpaux *dpaux) +int drm_dp_aux_detach(struct drm_dp_aux *aux) { + struct tegra_dpaux *dpaux = to_dpaux(aux); unsigned long timeout; int err; @@ -498,7 +500,7 @@ int tegra_dpaux_detach(struct tegra_dpaux *dpaux) while (time_before(jiffies, timeout)) { enum drm_connector_status status; - status = tegra_dpaux_detect(dpaux); + status = drm_dp_aux_detect(aux); if (status == connector_status_disconnected) { dpaux->output = NULL; return 0; @@ -510,8 +512,9 @@ int tegra_dpaux_detach(struct tegra_dpaux *dpaux) return -ETIMEDOUT; } -enum drm_connector_status tegra_dpaux_detect(struct tegra_dpaux *dpaux) +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux) { + struct tegra_dpaux *dpaux = to_dpaux(aux); u32 value; value = tegra_dpaux_readl(dpaux, DPAUX_DP_AUXSTAT); @@ -522,8 +525,9 @@ enum drm_connector_status tegra_dpaux_detect(struct tegra_dpaux *dpaux) return connector_status_disconnected; } -int tegra_dpaux_enable(struct tegra_dpaux *dpaux) +int drm_dp_aux_enable(struct drm_dp_aux *aux) { + struct tegra_dpaux *dpaux = to_dpaux(aux); u32 value; value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) | @@ -540,8 +544,9 @@ int tegra_dpaux_enable(struct tegra_dpaux *dpaux) return 0; } -int tegra_dpaux_disable(struct tegra_dpaux *dpaux) +int drm_dp_aux_disable(struct drm_dp_aux *aux) { + struct tegra_dpaux *dpaux = to_dpaux(aux); u32 value; value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE); @@ -551,11 +556,11 @@ int tegra_dpaux_disable(struct tegra_dpaux *dpaux) return 0; } -int tegra_dpaux_prepare(struct tegra_dpaux *dpaux, u8 encoding) +int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding) { int err; - err = drm_dp_dpcd_writeb(&dpaux->aux, DP_MAIN_LINK_CHANNEL_CODING_SET, + err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, encoding); if (err < 0) return err; @@ -563,15 +568,15 @@ int tegra_dpaux_prepare(struct 
tegra_dpaux *dpaux, u8 encoding) return 0; } -int tegra_dpaux_train(struct tegra_dpaux *dpaux, struct drm_dp_link *link, - u8 pattern) +int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link, + u8 pattern) { u8 tp = pattern & DP_TRAINING_PATTERN_MASK; u8 status[DP_LINK_STATUS_SIZE], values[4]; unsigned int i; int err; - err = drm_dp_dpcd_writeb(&dpaux->aux, DP_TRAINING_PATTERN_SET, pattern); + err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern); if (err < 0) return err; @@ -584,14 +589,14 @@ int tegra_dpaux_train(struct tegra_dpaux *dpaux, struct drm_dp_link *link, DP_TRAIN_MAX_SWING_REACHED | DP_TRAIN_VOLTAGE_SWING_LEVEL_0; - err = drm_dp_dpcd_write(&dpaux->aux, DP_TRAINING_LANE0_SET, values, + err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, link->num_lanes); if (err < 0) return err; usleep_range(500, 1000); - err = drm_dp_dpcd_read_link_status(&dpaux->aux, status); + err = drm_dp_dpcd_read_link_status(aux, status); if (err < 0) return err; @@ -609,11 +614,11 @@ int tegra_dpaux_train(struct tegra_dpaux *dpaux, struct drm_dp_link *link, break; default: - dev_err(dpaux->dev, "unsupported training pattern %u\n", tp); + dev_err(aux->dev, "unsupported training pattern %u\n", tp); return -EINVAL; } - err = drm_dp_dpcd_writeb(&dpaux->aux, DP_EDP_CONFIGURATION_SET, 0); + err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, 0); if (err < 0) return err; diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index e0f827790a5e..c5c856a0879d 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -137,8 +137,8 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) start = geometry->aperture_start; end = geometry->aperture_end; - DRM_DEBUG("IOMMU context initialized (aperture: %#llx-%#llx)\n", - start, end); + DRM_DEBUG_DRIVER("IOMMU aperture initialized (%#llx-%#llx)\n", + start, end); drm_mm_init(&tegra->mm, start, end - start + 1); } @@ -277,9 +277,7 @@ host1x_bo_lookup(struct drm_device *drm, struct drm_file *file, u32 handle) if (!gem) return NULL; - mutex_lock(&drm->struct_mutex); - drm_gem_object_unreference(gem); - mutex_unlock(&drm->struct_mutex); + drm_gem_object_unreference_unlocked(gem); bo = to_tegra_bo(gem); return &bo->base; @@ -473,7 +471,7 @@ static int tegra_gem_mmap(struct drm_device *drm, void *data, args->offset = drm_vma_node_offset_addr(&bo->gem.vma_node); - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); return 0; } @@ -683,7 +681,7 @@ static int tegra_gem_set_tiling(struct drm_device *drm, void *data, bo->tiling.mode = mode; bo->tiling.value = value; - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); return 0; } @@ -723,7 +721,7 @@ static int tegra_gem_get_tiling(struct drm_device *drm, void *data, break; } - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); return err; } @@ -748,7 +746,7 @@ static int tegra_gem_set_flags(struct drm_device *drm, void *data, if (args->flags & DRM_TEGRA_GEM_BOTTOM_UP) bo->flags |= TEGRA_BO_BOTTOM_UP; - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); return 0; } @@ -770,7 +768,7 @@ static int tegra_gem_get_flags(struct drm_device *drm, void *data, if (bo->flags & TEGRA_BO_BOTTOM_UP) args->flags |= DRM_TEGRA_GEM_BOTTOM_UP; - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); return 0; } @@ -921,7 +919,8 @@ static void tegra_debugfs_cleanup(struct drm_minor *minor) #endif static struct drm_driver tegra_drm_driver = 
{ - .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, + .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | + DRIVER_ATOMIC, .load = tegra_drm_load, .unload = tegra_drm_unload, .open = tegra_drm_open, @@ -991,7 +990,6 @@ static int host1x_drm_probe(struct host1x_device *dev) if (!drm) return -ENOMEM; - drm_dev_set_unique(drm, dev_name(&dev->dev)); dev_set_drvdata(&dev->dev, drm); err = drm_dev_register(drm, 0); @@ -1023,8 +1021,17 @@ static int host1x_drm_remove(struct host1x_device *dev) static int host1x_drm_suspend(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + struct tegra_drm *tegra = drm->dev_private; drm_kms_helper_poll_disable(drm); + tegra_drm_fb_suspend(drm); + + tegra->state = drm_atomic_helper_suspend(drm); + if (IS_ERR(tegra->state)) { + tegra_drm_fb_resume(drm); + drm_kms_helper_poll_enable(drm); + return PTR_ERR(tegra->state); + } return 0; } @@ -1032,7 +1039,10 @@ static int host1x_drm_suspend(struct device *dev) static int host1x_drm_resume(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); + struct tegra_drm *tegra = drm->dev_private; + drm_atomic_helper_resume(drm, tegra->state); + tegra_drm_fb_resume(drm); drm_kms_helper_poll_enable(drm); return 0; @@ -1076,6 +1086,16 @@ static struct host1x_driver host1x_drm_driver = { .subdevs = host1x_drm_subdevs, }; +static struct platform_driver * const drivers[] = { + &tegra_dc_driver, + &tegra_hdmi_driver, + &tegra_dsi_driver, + &tegra_dpaux_driver, + &tegra_sor_driver, + &tegra_gr2d_driver, + &tegra_gr3d_driver, +}; + static int __init host1x_drm_init(void) { int err; @@ -1084,48 +1104,12 @@ static int __init host1x_drm_init(void) if (err < 0) return err; - err = platform_driver_register(&tegra_dc_driver); + err = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (err < 0) goto unregister_host1x; - err = platform_driver_register(&tegra_dsi_driver); - if (err < 0) - goto unregister_dc; - - err = platform_driver_register(&tegra_sor_driver); - if (err < 0) - goto unregister_dsi; - - err = platform_driver_register(&tegra_hdmi_driver); - if (err < 0) - goto unregister_sor; - - err = platform_driver_register(&tegra_dpaux_driver); - if (err < 0) - goto unregister_hdmi; - - err = platform_driver_register(&tegra_gr2d_driver); - if (err < 0) - goto unregister_dpaux; - - err = platform_driver_register(&tegra_gr3d_driver); - if (err < 0) - goto unregister_gr2d; - return 0; -unregister_gr2d: - platform_driver_unregister(&tegra_gr2d_driver); -unregister_dpaux: - platform_driver_unregister(&tegra_dpaux_driver); -unregister_hdmi: - platform_driver_unregister(&tegra_hdmi_driver); -unregister_sor: - platform_driver_unregister(&tegra_sor_driver); -unregister_dsi: - platform_driver_unregister(&tegra_dsi_driver); -unregister_dc: - platform_driver_unregister(&tegra_dc_driver); unregister_host1x: host1x_driver_unregister(&host1x_drm_driver); return err; @@ -1134,13 +1118,7 @@ module_init(host1x_drm_init); static void __exit host1x_drm_exit(void) { - platform_driver_unregister(&tegra_gr3d_driver); - platform_driver_unregister(&tegra_gr2d_driver); - platform_driver_unregister(&tegra_dpaux_driver); - platform_driver_unregister(&tegra_hdmi_driver); - platform_driver_unregister(&tegra_sor_driver); - platform_driver_unregister(&tegra_dsi_driver); - platform_driver_unregister(&tegra_dc_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); host1x_driver_unregister(&host1x_drm_driver); } module_exit(host1x_drm_exit); diff --git a/drivers/gpu/drm/tegra/drm.h 
b/drivers/gpu/drm/tegra/drm.h index d88a2d18c1a4..c088f2f67eda 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -57,6 +57,8 @@ struct tegra_drm { struct work_struct work; struct mutex lock; } commit; + + struct drm_atomic_state *state; }; struct tegra_drm_client; @@ -247,18 +249,17 @@ void tegra_output_connector_destroy(struct drm_connector *connector); void tegra_output_encoder_destroy(struct drm_encoder *encoder); /* from dpaux.c */ -struct tegra_dpaux; struct drm_dp_link; -struct tegra_dpaux *tegra_dpaux_find_by_of_node(struct device_node *np); -enum drm_connector_status tegra_dpaux_detect(struct tegra_dpaux *dpaux); -int tegra_dpaux_attach(struct tegra_dpaux *dpaux, struct tegra_output *output); -int tegra_dpaux_detach(struct tegra_dpaux *dpaux); -int tegra_dpaux_enable(struct tegra_dpaux *dpaux); -int tegra_dpaux_disable(struct tegra_dpaux *dpaux); -int tegra_dpaux_prepare(struct tegra_dpaux *dpaux, u8 encoding); -int tegra_dpaux_train(struct tegra_dpaux *dpaux, struct drm_dp_link *link, - u8 pattern); +struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np); +enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux); +int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output); +int drm_dp_aux_detach(struct drm_dp_aux *aux); +int drm_dp_aux_enable(struct drm_dp_aux *aux); +int drm_dp_aux_disable(struct drm_dp_aux *aux); +int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding); +int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link, + u8 pattern); /* from fb.c */ struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer, @@ -273,16 +274,18 @@ int tegra_drm_fb_prepare(struct drm_device *drm); void tegra_drm_fb_free(struct drm_device *drm); int tegra_drm_fb_init(struct drm_device *drm); void tegra_drm_fb_exit(struct drm_device *drm); +void tegra_drm_fb_suspend(struct drm_device *drm); +void tegra_drm_fb_resume(struct drm_device *drm); #ifdef CONFIG_DRM_FBDEV_EMULATION void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev); void tegra_fb_output_poll_changed(struct drm_device *drm); #endif extern struct platform_driver tegra_dc_driver; -extern struct platform_driver tegra_dsi_driver; -extern struct platform_driver tegra_sor_driver; extern struct platform_driver tegra_hdmi_driver; +extern struct platform_driver tegra_dsi_driver; extern struct platform_driver tegra_dpaux_driver; +extern struct platform_driver tegra_sor_driver; extern struct platform_driver tegra_gr2d_driver; extern struct platform_driver tegra_gr3d_driver; diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index f0a138ef68ce..50d46ae3786b 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1023,7 +1023,7 @@ static int tegra_dsi_init(struct host1x_client *client) drm_encoder_init(drm, &dsi->output.encoder, &tegra_dsi_encoder_funcs, - DRM_MODE_ENCODER_DSI); + DRM_MODE_ENCODER_DSI, NULL); drm_encoder_helper_add(&dsi->output.encoder, &tegra_dsi_encoder_helper_funcs); diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index ede9e94f3312..ca84de9ccb51 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -10,6 +10,8 @@ * published by the Free Software Foundation. 
*/ +#include <linux/console.h> + #include "drm.h" #include "gem.h" @@ -86,7 +88,7 @@ static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer, return drm_gem_handle_create(file, &fb->planes[0]->gem, handle); } -static struct drm_framebuffer_funcs tegra_fb_funcs = { +static const struct drm_framebuffer_funcs tegra_fb_funcs = { .destroy = tegra_fb_destroy, .create_handle = tegra_fb_create_handle, }; @@ -413,3 +415,25 @@ void tegra_drm_fb_exit(struct drm_device *drm) tegra_fbdev_exit(tegra->fbdev); #endif } + +void tegra_drm_fb_suspend(struct drm_device *drm) +{ +#ifdef CONFIG_DRM_FBDEV_EMULATION + struct tegra_drm *tegra = drm->dev_private; + + console_lock(); + drm_fb_helper_set_suspend(&tegra->fbdev->base, 1); + console_unlock(); +#endif +} + +void tegra_drm_fb_resume(struct drm_device *drm) +{ +#ifdef CONFIG_DRM_FBDEV_EMULATION + struct tegra_drm *tegra = drm->dev_private; + + console_lock(); + drm_fb_helper_set_suspend(&tegra->fbdev->base, 0); + console_unlock(); +#endif +} diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 01e16e146bfe..33add93b4ed9 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -28,11 +28,8 @@ static inline struct tegra_bo *host1x_to_tegra_bo(struct host1x_bo *bo) static void tegra_bo_put(struct host1x_bo *bo) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); - struct drm_device *drm = obj->gem.dev; - mutex_lock(&drm->struct_mutex); - drm_gem_object_unreference(&obj->gem); - mutex_unlock(&drm->struct_mutex); + drm_gem_object_unreference_unlocked(&obj->gem); } static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt) @@ -72,11 +69,8 @@ static void tegra_bo_kunmap(struct host1x_bo *bo, unsigned int page, static struct host1x_bo *tegra_bo_get(struct host1x_bo *bo) { struct tegra_bo *obj = host1x_to_tegra_bo(bo); - struct drm_device *drm = obj->gem.dev; - mutex_lock(&drm->struct_mutex); drm_gem_object_reference(&obj->gem); - mutex_unlock(&drm->struct_mutex); return bo; } @@ -408,12 +402,9 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm, struct drm_gem_object *gem; struct tegra_bo *bo; - mutex_lock(&drm->struct_mutex); - gem = drm_gem_object_lookup(drm, file, handle); if (!gem) { dev_err(drm->dev, "failed to lookup GEM object\n"); - mutex_unlock(&drm->struct_mutex); return -EINVAL; } @@ -421,9 +412,7 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm, *offset = drm_vma_node_offset_addr(&bo->gem.vma_node); - drm_gem_object_unreference(gem); - - mutex_unlock(&drm->struct_mutex); + drm_gem_object_unreference_unlocked(gem); return 0; } diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index 52b32cbd9de6..b7ef4929e347 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1320,7 +1320,7 @@ static int tegra_hdmi_init(struct host1x_client *client) hdmi->output.connector.dpms = DRM_MODE_DPMS_OFF; drm_encoder_init(drm, &hdmi->output.encoder, &tegra_hdmi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(&hdmi->output.encoder, &tegra_hdmi_encoder_helper_funcs); diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index bc9735b4ad60..e246334e0252 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -287,7 +287,7 @@ int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc) output->connector.dpms = DRM_MODE_DPMS_OFF; drm_encoder_init(drm, &output->encoder, &tegra_rgb_encoder_funcs, - 
DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); drm_encoder_helper_add(&output->encoder, &tegra_rgb_encoder_helper_funcs); diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index 3eff7cf75d25..757c6e8603af 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -173,7 +173,7 @@ struct tegra_sor { struct clk *clk_dp; struct clk *clk; - struct tegra_dpaux *dpaux; + struct drm_dp_aux *aux; struct drm_info_list *debugfs_files; struct drm_minor *minor; @@ -273,7 +273,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor, SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0); tegra_sor_writel(sor, value, SOR_DP_PADCTL0); - err = tegra_dpaux_prepare(sor->dpaux, DP_SET_ANSI_8B10B); + err = drm_dp_aux_prepare(sor->aux, DP_SET_ANSI_8B10B); if (err < 0) return err; @@ -288,7 +288,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor, pattern = DP_TRAINING_PATTERN_1; - err = tegra_dpaux_train(sor->dpaux, link, pattern); + err = drm_dp_aux_train(sor->aux, link, pattern); if (err < 0) return err; @@ -309,7 +309,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor, pattern = DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_2; - err = tegra_dpaux_train(sor->dpaux, link, pattern); + err = drm_dp_aux_train(sor->aux, link, pattern); if (err < 0) return err; @@ -324,7 +324,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor, pattern = DP_TRAINING_PATTERN_DISABLE; - err = tegra_dpaux_train(sor->dpaux, link, pattern); + err = drm_dp_aux_train(sor->aux, link, pattern); if (err < 0) return err; @@ -1044,8 +1044,8 @@ tegra_sor_connector_detect(struct drm_connector *connector, bool force) struct tegra_output *output = connector_to_output(connector); struct tegra_sor *sor = to_sor(output); - if (sor->dpaux) - return tegra_dpaux_detect(sor->dpaux); + if (sor->aux) + return drm_dp_aux_detect(sor->aux); return tegra_output_connector_detect(connector, force); } @@ -1066,13 +1066,13 @@ static int tegra_sor_connector_get_modes(struct drm_connector *connector) struct tegra_sor *sor = to_sor(output); int err; - if (sor->dpaux) - tegra_dpaux_enable(sor->dpaux); + if (sor->aux) + drm_dp_aux_enable(sor->aux); err = tegra_output_connector_get_modes(connector); - if (sor->dpaux) - tegra_dpaux_disable(sor->dpaux); + if (sor->aux) + drm_dp_aux_disable(sor->aux); return err; } @@ -1128,8 +1128,8 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder) if (err < 0) dev_err(sor->dev, "failed to power down SOR: %d\n", err); - if (sor->dpaux) { - err = tegra_dpaux_disable(sor->dpaux); + if (sor->aux) { + err = drm_dp_aux_disable(sor->aux); if (err < 0) dev_err(sor->dev, "failed to disable DP: %d\n", err); } @@ -1196,7 +1196,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder) struct tegra_sor *sor = to_sor(output); struct tegra_sor_config config; struct drm_dp_link link; - struct drm_dp_aux *aux; + u8 rate, lanes; int err = 0; u32 value; @@ -1209,20 +1209,14 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder) if (output->panel) drm_panel_prepare(output->panel); - /* FIXME: properly convert to struct drm_dp_aux */ - aux = (struct drm_dp_aux *)sor->dpaux; - - if (sor->dpaux) { - err = tegra_dpaux_enable(sor->dpaux); - if (err < 0) - dev_err(sor->dev, "failed to enable DP: %d\n", err); + err = drm_dp_aux_enable(sor->aux); + if (err < 0) + dev_err(sor->dev, "failed to enable DP: %d\n", err); - err = drm_dp_link_probe(aux, &link); - if (err < 0) { - dev_err(sor->dev, "failed to probe eDP link: %d\n", - err); - return; - } + 
err = drm_dp_link_probe(sor->aux, &link); + if (err < 0) { + dev_err(sor->dev, "failed to probe eDP link: %d\n", err); + return; } err = clk_set_parent(sor->clk, sor->clk_safe); @@ -1434,60 +1428,51 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder) value |= SOR_DP_PADCTL_PAD_CAL_PD; tegra_sor_writel(sor, value, SOR_DP_PADCTL0); - if (sor->dpaux) { - u8 rate, lanes; - - err = drm_dp_link_probe(aux, &link); - if (err < 0) - dev_err(sor->dev, "failed to probe eDP link: %d\n", - err); + err = drm_dp_link_probe(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to probe eDP link: %d\n", err); - err = drm_dp_link_power_up(aux, &link); - if (err < 0) - dev_err(sor->dev, "failed to power up eDP link: %d\n", - err); + err = drm_dp_link_power_up(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to power up eDP link: %d\n", err); - err = drm_dp_link_configure(aux, &link); - if (err < 0) - dev_err(sor->dev, "failed to configure eDP link: %d\n", - err); + err = drm_dp_link_configure(sor->aux, &link); + if (err < 0) + dev_err(sor->dev, "failed to configure eDP link: %d\n", err); - rate = drm_dp_link_rate_to_bw_code(link.rate); - lanes = link.num_lanes; + rate = drm_dp_link_rate_to_bw_code(link.rate); + lanes = link.num_lanes; - value = tegra_sor_readl(sor, SOR_CLK_CNTRL); - value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; - value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate); - tegra_sor_writel(sor, value, SOR_CLK_CNTRL); + value = tegra_sor_readl(sor, SOR_CLK_CNTRL); + value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK; + value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate); + tegra_sor_writel(sor, value, SOR_CLK_CNTRL); - value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); - value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK; - value |= SOR_DP_LINKCTL_LANE_COUNT(lanes); + value = tegra_sor_readl(sor, SOR_DP_LINKCTL0); + value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK; + value |= SOR_DP_LINKCTL_LANE_COUNT(lanes); - if (link.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) - value |= SOR_DP_LINKCTL_ENHANCED_FRAME; + if (link.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) + value |= SOR_DP_LINKCTL_ENHANCED_FRAME; - tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); + tegra_sor_writel(sor, value, SOR_DP_LINKCTL0); - /* disable training pattern generator */ + /* disable training pattern generator */ - for (i = 0; i < link.num_lanes; i++) { - unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | - SOR_DP_TPG_SCRAMBLER_GALIOS | - SOR_DP_TPG_PATTERN_NONE; - value = (value << 8) | lane; - } + for (i = 0; i < link.num_lanes; i++) { + unsigned long lane = SOR_DP_TPG_CHANNEL_CODING | + SOR_DP_TPG_SCRAMBLER_GALIOS | + SOR_DP_TPG_PATTERN_NONE; + value = (value << 8) | lane; + } - tegra_sor_writel(sor, value, SOR_DP_TPG); + tegra_sor_writel(sor, value, SOR_DP_TPG); - err = tegra_sor_dp_train_fast(sor, &link); - if (err < 0) { - dev_err(sor->dev, "DP fast link training failed: %d\n", - err); - } + err = tegra_sor_dp_train_fast(sor, &link); + if (err < 0) + dev_err(sor->dev, "DP fast link training failed: %d\n", err); - dev_dbg(sor->dev, "fast link training succeeded\n"); - } + dev_dbg(sor->dev, "fast link training succeeded\n"); err = tegra_sor_power_up(sor, 250); if (err < 0) @@ -1961,9 +1946,9 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder) /* production settings */ settings = tegra_sor_hdmi_find_settings(sor, mode->clock * 1000); - if (IS_ERR(settings)) { - dev_err(sor->dev, "no settings for pixel clock %d Hz: %ld\n", - mode->clock * 1000, PTR_ERR(settings)); + if (!settings) { + dev_err(sor->dev, "no settings for pixel clock %d 
Hz\n", + mode->clock * 1000); return; } @@ -2148,7 +2133,7 @@ static int tegra_sor_init(struct host1x_client *client) int encoder = DRM_MODE_ENCODER_NONE; int err; - if (!sor->dpaux) { + if (!sor->aux) { if (sor->soc->supports_hdmi) { connector = DRM_MODE_CONNECTOR_HDMIA; encoder = DRM_MODE_ENCODER_TMDS; @@ -2178,7 +2163,7 @@ static int tegra_sor_init(struct host1x_client *client) sor->output.connector.dpms = DRM_MODE_DPMS_OFF; drm_encoder_init(drm, &sor->output.encoder, &tegra_sor_encoder_funcs, - encoder); + encoder, NULL); drm_encoder_helper_add(&sor->output.encoder, helpers); drm_mode_connector_attach_encoder(&sor->output.connector, @@ -2199,8 +2184,8 @@ static int tegra_sor_init(struct host1x_client *client) dev_err(sor->dev, "debugfs setup failed: %d\n", err); } - if (sor->dpaux) { - err = tegra_dpaux_attach(sor->dpaux, &sor->output); + if (sor->aux) { + err = drm_dp_aux_attach(sor->aux, &sor->output); if (err < 0) { dev_err(sor->dev, "failed to attach DP: %d\n", err); return err; @@ -2249,8 +2234,8 @@ static int tegra_sor_exit(struct host1x_client *client) tegra_output_exit(&sor->output); - if (sor->dpaux) { - err = tegra_dpaux_detach(sor->dpaux); + if (sor->aux) { + err = drm_dp_aux_detach(sor->aux); if (err < 0) { dev_err(sor->dev, "failed to detach DP: %d\n", err); return err; @@ -2399,14 +2384,14 @@ static int tegra_sor_probe(struct platform_device *pdev) np = of_parse_phandle(pdev->dev.of_node, "nvidia,dpaux", 0); if (np) { - sor->dpaux = tegra_dpaux_find_by_of_node(np); + sor->aux = drm_dp_aux_find_by_of_node(np); of_node_put(np); - if (!sor->dpaux) + if (!sor->aux) return -EPROBE_DEFER; } - if (!sor->dpaux) { + if (!sor->aux) { if (sor->soc->supports_hdmi) { sor->ops = &tegra_sor_hdmi_ops; } else if (sor->soc->supports_lvds) { diff --git a/drivers/gpu/drm/tilcdc/tilcdc_panel.c b/drivers/gpu/drm/tilcdc/tilcdc_panel.c index 0af8bed7ce1e..4dda6e2f464b 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_panel.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_panel.c @@ -128,7 +128,7 @@ static struct drm_encoder *panel_encoder_create(struct drm_device *dev, encoder->possible_crtcs = 1; ret = drm_encoder_init(dev, encoder, &panel_encoder_funcs, - DRM_MODE_ENCODER_LVDS); + DRM_MODE_ENCODER_LVDS, NULL); if (ret < 0) goto fail; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c index 354c47ca6374..5052a8af7ecb 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_tfp410.c @@ -138,7 +138,7 @@ static struct drm_encoder *tfp410_encoder_create(struct drm_device *dev, encoder->possible_crtcs = 1; ret = drm_encoder_init(dev, encoder, &tfp410_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); if (ret < 0) goto fail; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index 0110d95522f3..4709b54c204c 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -122,13 +122,13 @@ static void udl_connector_destroy(struct drm_connector *connector) kfree(connector); } -static struct drm_connector_helper_funcs udl_connector_helper_funcs = { +static const struct drm_connector_helper_funcs udl_connector_helper_funcs = { .get_modes = udl_get_modes, .mode_valid = udl_mode_valid, .best_encoder = udl_best_single_encoder, }; -static struct drm_connector_funcs udl_connector_funcs = { +static const struct drm_connector_funcs udl_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = udl_detect, .fill_modes = drm_helper_probe_single_connector_modes, diff 
--git a/drivers/gpu/drm/udl/udl_encoder.c b/drivers/gpu/drm/udl/udl_encoder.c index 4052c4656498..a181a647fcf9 100644 --- a/drivers/gpu/drm/udl/udl_encoder.c +++ b/drivers/gpu/drm/udl/udl_encoder.c @@ -73,7 +73,8 @@ struct drm_encoder *udl_encoder_init(struct drm_device *dev) if (!encoder) return NULL; - drm_encoder_init(dev, encoder, &udl_enc_funcs, DRM_MODE_ENCODER_TMDS); + drm_encoder_init(dev, encoder, &udl_enc_funcs, DRM_MODE_ENCODER_TMDS, + NULL); drm_encoder_helper_add(encoder, &udl_helper_funcs); encoder->possible_crtcs = 1; return encoder; diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c index 677190a65e82..160ef2a08b89 100644 --- a/drivers/gpu/drm/udl/udl_modeset.c +++ b/drivers/gpu/drm/udl/udl_modeset.c @@ -400,7 +400,7 @@ static void udl_crtc_commit(struct drm_crtc *crtc) udl_crtc_dpms(crtc, DRM_MODE_DPMS_ON); } -static struct drm_crtc_helper_funcs udl_helper_funcs = { +static const struct drm_crtc_helper_funcs udl_helper_funcs = { .dpms = udl_crtc_dpms, .mode_fixup = udl_crtc_mode_fixup, .mode_set = udl_crtc_mode_set, diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 32b4f9cd8f52..4c6a99f0398c 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -8,10 +8,19 @@ vc4-y := \ vc4_crtc.o \ vc4_drv.o \ vc4_kms.o \ + vc4_gem.o \ vc4_hdmi.o \ vc4_hvs.o \ - vc4_plane.o + vc4_irq.o \ + vc4_plane.o \ + vc4_render_cl.o \ + vc4_trace_points.o \ + vc4_v3d.o \ + vc4_validate.o \ + vc4_validate_shaders.o vc4-$(CONFIG_DEBUG_FS) += vc4_debugfs.o obj-$(CONFIG_DRM_VC4) += vc4.o + +CFLAGS_vc4_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index ab9f5108ae1a..18dfe3ec9a62 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -12,19 +12,236 @@ * access to system memory with no MMU in between. To support it, we * use the GEM CMA helper functions to allocate contiguous ranges of * physical memory for our BOs. + * + * Since the CMA allocator is very slow, we keep a cache of recently + * freed BOs around so that the kernel's allocation of objects for 3D + * rendering can return quickly. */ #include "vc4_drv.h" +#include "uapi/drm/vc4_drm.h" + +static void vc4_bo_stats_dump(struct vc4_dev *vc4) +{ + DRM_INFO("num bos allocated: %d\n", + vc4->bo_stats.num_allocated); + DRM_INFO("size bos allocated: %dkb\n", + vc4->bo_stats.size_allocated / 1024); + DRM_INFO("num bos used: %d\n", + vc4->bo_stats.num_allocated - vc4->bo_stats.num_cached); + DRM_INFO("size bos used: %dkb\n", + (vc4->bo_stats.size_allocated - + vc4->bo_stats.size_cached) / 1024); + DRM_INFO("num bos cached: %d\n", + vc4->bo_stats.num_cached); + DRM_INFO("size bos cached: %dkb\n", + vc4->bo_stats.size_cached / 1024); +} + +#ifdef CONFIG_DEBUG_FS +int vc4_bo_stats_debugfs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_bo_stats stats; + + /* Take a snapshot of the current stats with the lock held. 
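+ * Aside: copying the small stats struct while bo_lock is held keeps the
+ * paired num/size counters consistent with each other, while letting the
+ * seq_printf() calls below run without holding the lock.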
*/ + mutex_lock(&vc4->bo_lock); + stats = vc4->bo_stats; + mutex_unlock(&vc4->bo_lock); + + seq_printf(m, "num bos allocated: %d\n", + stats.num_allocated); + seq_printf(m, "size bos allocated: %dkb\n", + stats.size_allocated / 1024); + seq_printf(m, "num bos used: %d\n", + stats.num_allocated - stats.num_cached); + seq_printf(m, "size bos used: %dkb\n", + (stats.size_allocated - stats.size_cached) / 1024); + seq_printf(m, "num bos cached: %d\n", + stats.num_cached); + seq_printf(m, "size bos cached: %dkb\n", + stats.size_cached / 1024); + + return 0; +} +#endif + +static uint32_t bo_page_index(size_t size) +{ + return (size / PAGE_SIZE) - 1; +} + +/* Must be called with bo_lock held. */ +static void vc4_bo_destroy(struct vc4_bo *bo) +{ + struct drm_gem_object *obj = &bo->base.base; + struct vc4_dev *vc4 = to_vc4_dev(obj->dev); + + if (bo->validated_shader) { + kfree(bo->validated_shader->texture_samples); + kfree(bo->validated_shader); + bo->validated_shader = NULL; + } + + vc4->bo_stats.num_allocated--; + vc4->bo_stats.size_allocated -= obj->size; + drm_gem_cma_free_object(obj); +} + +/* Must be called with bo_lock held. */ +static void vc4_bo_remove_from_cache(struct vc4_bo *bo) +{ + struct drm_gem_object *obj = &bo->base.base; + struct vc4_dev *vc4 = to_vc4_dev(obj->dev); + + vc4->bo_stats.num_cached--; + vc4->bo_stats.size_cached -= obj->size; + + list_del(&bo->unref_head); + list_del(&bo->size_head); +} + +static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, + size_t size) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint32_t page_index = bo_page_index(size); + + if (vc4->bo_cache.size_list_size <= page_index) { + uint32_t new_size = max(vc4->bo_cache.size_list_size * 2, + page_index + 1); + struct list_head *new_list; + uint32_t i; + + new_list = kmalloc_array(new_size, sizeof(struct list_head), + GFP_KERNEL); + if (!new_list) + return NULL; + + /* Rebase the old cached BO lists to their new list + * head locations. + */ + for (i = 0; i < vc4->bo_cache.size_list_size; i++) { + struct list_head *old_list = + &vc4->bo_cache.size_list[i]; + + if (list_empty(old_list)) + INIT_LIST_HEAD(&new_list[i]); + else + list_replace(old_list, &new_list[i]); + } + /* And initialize the brand new BO list heads. 
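+ *
+ * Aside: list_replace() is needed rather than a plain struct assignment
+ * because list_head links point at the head itself: an empty head points
+ * back at itself, and a non-empty list's first and last entries point at
+ * the old head, so copying the head by value would leave pointers into
+ * the old, soon-to-be-freed array:
+ *
+ *	new_list[i] = old_list[i];			/* wrong: stale pointers */
+ *	list_replace(&old_list[i], &new_list[i]);	/* fixes the links up */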
*/ + for (i = vc4->bo_cache.size_list_size; i < new_size; i++) + INIT_LIST_HEAD(&new_list[i]); + + kfree(vc4->bo_cache.size_list); + vc4->bo_cache.size_list = new_list; + vc4->bo_cache.size_list_size = new_size; + } + + return &vc4->bo_cache.size_list[page_index]; +} + +void vc4_bo_cache_purge(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mutex_lock(&vc4->bo_lock); + while (!list_empty(&vc4->bo_cache.time_list)) { + struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, + struct vc4_bo, unref_head); + vc4_bo_remove_from_cache(bo); + vc4_bo_destroy(bo); + } + mutex_unlock(&vc4->bo_lock); +} + +static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev, + uint32_t size) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint32_t page_index = bo_page_index(size); + struct vc4_bo *bo = NULL; + + size = roundup(size, PAGE_SIZE); + + mutex_lock(&vc4->bo_lock); + if (page_index >= vc4->bo_cache.size_list_size) + goto out; -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size) + if (list_empty(&vc4->bo_cache.size_list[page_index])) + goto out; + + bo = list_first_entry(&vc4->bo_cache.size_list[page_index], + struct vc4_bo, size_head); + vc4_bo_remove_from_cache(bo); + kref_init(&bo->base.base.refcount); + +out: + mutex_unlock(&vc4->bo_lock); + return bo; +} + +/** + * vc4_gem_create_object - Implementation of driver->gem_create_object. + * + * This lets the CMA helpers allocate object structs for us, and keep + * our BO stats correct. + */ +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) { + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_bo *bo; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + mutex_lock(&vc4->bo_lock); + vc4->bo_stats.num_allocated++; + vc4->bo_stats.size_allocated += size; + mutex_unlock(&vc4->bo_lock); + + return &bo->base.base; +} + +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, + bool from_cache) +{ + size_t size = roundup(unaligned_size, PAGE_SIZE); + struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_gem_cma_object *cma_obj; - cma_obj = drm_gem_cma_create(dev, size); - if (IS_ERR(cma_obj)) + if (size == 0) return NULL; - else - return to_vc4_bo(&cma_obj->base); + + /* First, try to get a vc4_bo from the kernel BO cache. */ + if (from_cache) { + struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size); + + if (bo) + return bo; + } + + cma_obj = drm_gem_cma_create(dev, size); + if (IS_ERR(cma_obj)) { + /* + * If we've run out of CMA memory, kill the cache of + * CMA allocations we've got laying around and try again. + */ + vc4_bo_cache_purge(dev); + + cma_obj = drm_gem_cma_create(dev, size); + if (IS_ERR(cma_obj)) { + DRM_ERROR("Failed to allocate from CMA:\n"); + vc4_bo_stats_dump(vc4); + return NULL; + } + } + + return to_vc4_bo(&cma_obj->base); } int vc4_dumb_create(struct drm_file *file_priv, @@ -41,7 +258,191 @@ int vc4_dumb_create(struct drm_file *file_priv, if (args->size < args->pitch * args->height) args->size = args->pitch * args->height; - bo = vc4_bo_create(dev, roundup(args->size, PAGE_SIZE)); + bo = vc4_bo_create(dev, args->size, false); + if (!bo) + return -ENOMEM; + + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); + drm_gem_object_unreference_unlocked(&bo->base.base); + + return ret; +} + +/* Must be called with bo_lock held. 
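+ *
+ * Aside, not part of this patch: a rule like this can also be enforced
+ * at runtime rather than only documented, e.g. with
+ *
+ *	lockdep_assert_held(&vc4->bo_lock);
+ *
+ * at the top of the function, which compiles away on non-lockdep builds.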
*/ +static void vc4_bo_cache_free_old(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long expire_time = jiffies - msecs_to_jiffies(1000); + + while (!list_empty(&vc4->bo_cache.time_list)) { + struct vc4_bo *bo = list_last_entry(&vc4->bo_cache.time_list, + struct vc4_bo, unref_head); + if (time_before(expire_time, bo->free_time)) { + mod_timer(&vc4->bo_cache.time_timer, + round_jiffies_up(jiffies + + msecs_to_jiffies(1000))); + return; + } + + vc4_bo_remove_from_cache(bo); + vc4_bo_destroy(bo); + } +} + +/* Called on the last userspace/kernel unreference of the BO. Returns + * it to the BO cache if possible, otherwise frees it. + * + * Note that this is called with the struct_mutex held. + */ +void vc4_free_object(struct drm_gem_object *gem_bo) +{ + struct drm_device *dev = gem_bo->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_bo *bo = to_vc4_bo(gem_bo); + struct list_head *cache_list; + + mutex_lock(&vc4->bo_lock); + /* If the object references someone else's memory, we can't cache it. + */ + if (gem_bo->import_attach) { + vc4_bo_destroy(bo); + goto out; + } + + /* Don't cache if it was publicly named. */ + if (gem_bo->name) { + vc4_bo_destroy(bo); + goto out; + } + + cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size); + if (!cache_list) { + vc4_bo_destroy(bo); + goto out; + } + + if (bo->validated_shader) { + kfree(bo->validated_shader->texture_samples); + kfree(bo->validated_shader); + bo->validated_shader = NULL; + } + + bo->free_time = jiffies; + list_add(&bo->size_head, cache_list); + list_add(&bo->unref_head, &vc4->bo_cache.time_list); + + vc4->bo_stats.num_cached++; + vc4->bo_stats.size_cached += gem_bo->size; + + vc4_bo_cache_free_old(dev); + +out: + mutex_unlock(&vc4->bo_lock); +} + +static void vc4_bo_cache_time_work(struct work_struct *work) +{ + struct vc4_dev *vc4 = + container_of(work, struct vc4_dev, bo_cache.time_work); + struct drm_device *dev = vc4->dev; + + mutex_lock(&vc4->bo_lock); + vc4_bo_cache_free_old(dev); + mutex_unlock(&vc4->bo_lock); +} + +static void vc4_bo_cache_time_timer(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + schedule_work(&vc4->bo_cache.time_work); +} + +struct dma_buf * +vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + if (bo->validated_shader) { + DRM_ERROR("Attempting to export shader BO\n"); + return ERR_PTR(-EINVAL); + } + + return drm_gem_prime_export(dev, obj, flags); +} + +int vc4_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + gem_obj = vma->vm_private_data; + bo = to_vc4_bo(gem_obj); + + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); + return -EINVAL; + } + + /* + * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. 
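+ *
+ * Aside: drm_gem_mmap() only uses vm_pgoff as a lookup key into the VMA
+ * offset manager, while the DMA mapping helper called below interprets
+ * it as a page offset into the allocation, hence the reset to 0. The
+ * rockchip hunk at the start of this section makes the same fix.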
+ */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_pgoff = 0; + + ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma, + bo->base.vaddr, bo->base.paddr, + vma->vm_end - vma->vm_start); + if (ret) + drm_gem_vm_close(vma); + + return ret; +} + +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { + DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); + return -EINVAL; + } + + return drm_gem_cma_prime_mmap(obj, vma); +} + +void *vc4_prime_vmap(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + if (bo->validated_shader) { + DRM_ERROR("mmaping of shader BOs not allowed.\n"); + return ERR_PTR(-EINVAL); + } + + return drm_gem_cma_prime_vmap(obj); +} + +int vc4_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_create_bo *args = data; + struct vc4_bo *bo = NULL; + int ret; + + /* + * We can't allocate from the BO cache, because the BOs don't + * get zeroed, and that might leak data between users. + */ + bo = vc4_bo_create(dev, args->size, false); if (!bo) return -ENOMEM; @@ -50,3 +451,107 @@ int vc4_dumb_create(struct drm_file *file_priv, return ret; } + +int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + + /* The mmap offset was set up at BO allocation time. */ + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + + drm_gem_object_unreference_unlocked(gem_obj); + return 0; +} + +int +vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_create_shader_bo *args = data; + struct vc4_bo *bo = NULL; + int ret; + + if (args->size == 0) + return -EINVAL; + + if (args->size % sizeof(u64) != 0) + return -EINVAL; + + if (args->flags != 0) { + DRM_INFO("Unknown flags set: 0x%08x\n", args->flags); + return -EINVAL; + } + + if (args->pad != 0) { + DRM_INFO("Pad set: 0x%08x\n", args->pad); + return -EINVAL; + } + + bo = vc4_bo_create(dev, args->size, true); + if (!bo) + return -ENOMEM; + + ret = copy_from_user(bo->base.vaddr, + (void __user *)(uintptr_t)args->data, + args->size); + if (ret != 0) + goto fail; + /* Clear the rest of the memory left over from allocating from the BO + * cache. + */ + memset(bo->base.vaddr + args->size, 0, + bo->base.base.size - args->size); + + bo->validated_shader = vc4_validate_shader(&bo->base); + if (!bo->validated_shader) { + ret = -EINVAL; + goto fail; + } + + /* We have to create the handle after validation, to avoid + * races for users doing things like mmap the shader BO.
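+ *
+ * Aside: once the handle exists, userspace can mmap the BO and rewrite
+ * its contents, so validating before publishing the handle ensures that
+ * what was checked is what the GPU will actually execute.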
+ */ + ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle); + + fail: + drm_gem_object_unreference_unlocked(&bo->base.base); + + return ret; +} + +void vc4_bo_cache_init(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mutex_init(&vc4->bo_lock); + + INIT_LIST_HEAD(&vc4->bo_cache.time_list); + + INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work); + setup_timer(&vc4->bo_cache.time_timer, + vc4_bo_cache_time_timer, + (unsigned long)dev); +} + +void vc4_bo_cache_destroy(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + del_timer(&vc4->bo_cache.time_timer); + cancel_work_sync(&vc4->bo_cache.time_work); + + vc4_bo_cache_purge(dev); + + if (vc4->bo_stats.num_allocated) { + DRM_ERROR("Destroying BO cache while BOs still allocated:\n"); + vc4_bo_stats_dump(vc4); + } +} diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 265064c62d49..8d0d70e51ef2 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -35,6 +35,7 @@ #include "drm_atomic_helper.h" #include "drm_crtc_helper.h" #include "linux/clk.h" +#include "drm_fb_cma_helper.h" #include "linux/component.h" #include "linux/of_device.h" #include "vc4_drv.h" @@ -476,10 +477,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) return ret; } +struct vc4_async_flip_state { + struct drm_crtc *crtc; + struct drm_framebuffer *fb; + struct drm_pending_vblank_event *event; + + struct vc4_seqno_cb cb; +}; + +/* Called when the V3D execution for the BO being flipped to is done, so that + * we can actually update the plane's address to point to it. + */ +static void +vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb); + struct drm_crtc *crtc = flip_state->crtc; + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + + vc4_plane_async_set_fb(plane, flip_state->fb); + if (flip_state->event) { + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, flip_state->event); + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + drm_framebuffer_unreference(flip_state->fb); + kfree(flip_state); + + up(&vc4->async_modeset); +} + +/* Implements async (non-vblank-synced) page flips. + * + * The page flip ioctl needs to return immediately, so we grab the + * modeset semaphore on the pipe, and queue the address update for + * when V3D is done with the BO being flipped to. + */ +static int vc4_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + int ret = 0; + struct vc4_async_flip_state *flip_state; + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); + if (!flip_state) + return -ENOMEM; + + drm_framebuffer_reference(fb); + flip_state->fb = fb; + flip_state->crtc = crtc; + flip_state->event = event; + + /* Make sure all other async modesets have landed.
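+ *
+ * Aside: async_modeset is a semaphore rather than a mutex because it is
+ * released from a different context than the one that acquired it: the
+ * down_interruptible() below runs in the ioctl, while the matching up()
+ * runs from the seqno callback (vc4_async_page_flip_complete() above)
+ * once V3D finishes. The mutex API requires the locking task to do the
+ * unlock; a semaphore has no such owner.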
*/ + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(flip_state); + return ret; + } + + /* Immediately update the plane's legacy fb pointer, so that later + * modeset prep sees the state that will be present when the semaphore + * is released. + */ + drm_atomic_set_fb_for_plane(plane->state, fb); + plane->fb = fb; + + vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno, + vc4_async_page_flip_complete); + + /* Driver takes ownership of state on successful async commit. */ + return 0; +} + +static int vc4_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + if (flags & DRM_MODE_PAGE_FLIP_ASYNC) + return vc4_async_page_flip(crtc, fb, event, flags); + else + return drm_atomic_helper_page_flip(crtc, fb, event, flags); +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, + .page_flip = vc4_page_flip, .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ @@ -606,7 +703,7 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) } drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane, - &vc4_crtc_funcs); + &vc4_crtc_funcs, NULL); drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs); primary_plane->crtc = crtc; cursor_plane->crtc = crtc; diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c index 4297b0a5b74e..d76ad10b07fd 100644 --- a/drivers/gpu/drm/vc4/vc4_debugfs.c +++ b/drivers/gpu/drm/vc4/vc4_debugfs.c @@ -16,11 +16,14 @@ #include "vc4_regs.h" static const struct drm_info_list vc4_debugfs_list[] = { + {"bo_stats", vc4_bo_stats_debugfs, 0}, {"hdmi_regs", vc4_hdmi_debugfs_regs, 0}, {"hvs_regs", vc4_hvs_debugfs_regs, 0}, {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0}, {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1}, {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2}, + {"v3d_ident", vc4_v3d_debugfs_ident, 0}, + {"v3d_regs", vc4_v3d_debugfs_regs, 0}, }; #define VC4_DEBUGFS_ENTRIES ARRAY_SIZE(vc4_debugfs_list) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index d5db9e0f3b73..f1655fff8425 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -16,6 +16,7 @@ #include <linux/platform_device.h> #include "drm_fb_cma_helper.h" +#include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" #include "vc4_regs.h" @@ -63,7 +64,7 @@ static const struct file_operations vc4_drm_fops = { .open = drm_open, .release = drm_release, .unlocked_ioctl = drm_ioctl, - .mmap = drm_gem_cma_mmap, + .mmap = vc4_mmap, .poll = drm_poll, .read = drm_read, #ifdef CONFIG_COMPAT @@ -73,16 +74,30 @@ static const struct file_operations vc4_drm_fops = { }; static const struct drm_ioctl_desc vc4_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, + DRM_ROOT_ONLY), }; static struct drm_driver vc4_drm_driver = { .driver_features = (DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_GEM | + 
DRIVER_HAVE_IRQ | DRIVER_PRIME), .lastclose = vc4_lastclose, .preclose = vc4_drm_preclose, + .irq_handler = vc4_irq, + .irq_preinstall = vc4_irq_preinstall, + .irq_postinstall = vc4_irq_postinstall, + .irq_uninstall = vc4_irq_uninstall, + .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, .get_vblank_counter = drm_vblank_count, @@ -92,18 +107,19 @@ static struct drm_driver vc4_drm_driver = { .debugfs_cleanup = vc4_debugfs_cleanup, #endif - .gem_free_object = drm_gem_cma_free_object, + .gem_create_object = vc4_create_object, + .gem_free_object = vc4_free_object, .gem_vm_ops = &drm_gem_cma_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, - .gem_prime_export = drm_gem_prime_export, + .gem_prime_export = vc4_prime_export, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, - .gem_prime_vmap = drm_gem_cma_prime_vmap, + .gem_prime_vmap = vc4_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, - .gem_prime_mmap = drm_gem_cma_prime_mmap, + .gem_prime_mmap = vc4_prime_mmap, .dumb_create = vc4_dumb_create, .dumb_map_offset = drm_gem_cma_dumb_map_offset, @@ -168,15 +184,17 @@ static int vc4_drm_bind(struct device *dev) vc4->dev = drm; drm->dev_private = vc4; - drm_dev_set_unique(drm, dev_name(dev)); + vc4_bo_cache_init(drm); drm_mode_config_init(drm); if (ret) goto unref; + vc4_gem_init(drm); + ret = component_bind_all(dev, drm); if (ret) - goto unref; + goto gem_destroy; ret = drm_dev_register(drm, 0); if (ret < 0) @@ -200,8 +218,11 @@ unregister: drm_dev_unregister(drm); unbind_all: component_unbind_all(dev, drm); +gem_destroy: + vc4_gem_destroy(drm); unref: drm_dev_unref(drm); + vc4_bo_cache_destroy(drm); return ret; } @@ -228,6 +249,7 @@ static struct platform_driver *const component_drivers[] = { &vc4_hdmi_driver, &vc4_crtc_driver, &vc4_hvs_driver, + &vc4_v3d_driver, }; static int vc4_platform_drm_probe(struct platform_device *pdev) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index fd8319fa682e..080865ec2bae 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -15,8 +15,89 @@ struct vc4_dev { struct vc4_hdmi *hdmi; struct vc4_hvs *hvs; struct vc4_crtc *crtc[3]; + struct vc4_v3d *v3d; struct drm_fbdev_cma *fbdev; + + struct vc4_hang_state *hang_state; + + /* The kernel-space BO cache. Tracks buffers that have been + * unreferenced by all other users (refcounts of 0!) but not + * yet freed, so we can do cheap allocations. + */ + struct vc4_bo_cache { + /* Array of list heads for entries in the BO cache, + * based on number of pages, so we can do O(1) lookups + * in the cache when allocating. + */ + struct list_head *size_list; + uint32_t size_list_size; + + /* List of all BOs in the cache, ordered by age, so we + * can do O(1) lookups when trying to free old + * buffers. + */ + struct list_head time_list; + struct work_struct time_work; + struct timer_list time_timer; + } bo_cache; + + struct vc4_bo_stats { + u32 num_allocated; + u32 size_allocated; + u32 num_cached; + u32 size_cached; + } bo_stats; + + /* Protects bo_cache and the BO stats. */ + struct mutex bo_lock; + + /* Sequence number for the last job queued in job_list. + * Starts at 0 (no jobs emitted). + */ + uint64_t emit_seqno; + + /* Sequence number for the last completed job on the GPU. + * Starts at 0 (no jobs completed). 
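+ *
+ * Aside: with this monotonic pair of counters, "has job N completed?"
+ * is a plain comparison. A hypothetical helper, not part of the patch:
+ *
+ *	static bool vc4_seqno_passed(struct vc4_dev *vc4, uint64_t seqno)
+ *	{
+ *		return vc4->finished_seqno >= seqno;
+ *	}
+ *
+ * and "is any work pending?" is emit_seqno != finished_seqno.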
+ */ + uint64_t finished_seqno; + + /* List of all struct vc4_exec_info for jobs to be executed. + * The first job in the list is the one currently programmed + * into ct0ca/ct1ca for execution. + */ + struct list_head job_list; + /* List of the finished vc4_exec_infos waiting to be freed by + * job_done_work. + */ + struct list_head job_done_list; + /* Spinlock used to synchronize the job_list and seqno + * accesses between the IRQ handler and GEM ioctls. + */ + spinlock_t job_lock; + wait_queue_head_t job_wait_queue; + struct work_struct job_done_work; + + /* List of struct vc4_seqno_cb for callbacks to be made from a + * workqueue when the given seqno is passed. + */ + struct list_head seqno_cb_list; + + /* The binner overflow memory that's currently set up in + * BPOA/BPOS registers. When overflow occurs and a new one is + * allocated, the previous one will be moved to + * vc4->current_exec's free list. + */ + struct vc4_bo *overflow_mem; + struct work_struct overflow_mem_work; + + struct { + uint32_t last_ct0ca, last_ct1ca; + struct timer_list timer; + struct work_struct reset_work; + } hangcheck; + + struct semaphore async_modeset; }; static inline struct vc4_dev * @@ -27,6 +108,25 @@ to_vc4_dev(struct drm_device *dev) struct vc4_bo { struct drm_gem_cma_object base; + + /* seqno of the last job to render to this BO. */ + uint64_t seqno; + + /* List entry for the BO's position in either + * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list + */ + struct list_head unref_head; + + /* Time in jiffies when the BO was put in vc4->bo_cache. */ + unsigned long free_time; + + /* List entry for the BO's position in vc4_dev->bo_cache.size_list */ + struct list_head size_head; + + /* Struct for shader validation state, if created by + * DRM_IOCTL_VC4_CREATE_SHADER_BO. + */ + struct vc4_validated_shader_info *validated_shader; }; static inline struct vc4_bo * @@ -35,6 +135,17 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; } +struct vc4_seqno_cb { + struct work_struct work; + uint64_t seqno; + void (*func)(struct vc4_seqno_cb *cb); +}; + +struct vc4_v3d { + struct platform_device *pdev; + void __iomem *regs; +}; + struct vc4_hvs { struct platform_device *pdev; void __iomem *regs; @@ -72,9 +183,142 @@ to_vc4_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_encoder, base); } +#define V3D_READ(offset) readl(vc4->v3d->regs + offset) +#define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) #define HVS_READ(offset) readl(vc4->hvs->regs + offset) #define HVS_WRITE(offset, val) writel(val, vc4->hvs->regs + offset) +struct vc4_exec_info { + /* Sequence number for this bin/render job. */ + uint64_t seqno; + + /* Kernel-space copy of the ioctl arguments */ + struct drm_vc4_submit_cl *args; + + /* This is the array of BOs that were looked up at the start of exec. + * Command validation will use indices into this array. + */ + struct drm_gem_cma_object **bo; + uint32_t bo_count; + + /* Pointers for our position in vc4->job_list */ + struct list_head head; + + /* List of other BOs used in the job that need to be released + * once the job is complete. + */ + struct list_head unref_list; + + /* Current unvalidated indices into @bo loaded by the non-hardware + * VC4_PACKET_GEM_HANDLES. + */ + uint32_t bo_index[2]; + + /* This is the BO where we store the validated command lists, shader + * records, and uniforms. 
+ */ + struct drm_gem_cma_object *exec_bo; + + /** + * This tracks the per-shader-record state (packet 64) that + * determines the length of the shader record and the offset + * it's expected to be found at. It gets read in from the + * command lists. + */ + struct vc4_shader_state { + uint32_t addr; + /* Maximum vertex index referenced by any primitive using this + * shader state. + */ + uint32_t max_index; + } *shader_state; + + /** How many shader states the user declared they were using. */ + uint32_t shader_state_size; + /** How many shader state records the validator has seen. */ + uint32_t shader_state_count; + + bool found_tile_binning_mode_config_packet; + bool found_start_tile_binning_packet; + bool found_increment_semaphore_packet; + bool found_flush; + uint8_t bin_tiles_x, bin_tiles_y; + struct drm_gem_cma_object *tile_bo; + uint32_t tile_alloc_offset; + + /** + * Computed addresses pointing into exec_bo where we start the + * bin thread (ct0) and render thread (ct1). + */ + uint32_t ct0ca, ct0ea; + uint32_t ct1ca, ct1ea; + + /* Pointer to the unvalidated bin CL (if present). */ + void *bin_u; + + /* Pointers to the shader recs. The paddr gets incremented as CL + * packets are relocated in validate_gl_shader_state, and the vaddrs + * (u and v) get incremented and size decremented as the shader recs + * themselves are validated. + */ + void *shader_rec_u; + void *shader_rec_v; + uint32_t shader_rec_p; + uint32_t shader_rec_size; + + /* Pointers to the uniform data. These pointers are incremented, and + * size decremented, as each batch of uniforms is uploaded. + */ + void *uniforms_u; + void *uniforms_v; + uint32_t uniforms_p; + uint32_t uniforms_size; +}; + +static inline struct vc4_exec_info * +vc4_first_job(struct vc4_dev *vc4) +{ + if (list_empty(&vc4->job_list)) + return NULL; + return list_first_entry(&vc4->job_list, struct vc4_exec_info, head); +} + +/** + * struct vc4_texture_sample_info - saves the offsets into the UBO for texture + * setup parameters. + * + * This will be used at draw time to relocate the reference to the texture + * contents in p0, and validate that the offset combined with + * width/height/stride/etc. from p1 and p2/p3 doesn't sample outside the BO. + * Note that the hardware treats unprovided config parameters as 0, so not all + * of them need to be set up for every texture sample, and we'll store ~0 as + * the offset to mark the unused ones. + * + * See the VC4 3D architecture guide page 41 ("Texture and Memory Lookup Unit + * Setup") for definitions of the texture parameters. + */ +struct vc4_texture_sample_info { + bool is_direct; + uint32_t p_offset[4]; +}; + +/** + * struct vc4_validated_shader_info - information about validated shaders that + * needs to be used from command list validation. + * + * For a given shader, each time a shader state record references it, we need + * to verify that the shader doesn't read more uniforms than the shader state + * record's uniform BO pointer can provide, and we need to apply relocations + * and validate the shader state record's uniforms that define the texture + * samples.
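+ *
+ * A hedged sketch of the first of those checks -- the real logic lives
+ * in vc4_validate_shader_recs(), and the names here are invented:
+ *
+ *   if (validated_shader->uniforms_src_size > uniforms_remaining)
+ *           return -EINVAL;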
+ */ +struct vc4_validated_shader_info { + uint32_t uniforms_size; + uint32_t uniforms_src_size; + uint32_t num_texture_samples; + struct vc4_texture_sample_info *texture_samples; +}; + /** * _wait_for - magic (register) wait macro * @@ -104,13 +348,29 @@ to_vc4_encoder(struct drm_encoder *encoder) #define wait_for(COND, MS) _wait_for(COND, MS, 1) /* vc4_bo.c */ +struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size); void vc4_free_object(struct drm_gem_object *gem_obj); -struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size); +struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size, + bool from_cache); int vc4_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); struct dma_buf *vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags); +int vc4_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_mmap(struct file *filp, struct vm_area_struct *vma); +int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +void *vc4_prime_vmap(struct drm_gem_object *obj); +void vc4_bo_cache_init(struct drm_device *dev); +void vc4_bo_cache_destroy(struct drm_device *dev); +int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); /* vc4_crtc.c */ extern struct platform_driver vc4_crtc_driver; @@ -126,10 +386,34 @@ void vc4_debugfs_cleanup(struct drm_minor *minor); /* vc4_drv.c */ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); +/* vc4_gem.c */ +void vc4_gem_init(struct drm_device *dev); +void vc4_gem_destroy(struct drm_device *dev); +int vc4_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void vc4_submit_next_job(struct drm_device *dev); +int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, + uint64_t timeout_ns, bool interruptible); +void vc4_job_handle_completed(struct vc4_dev *vc4); +int vc4_queue_seqno_cb(struct drm_device *dev, + struct vc4_seqno_cb *cb, uint64_t seqno, + void (*func)(struct vc4_seqno_cb *cb)); + /* vc4_hdmi.c */ extern struct platform_driver vc4_hdmi_driver; int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused); +/* vc4_irq.c */ +irqreturn_t vc4_irq(int irq, void *arg); +void vc4_irq_preinstall(struct drm_device *dev); +int vc4_irq_postinstall(struct drm_device *dev); +void vc4_irq_uninstall(struct drm_device *dev); +void vc4_irq_reset(struct drm_device *dev); + /* vc4_hvs.c */ extern struct platform_driver vc4_hvs_driver; void vc4_hvs_dump_state(struct drm_device *dev); @@ -143,3 +427,35 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type); u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); u32 vc4_plane_dlist_size(struct drm_plane_state *state); +void vc4_plane_async_set_fb(struct drm_plane *plane, + struct drm_framebuffer *fb); + +/* vc4_v3d.c */ +extern struct platform_driver vc4_v3d_driver; +int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); +int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); +int 
vc4_v3d_set_power(struct vc4_dev *vc4, bool on); + +/* vc4_validate.c */ +int +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec); + +int +vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec); + +struct drm_gem_cma_object *vc4_use_bo(struct vc4_exec_info *exec, + uint32_t hindex); + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec); + +bool vc4_check_tex_size(struct vc4_exec_info *exec, + struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp); + +/* vc4_validate_shader.c */ +struct vc4_validated_shader_info * +vc4_validate_shader(struct drm_gem_cma_object *shader_obj); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c new file mode 100644 index 000000000000..48ce30a6f4b5 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -0,0 +1,866 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/io.h> + +#include "uapi/drm/vc4_drm.h" +#include "vc4_drv.h" +#include "vc4_regs.h" +#include "vc4_trace.h" + +static void +vc4_queue_hangcheck(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + mod_timer(&vc4->hangcheck.timer, + round_jiffies_up(jiffies + msecs_to_jiffies(100))); +} + +struct vc4_hang_state { + struct drm_vc4_get_hang_state user_state; + + u32 bo_count; + struct drm_gem_object **bo; +}; + +static void +vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state) +{ + unsigned int i; + + mutex_lock(&dev->struct_mutex); + for (i = 0; i < state->user_state.bo_count; i++) + drm_gem_object_unreference(state->bo[i]); + mutex_unlock(&dev->struct_mutex); + + kfree(state); +} + +int +vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_get_hang_state *get_state = data; + struct drm_vc4_get_hang_state_bo *bo_state; + struct vc4_hang_state *kernel_state; + struct drm_vc4_get_hang_state *state; + struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; + u32 i; + int ret = 0; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + kernel_state = vc4->hang_state; + if (!kernel_state) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return -ENOENT; + } + state = &kernel_state->user_state; + + /* If the user's array isn't big enough, just return the + * required array size. + */ + if (get_state->bo_count < state->bo_count) { + get_state->bo_count = state->bo_count; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return 0; + } + + vc4->hang_state = NULL; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + /* Save the user's BO pointer, so we don't stomp it with the memcpy. 
*/ + state->bo = get_state->bo; + memcpy(get_state, state, sizeof(*state)); + + bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL); + if (!bo_state) { + ret = -ENOMEM; + goto err_free; + } + + for (i = 0; i < state->bo_count; i++) { + struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]); + u32 handle; + + ret = drm_gem_handle_create(file_priv, kernel_state->bo[i], + &handle); + + if (ret) { + state->bo_count = i - 1; + goto err; + } + bo_state[i].handle = handle; + bo_state[i].paddr = vc4_bo->base.paddr; + bo_state[i].size = vc4_bo->base.base.size; + } + + if (copy_to_user((void __user *)(uintptr_t)get_state->bo, + bo_state, + state->bo_count * sizeof(*bo_state))) + ret = -EFAULT; + + kfree(bo_state); + +err_free: + + vc4_free_hang_state(dev, kernel_state); + +err: + return ret; +} + +static void +vc4_save_hang_state(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_get_hang_state *state; + struct vc4_hang_state *kernel_state; + struct vc4_exec_info *exec; + struct vc4_bo *bo; + unsigned long irqflags; + unsigned int i, unref_list_count; + + kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL); + if (!kernel_state) + return; + + state = &kernel_state->user_state; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + exec = vc4_first_job(vc4); + if (!exec) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return; + } + + unref_list_count = 0; + list_for_each_entry(bo, &exec->unref_list, unref_head) + unref_list_count++; + + state->bo_count = exec->bo_count + unref_list_count; + kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo), + GFP_ATOMIC); + if (!kernel_state->bo) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return; + } + + for (i = 0; i < exec->bo_count; i++) { + drm_gem_object_reference(&exec->bo[i]->base); + kernel_state->bo[i] = &exec->bo[i]->base; + } + + list_for_each_entry(bo, &exec->unref_list, unref_head) { + drm_gem_object_reference(&bo->base.base); + kernel_state->bo[i] = &bo->base.base; + i++; + } + + state->start_bin = exec->ct0ca; + state->start_render = exec->ct1ca; + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + state->ct0ca = V3D_READ(V3D_CTNCA(0)); + state->ct0ea = V3D_READ(V3D_CTNEA(0)); + + state->ct1ca = V3D_READ(V3D_CTNCA(1)); + state->ct1ea = V3D_READ(V3D_CTNEA(1)); + + state->ct0cs = V3D_READ(V3D_CTNCS(0)); + state->ct1cs = V3D_READ(V3D_CTNCS(1)); + + state->ct0ra0 = V3D_READ(V3D_CT00RA0); + state->ct1ra0 = V3D_READ(V3D_CT01RA0); + + state->bpca = V3D_READ(V3D_BPCA); + state->bpcs = V3D_READ(V3D_BPCS); + state->bpoa = V3D_READ(V3D_BPOA); + state->bpos = V3D_READ(V3D_BPOS); + + state->vpmbase = V3D_READ(V3D_VPMBASE); + + state->dbge = V3D_READ(V3D_DBGE); + state->fdbgo = V3D_READ(V3D_FDBGO); + state->fdbgb = V3D_READ(V3D_FDBGB); + state->fdbgr = V3D_READ(V3D_FDBGR); + state->fdbgs = V3D_READ(V3D_FDBGS); + state->errstat = V3D_READ(V3D_ERRSTAT); + + spin_lock_irqsave(&vc4->job_lock, irqflags); + if (vc4->hang_state) { + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + vc4_free_hang_state(dev, kernel_state); + } else { + vc4->hang_state = kernel_state; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + } +} + +static void +vc4_reset(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + DRM_INFO("Resetting GPU.\n"); + vc4_v3d_set_power(vc4, false); + vc4_v3d_set_power(vc4, true); + + vc4_irq_reset(dev); + + /* Rearm the hangcheck -- another job might have been waiting + * for our hung one to get kicked off, and vc4_irq_reset() + * would have 
started it. + */ + vc4_queue_hangcheck(dev); +} + +static void +vc4_reset_work(struct work_struct *work) +{ + struct vc4_dev *vc4 = + container_of(work, struct vc4_dev, hangcheck.reset_work); + + vc4_save_hang_state(vc4->dev); + + vc4_reset(vc4->dev); +} + +static void +vc4_hangcheck_elapsed(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint32_t ct0ca, ct1ca; + + /* If idle, we can stop watching for hangs. */ + if (list_empty(&vc4->job_list)) + return; + + ct0ca = V3D_READ(V3D_CTNCA(0)); + ct1ca = V3D_READ(V3D_CTNCA(1)); + + /* If we've made any progress in execution, rearm the timer + * and wait. + */ + if (ct0ca != vc4->hangcheck.last_ct0ca || + ct1ca != vc4->hangcheck.last_ct1ca) { + vc4->hangcheck.last_ct0ca = ct0ca; + vc4->hangcheck.last_ct1ca = ct1ca; + vc4_queue_hangcheck(dev); + return; + } + + /* We've gone too long with no progress, reset. This has to + * be done from a work struct, since resetting can sleep and + * this timer hook isn't allowed to. + */ + schedule_work(&vc4->hangcheck.reset_work); +} + +static void +submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. + */ + V3D_WRITE(V3D_CTNCA(thread), start); + V3D_WRITE(V3D_CTNEA(thread), end); +} + +int +vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, + bool interruptible) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + int ret = 0; + unsigned long timeout_expire; + DEFINE_WAIT(wait); + + if (vc4->finished_seqno >= seqno) + return 0; + + if (timeout_ns == 0) + return -ETIME; + + timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns); + + trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns); + for (;;) { + prepare_to_wait(&vc4->job_wait_queue, &wait, + interruptible ? TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + + if (interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + if (vc4->finished_seqno >= seqno) + break; + + if (timeout_ns != ~0ull) { + if (time_after_eq(jiffies, timeout_expire)) { + ret = -ETIME; + break; + } + schedule_timeout(timeout_expire - jiffies); + } else { + schedule(); + } + } + + finish_wait(&vc4->job_wait_queue, &wait); + trace_vc4_wait_for_seqno_end(dev, seqno); + + if (ret && ret != -ERESTARTSYS) { + DRM_ERROR("timeout waiting for render thread idle\n"); + return ret; + } + + return 0; +} + +static void +vc4_flush_caches(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Flush the GPU L2 caches. These caches sit on top of system + * L3 (the 128kb or so shared with the CPU), and are + * non-allocating in the L3. + */ + V3D_WRITE(V3D_L2CACTL, + V3D_L2CACTL_L2CCLR); + + V3D_WRITE(V3D_SLCACTL, + VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) | + VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) | + VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) | + VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC)); +} + +/* Sets the registers for the next job to actually be executed in + * the hardware. + * + * The job_lock should be held during this.
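+ *
+ * Caller sketch -- this mirrors how vc4_queue_submit() and the IRQ
+ * path below actually call it:
+ *
+ *   spin_lock_irqsave(&vc4->job_lock, irqflags);
+ *   vc4_submit_next_job(dev);
+ *   spin_unlock_irqrestore(&vc4->job_lock, irqflags);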
+ */ +void +vc4_submit_next_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_job(vc4); + + if (!exec) + return; + + vc4_flush_caches(dev); + + /* Disable the binner's pre-loaded overflow memory address */ + V3D_WRITE(V3D_BPOA, 0); + V3D_WRITE(V3D_BPOS, 0); + + if (exec->ct0ca != exec->ct0ea) + submit_cl(dev, 0, exec->ct0ca, exec->ct0ea); + submit_cl(dev, 1, exec->ct1ca, exec->ct1ea); +} + +static void +vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) +{ + struct vc4_bo *bo; + unsigned i; + + for (i = 0; i < exec->bo_count; i++) { + bo = to_vc4_bo(&exec->bo[i]->base); + bo->seqno = seqno; + } + + list_for_each_entry(bo, &exec->unref_list, unref_head) { + bo->seqno = seqno; + } +} + +/* Queues a struct vc4_exec_info for execution. If no job is + * currently executing, then submits it. + * + * Unlike most GPUs, our hardware only handles one command list at a + * time. To queue multiple jobs at once, we'd need to edit the + * previous command list to have a jump to the new one at the end, and + * then bump the end address. That's a change for a later date, + * though. + */ +static void +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint64_t seqno; + unsigned long irqflags; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + + seqno = ++vc4->emit_seqno; + exec->seqno = seqno; + vc4_update_bo_seqnos(exec, seqno); + + list_add_tail(&exec->head, &vc4->job_list); + + /* If no job was executing, kick ours off. Otherwise, it'll + * get started when the previous job's frame done interrupt + * occurs. + */ + if (vc4_first_job(vc4) == exec) { + vc4_submit_next_job(dev); + vc4_queue_hangcheck(dev); + } + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); +} + +/** + * Looks up a bunch of GEM handles for BOs and stores the array for + * use in the command validator that actually writes relocated + * addresses pointing to them. + */ +static int +vc4_cl_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct vc4_exec_info *exec) +{ + struct drm_vc4_submit_cl *args = exec->args; + uint32_t *handles; + int ret = 0; + int i; + + exec->bo_count = args->bo_handle_count; + + if (!exec->bo_count) { + /* See comment on bo_index for why we have to check + * this. 
+ */ + DRM_ERROR("Rendering requires BOs to validate\n"); + return -EINVAL; + } + + exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *), + GFP_KERNEL); + if (!exec->bo) { + DRM_ERROR("Failed to allocate validated BO pointers\n"); + return -ENOMEM; + } + + handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); + if (!handles) { + DRM_ERROR("Failed to allocate incoming GEM handles\n"); + goto fail; + } + + ret = copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(uint32_t)); + if (ret) { + DRM_ERROR("Failed to copy in GEM handles\n"); + goto fail; + } + + spin_lock(&file_priv->table_lock); + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, + handles[i]); + if (!bo) { + DRM_ERROR("Failed to look up GEM BO %d: %d\n", + i, handles[i]); + ret = -EINVAL; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_reference(bo); + exec->bo[i] = (struct drm_gem_cma_object *)bo; + } + spin_unlock(&file_priv->table_lock); + +fail: + kfree(handles); + return 0; +} + +static int +vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct drm_vc4_submit_cl *args = exec->args; + void *temp = NULL; + void *bin; + int ret = 0; + uint32_t bin_offset = 0; + uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size, + 16); + uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size; + uint32_t exec_size = uniforms_offset + args->uniforms_size; + uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) * + args->shader_rec_count); + struct vc4_bo *bo; + + if (uniforms_offset < shader_rec_offset || + exec_size < uniforms_offset || + args->shader_rec_count >= (UINT_MAX / + sizeof(struct vc4_shader_state)) || + temp_size < exec_size) { + DRM_ERROR("overflow in exec arguments\n"); + goto fail; + } + + /* Allocate space where we'll store the copied in user command lists + * and shader records. + * + * We don't just copy directly into the BOs because we need to + * read the contents back for validation, and I think the + * bo->vaddr is uncached access. 
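+ *
+ * (Hence the flow sketched here: copy_from_user() into this cached
+ * shadow, validate by reading the shadow, and let the validator write
+ * relocated packets into the uncached exec_bo -- which is why
+ * vc4_validate_bin_cl() below takes both a "validated" destination
+ * and an "unvalidated" source pointer.)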
+ */ + temp = kmalloc(temp_size, GFP_KERNEL); + if (!temp) { + DRM_ERROR("Failed to allocate storage for copying " + "in bin/render CLs.\n"); + ret = -ENOMEM; + goto fail; + } + bin = temp + bin_offset; + exec->shader_rec_u = temp + shader_rec_offset; + exec->uniforms_u = temp + uniforms_offset; + exec->shader_state = temp + exec_size; + exec->shader_state_size = args->shader_rec_count; + + if (copy_from_user(bin, + (void __user *)(uintptr_t)args->bin_cl, + args->bin_cl_size)) { + ret = -EFAULT; + goto fail; + } + + if (copy_from_user(exec->shader_rec_u, + (void __user *)(uintptr_t)args->shader_rec, + args->shader_rec_size)) { + ret = -EFAULT; + goto fail; + } + + if (copy_from_user(exec->uniforms_u, + (void __user *)(uintptr_t)args->uniforms, + args->uniforms_size)) { + ret = -EFAULT; + goto fail; + } + + bo = vc4_bo_create(dev, exec_size, true); + if (!bo) { + DRM_ERROR("Couldn't allocate BO for binning\n"); + ret = -ENOMEM; + goto fail; + } + exec->exec_bo = &bo->base; + + list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head, + &exec->unref_list); + + exec->ct0ca = exec->exec_bo->paddr + bin_offset; + + exec->bin_u = bin; + + exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset; + exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset; + exec->shader_rec_size = args->shader_rec_size; + + exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset; + exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset; + exec->uniforms_size = args->uniforms_size; + + ret = vc4_validate_bin_cl(dev, + exec->exec_bo->vaddr + bin_offset, + bin, + exec); + if (ret) + goto fail; + + ret = vc4_validate_shader_recs(dev, exec); + +fail: + kfree(temp); + return ret; +} + +static void +vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) +{ + unsigned i; + + /* Need the struct lock for drm_gem_object_unreference(). 
*/ + mutex_lock(&dev->struct_mutex); + if (exec->bo) { + for (i = 0; i < exec->bo_count; i++) + drm_gem_object_unreference(&exec->bo[i]->base); + kfree(exec->bo); + } + + while (!list_empty(&exec->unref_list)) { + struct vc4_bo *bo = list_first_entry(&exec->unref_list, + struct vc4_bo, unref_head); + list_del(&bo->unref_head); + drm_gem_object_unreference(&bo->base.base); + } + mutex_unlock(&dev->struct_mutex); + + kfree(exec); +} + +void +vc4_job_handle_completed(struct vc4_dev *vc4) +{ + unsigned long irqflags; + struct vc4_seqno_cb *cb, *cb_temp; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + while (!list_empty(&vc4->job_done_list)) { + struct vc4_exec_info *exec = + list_first_entry(&vc4->job_done_list, + struct vc4_exec_info, head); + list_del(&exec->head); + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + vc4_complete_exec(vc4->dev, exec); + spin_lock_irqsave(&vc4->job_lock, irqflags); + } + + list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { + if (cb->seqno <= vc4->finished_seqno) { + list_del_init(&cb->work.entry); + schedule_work(&cb->work); + } + } + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); +} + +static void vc4_seqno_cb_work(struct work_struct *work) +{ + struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); + + cb->func(cb); +} + +int vc4_queue_seqno_cb(struct drm_device *dev, + struct vc4_seqno_cb *cb, uint64_t seqno, + void (*func)(struct vc4_seqno_cb *cb)) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + int ret = 0; + unsigned long irqflags; + + cb->func = func; + INIT_WORK(&cb->work, vc4_seqno_cb_work); + + spin_lock_irqsave(&vc4->job_lock, irqflags); + if (seqno > vc4->finished_seqno) { + cb->seqno = seqno; + list_add_tail(&cb->work.entry, &vc4->seqno_cb_list); + } else { + schedule_work(&cb->work); + } + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + return ret; +} + +/* Scheduled when any job has been completed, this walks the list of + * jobs that had completed and unrefs their BOs and frees their exec + * structs. + */ +static void +vc4_job_done_work(struct work_struct *work) +{ + struct vc4_dev *vc4 = + container_of(work, struct vc4_dev, job_done_work); + + vc4_job_handle_completed(vc4); +} + +static int +vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev, + uint64_t seqno, + uint64_t *timeout_ns) +{ + unsigned long start = jiffies; + int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true); + + if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) { + uint64_t delta = jiffies_to_nsecs(jiffies - start); + + if (*timeout_ns >= delta) + *timeout_ns -= delta; + } + + return ret; +} + +int +vc4_wait_seqno_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_wait_seqno *args = data; + + return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno, + &args->timeout_ns); +} + +int +vc4_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + int ret; + struct drm_vc4_wait_bo *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + bo = to_vc4_bo(gem_obj); + + ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, + &args->timeout_ns); + + drm_gem_object_unreference_unlocked(gem_obj); + return ret; +} + +/** + * Submits a command list to the VC4. + * + * This is what is called batchbuffer emitting on other hardware. 
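+ *
+ * Hedged userspace sketch (drmIoctl() is from libdrm; ioctl and field
+ * names are from the uapi header included above, but treat the
+ * details as illustrative only):
+ *
+ *   struct drm_vc4_submit_cl submit = {
+ *           .bo_handles = (uintptr_t)handles,
+ *           .bo_handle_count = num_bos,
+ *           .bin_cl = (uintptr_t)bcl,
+ *           .bin_cl_size = bcl_size,
+ *           ... plus shader_rec/uniforms pointers and sizes ...
+ *   };
+ *   drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
+ *
+ *   struct drm_vc4_wait_seqno wait = {
+ *           .seqno = submit.seqno,
+ *           .timeout_ns = ~0ull,
+ *   };
+ *   drmIoctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);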
+ */ +int +vc4_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_vc4_submit_cl *args = data; + struct vc4_exec_info *exec; + int ret; + + if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { + DRM_ERROR("Unknown flags: 0x%02x\n", args->flags); + return -EINVAL; + } + + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); + if (!exec) { + DRM_ERROR("malloc failure on exec struct\n"); + return -ENOMEM; + } + + exec->args = args; + INIT_LIST_HEAD(&exec->unref_list); + + ret = vc4_cl_lookup_bos(dev, file_priv, exec); + if (ret) + goto fail; + + if (exec->args->bin_cl_size != 0) { + ret = vc4_get_bcl(dev, exec); + if (ret) + goto fail; + } else { + exec->ct0ca = 0; + exec->ct0ea = 0; + } + + ret = vc4_get_rcl(dev, exec); + if (ret) + goto fail; + + /* Clear this out of the struct we'll be putting in the queue, + * since it's part of our stack. + */ + exec->args = NULL; + + vc4_queue_submit(dev, exec); + + /* Return the seqno for our job. */ + args->seqno = vc4->emit_seqno; + + return 0; + +fail: + vc4_complete_exec(vc4->dev, exec); + + return ret; +} + +void +vc4_gem_init(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + INIT_LIST_HEAD(&vc4->job_list); + INIT_LIST_HEAD(&vc4->job_done_list); + INIT_LIST_HEAD(&vc4->seqno_cb_list); + spin_lock_init(&vc4->job_lock); + + INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); + setup_timer(&vc4->hangcheck.timer, + vc4_hangcheck_elapsed, + (unsigned long)dev); + + INIT_WORK(&vc4->job_done_work, vc4_job_done_work); +} + +void +vc4_gem_destroy(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Waiting for exec to finish would need to be done before + * unregistering V3D. + */ + WARN_ON(vc4->emit_seqno != vc4->finished_seqno); + + /* V3D should already have disabled its interrupt and cleared + * the overflow allocation registers. Now free the object. 
+ */ + if (vc4->overflow_mem) { + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); + vc4->overflow_mem = NULL; + } + + vc4_bo_cache_destroy(dev); + + if (vc4->hang_state) + vc4_free_hang_state(dev, vc4->hang_state); +} diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index da9a36d6e1d1..c69c0460196b 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -519,7 +519,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) WARN_ON_ONCE((HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE) == 0); drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs, - DRM_MODE_ENCODER_TMDS); + DRM_MODE_ENCODER_TMDS, NULL); drm_encoder_helper_add(hdmi->encoder, &vc4_hdmi_encoder_helper_funcs); hdmi->connector = vc4_hdmi_connector_init(drm, hdmi->encoder); diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c new file mode 100644 index 000000000000..b68060e758db --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** DOC: Interrupt management for the V3D engine. + * + * We have an interrupt status register (V3D_INTCTL) which reports + * interrupts, and where writing 1 bits clears those interrupts. + * There are also a pair of interrupt registers + * (V3D_INTENA/V3D_INTDIS) where writing a 1 to their bits enables or + * disables that specific interrupt, and 0s written are ignored + * (reading either one returns the set of enabled interrupts). + * + * When we take a render frame interrupt, we need to wake the + * processes waiting for some frame to be done, and get the next frame + * submitted ASAP (so the hardware doesn't sit idle when there's work + * to do). + * + * When we take the binner out of memory interrupt, we need to + * allocate some new memory and pass it to the binner so that the + * current job can make progress. 
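+ *
+ * The ack/mask discipline the handler below follows, in sketch form:
+ *
+ *   intctl = V3D_READ(V3D_INTCTL);            <- what fired
+ *   V3D_WRITE(V3D_INTCTL, intctl);            <- ack by writing 1s back
+ *   V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM);   <- mask OUTOMEM until the
+ *                                                worker supplies memory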
+ */ + +#include "vc4_drv.h" +#include "vc4_regs.h" + +#define V3D_DRIVER_IRQS (V3D_INT_OUTOMEM | \ + V3D_INT_FRDONE) + +DECLARE_WAIT_QUEUE_HEAD(render_wait); + +static void +vc4_overflow_mem_work(struct work_struct *work) +{ + struct vc4_dev *vc4 = + container_of(work, struct vc4_dev, overflow_mem_work); + struct drm_device *dev = vc4->dev; + struct vc4_bo *bo; + + bo = vc4_bo_create(dev, 256 * 1024, true); + if (!bo) { + DRM_ERROR("Couldn't allocate binner overflow mem\n"); + return; + } + + /* If there's a job executing currently, then our previous + * overflow allocation is getting used in that job and we need + * to queue it to be released when the job is done. But if no + * job is executing at all, then we can free the old overflow + * object directly. + * + * No lock necessary for this pointer since we're the only + * ones that update the pointer, and our workqueue won't + * reenter. + */ + if (vc4->overflow_mem) { + struct vc4_exec_info *current_exec; + unsigned long irqflags; + + spin_lock_irqsave(&vc4->job_lock, irqflags); + current_exec = vc4_first_job(vc4); + if (current_exec) { + vc4->overflow_mem->seqno = vc4->finished_seqno + 1; + list_add_tail(&vc4->overflow_mem->unref_head, + &current_exec->unref_list); + vc4->overflow_mem = NULL; + } + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + } + + if (vc4->overflow_mem) + drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); + vc4->overflow_mem = bo; + + V3D_WRITE(V3D_BPOA, bo->base.paddr); + V3D_WRITE(V3D_BPOS, bo->base.base.size); + V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM); + V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); +} + +static void +vc4_irq_finish_job(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_exec_info *exec = vc4_first_job(vc4); + + if (!exec) + return; + + vc4->finished_seqno++; + list_move_tail(&exec->head, &vc4->job_done_list); + vc4_submit_next_job(dev); + + wake_up_all(&vc4->job_wait_queue); + schedule_work(&vc4->job_done_work); +} + +irqreturn_t +vc4_irq(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint32_t intctl; + irqreturn_t status = IRQ_NONE; + + barrier(); + intctl = V3D_READ(V3D_INTCTL); + + /* Acknowledge the interrupts we're handling here. The render + * frame done interrupt will be cleared, while OUTOMEM will + * stay high until the underlying cause is cleared. + */ + V3D_WRITE(V3D_INTCTL, intctl); + + if (intctl & V3D_INT_OUTOMEM) { + /* Disable OUTOMEM until the work is done. */ + V3D_WRITE(V3D_INTDIS, V3D_INT_OUTOMEM); + schedule_work(&vc4->overflow_mem_work); + status = IRQ_HANDLED; + } + + if (intctl & V3D_INT_FRDONE) { + spin_lock(&vc4->job_lock); + vc4_irq_finish_job(dev); + spin_unlock(&vc4->job_lock); + status = IRQ_HANDLED; + } + + return status; +} + +void +vc4_irq_preinstall(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + init_waitqueue_head(&vc4->job_wait_queue); + INIT_WORK(&vc4->overflow_mem_work, vc4_overflow_mem_work); + + /* Clear any pending interrupts someone might have left around + * for us. + */ + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); +} + +int +vc4_irq_postinstall(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Enable both the render done and out of memory interrupts. */ + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); + + return 0; +} + +void +vc4_irq_uninstall(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Disable sending interrupts for our driver's IRQs.
*/ + V3D_WRITE(V3D_INTDIS, V3D_DRIVER_IRQS); + + /* Clear any pending interrupts we might have left. */ + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + + cancel_work_sync(&vc4->overflow_mem_work); +} + +/** Reinitializes interrupt registers when a GPU reset is performed. */ +void vc4_irq_reset(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; + + /* Acknowledge any stale IRQs. */ + V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS); + + /* + * Turn all our interrupts on. Binner out of memory is the + * only one we expect to trigger at this point, since we've + * just come from poweron and haven't supplied any overflow + * memory yet. + */ + V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS); + + spin_lock_irqsave(&vc4->job_lock, irqflags); + vc4_irq_finish_job(dev); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); +} diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 2e5597d10cc6..f95f2df5f8d1 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -15,6 +15,7 @@ */ #include "drm_crtc.h" +#include "drm_atomic.h" #include "drm_atomic_helper.h" #include "drm_crtc_helper.h" #include "drm_plane_helper.h" @@ -29,10 +30,152 @@ static void vc4_output_poll_changed(struct drm_device *dev) drm_fbdev_cma_hotplug_event(vc4->fbdev); } +struct vc4_commit { + struct drm_device *dev; + struct drm_atomic_state *state; + struct vc4_seqno_cb cb; +}; + +static void +vc4_atomic_complete_commit(struct vc4_commit *c) +{ + struct drm_atomic_state *state = c->state; + struct drm_device *dev = state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + drm_atomic_helper_commit_modeset_disables(dev, state); + + drm_atomic_helper_commit_planes(dev, state, false); + + drm_atomic_helper_commit_modeset_enables(dev, state); + + drm_atomic_helper_wait_for_vblanks(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + drm_atomic_state_free(state); + + up(&vc4->async_modeset); + + kfree(c); +} + +static void +vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb) +{ + struct vc4_commit *c = container_of(cb, struct vc4_commit, cb); + + vc4_atomic_complete_commit(c); +} + +static struct vc4_commit *commit_init(struct drm_atomic_state *state) +{ + struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); + + if (!c) + return NULL; + c->dev = state->dev; + c->state = state; + + return c; +} + +/** + * vc4_atomic_commit - commit validated state object + * @dev: DRM device + * @state: the driver state object + * @async: asynchronous commit + * + * This function commits a state object that has been pre-validated with + * drm_atomic_helper_check(). This can still fail when e.g. the framebuffer + * reservation fails. Asynchronous commits are completed from a seqno callback + * once rendering to the new framebuffers has finished. + * + * RETURNS + * Zero for success or -errno. + */ +static int vc4_atomic_commit(struct drm_device *dev, + struct drm_atomic_state *state, + bool async) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + int ret; + int i; + uint64_t wait_seqno = 0; + struct vc4_commit *c; + + c = commit_init(state); + if (!c) + return -ENOMEM; + + /* Make sure that any outstanding modesets have finished.
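+ * (The async_modeset semaphore acts as a one-deep commit queue:
+ * the down_interruptible() here pairs with the up() in
+ * vc4_atomic_complete_commit() once the previous state's cleanup
+ * is done.)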
*/ + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; + } + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) { + kfree(c); + up(&vc4->async_modeset); + return ret; + } + + for (i = 0; i < dev->mode_config.num_total_plane; i++) { + struct drm_plane *plane = state->planes[i]; + struct drm_plane_state *new_state = state->plane_states[i]; + + if (!plane) + continue; + + if ((plane->state->fb != new_state->fb) && new_state->fb) { + struct drm_gem_cma_object *cma_bo = + drm_fb_cma_get_gem_obj(new_state->fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + wait_seqno = max(bo->seqno, wait_seqno); + } + } + + /* + * This is the point of no return - everything below never fails except + * when the hw goes bonghits. Which means we can commit the new state on + * the software side now. + */ + + drm_atomic_helper_swap_state(dev, state); + + /* + * Everything below can be run asynchronously without the need to grab + * any modeset locks at all under one condition: It must be guaranteed + * that the asynchronous work has either been cancelled (if the driver + * supports it, which at least requires that the framebuffers get + * cleaned up with drm_atomic_helper_cleanup_planes()) or completed + * before the new state gets committed on the software side with + * drm_atomic_helper_swap_state(). + * + * This scheme allows new atomic state updates to be prepared and + * checked in parallel to the asynchronous completion of the previous + * update. Which is important since compositors need to figure out the + * composition of the next frame right after having submitted the + * current layout. + */ + + if (async) { + vc4_queue_seqno_cb(dev, &c->cb, wait_seqno, + vc4_atomic_complete_commit_seqno_cb); + } else { + vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false); + vc4_atomic_complete_commit(c); + } + + return 0; +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = vc4_output_poll_changed, .atomic_check = drm_atomic_helper_check, - .atomic_commit = drm_atomic_helper_commit, + .atomic_commit = vc4_atomic_commit, .fb_create = drm_fb_cma_create, }; @@ -41,6 +184,8 @@ int vc4_kms_load(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + sema_init(&vc4->async_modeset, 1); + ret = drm_vblank_init(dev, dev->mode_config.num_crtc); if (ret < 0) { dev_err(dev->dev, "failed to initialize vblank\n"); @@ -51,6 +196,8 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.max_height = 2048; dev->mode_config.funcs = &vc4_mode_funcs; dev->mode_config.preferred_depth = 24; + dev->mode_config.async_page_flip = true; + dev->vblank_disable_allowed = true; drm_mode_config_reset(dev); diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h new file mode 100644 index 000000000000..0f31cc06500f --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_packet.h @@ -0,0 +1,399 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef VC4_PACKET_H +#define VC4_PACKET_H + +#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */ + +enum vc4_packet { + VC4_PACKET_HALT = 0, + VC4_PACKET_NOP = 1, + + VC4_PACKET_FLUSH = 4, + VC4_PACKET_FLUSH_ALL = 5, + VC4_PACKET_START_TILE_BINNING = 6, + VC4_PACKET_INCREMENT_SEMAPHORE = 7, + VC4_PACKET_WAIT_ON_SEMAPHORE = 8, + + VC4_PACKET_BRANCH = 16, + VC4_PACKET_BRANCH_TO_SUB_LIST = 17, + + VC4_PACKET_STORE_MS_TILE_BUFFER = 24, + VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, + VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, + VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, + VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, + VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, + + VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, + VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, + + VC4_PACKET_COMPRESSED_PRIMITIVE = 48, + VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, + + VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, + + VC4_PACKET_GL_SHADER_STATE = 64, + VC4_PACKET_NV_SHADER_STATE = 65, + VC4_PACKET_VG_SHADER_STATE = 66, + + VC4_PACKET_CONFIGURATION_BITS = 96, + VC4_PACKET_FLAT_SHADE_FLAGS = 97, + VC4_PACKET_POINT_SIZE = 98, + VC4_PACKET_LINE_WIDTH = 99, + VC4_PACKET_RHT_X_BOUNDARY = 100, + VC4_PACKET_DEPTH_OFFSET = 101, + VC4_PACKET_CLIP_WINDOW = 102, + VC4_PACKET_VIEWPORT_OFFSET = 103, + VC4_PACKET_Z_CLIPPING = 104, + VC4_PACKET_CLIPPER_XY_SCALING = 105, + VC4_PACKET_CLIPPER_Z_SCALING = 106, + + VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, + VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, + VC4_PACKET_CLEAR_COLORS = 114, + VC4_PACKET_TILE_COORDINATES = 115, + + /* Not an actual hardware packet -- this is what we use to put + * references to GEM bos in the command stream, since we need the u32 + * in the actual address packet in order to store the offset from the + * start of the BO.
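+ *
+ * Emission sketch (cl_u8()/cl_u32() are hypothetical stream helpers;
+ * the 9-byte VC4_PACKET_GEM_HANDLES_SIZE below is one opcode byte
+ * plus two u32 BO indices):
+ *
+ *   cl_u8(&bcl, VC4_PACKET_GEM_HANDLES);
+ *   cl_u32(&bcl, bo_index0);
+ *   cl_u32(&bcl, bo_index1);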
+ */ + VC4_PACKET_GEM_HANDLES = 254, +} __attribute__ ((__packed__)); + +#define VC4_PACKET_HALT_SIZE 1 +#define VC4_PACKET_NOP_SIZE 1 +#define VC4_PACKET_FLUSH_SIZE 1 +#define VC4_PACKET_FLUSH_ALL_SIZE 1 +#define VC4_PACKET_START_TILE_BINNING_SIZE 1 +#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 +#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 +#define VC4_PACKET_BRANCH_SIZE 5 +#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 +#define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5 +#define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5 +#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 +#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 +#define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1 +#define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1 +#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 +#define VC4_PACKET_GL_SHADER_STATE_SIZE 5 +#define VC4_PACKET_NV_SHADER_STATE_SIZE 5 +#define VC4_PACKET_VG_SHADER_STATE_SIZE 5 +#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4 +#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 +#define VC4_PACKET_POINT_SIZE_SIZE 5 +#define VC4_PACKET_LINE_WIDTH_SIZE 5 +#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 +#define VC4_PACKET_DEPTH_OFFSET_SIZE 5 +#define VC4_PACKET_CLIP_WINDOW_SIZE 9 +#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 +#define VC4_PACKET_Z_CLIPPING_SIZE 9 +#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 +#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 +#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 +#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 +#define VC4_PACKET_CLEAR_COLORS_SIZE 14 +#define VC4_PACKET_TILE_COORDINATES_SIZE 3 +#define VC4_PACKET_GEM_HANDLES_SIZE 9 + +/* Number of multisamples supported. */ +#define VC4_MAX_SAMPLES 4 +/* Size of a full resolution color or Z tile buffer load/store. */ +#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4) + +/** @{ + * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_TILE_RENDERING_MODE_CONFIG. +*/ +#define VC4_TILING_FORMAT_LINEAR 0 +#define VC4_TILING_FORMAT_T 1 +#define VC4_TILING_FORMAT_LT 2 +/** @} */ + +/** @{ + * + * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and + * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER. + */ +#define VC4_LOADSTORE_FULL_RES_EOF BIT(3) +#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL BIT(2) +#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS BIT(1) +#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR BIT(0) +/** @} */ + +/** @{ + * + * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) + */ + +#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1) +#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0) + +/** @} */ + +/** @{ + * + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL + */ +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15) +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12) + +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 +#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 +/** @} */ + +/** @{ + * + * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL + */ +#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) +#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 +#define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) +#define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) + +/** The values of the field are VC4_TILING_FORMAT_* */ +#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) +#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 + +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 +#define VC4_LOADSTORE_TILE_BUFFER_NONE 0 +#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 +#define VC4_LOADSTORE_TILE_BUFFER_ZS 2 +#define VC4_LOADSTORE_TILE_BUFFER_Z 3 +#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 +#define VC4_LOADSTORE_TILE_BUFFER_FULL 5 +/** @} */ + +#define VC4_INDEX_BUFFER_U8 (0 << 4) +#define VC4_INDEX_BUFFER_U16 (1 << 4) + +/* This flag is only present in NV shader state. */ +#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3) +#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2) +#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1) +#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0) + +/** @{ byte 2 of config bits. */ +#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1) +#define VC4_CONFIG_BITS_EARLY_Z BIT(0) +/** @} */ + +/** @{ byte 1 of config bits. */ +#define VC4_CONFIG_BITS_Z_UPDATE BIT(7) +/** same values in this 3-bit field as PIPE_FUNC_* */ +#define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 +#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3) + +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) +#define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) + +#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0) +/** @} */ + +/** @{ byte 0 of config bits.
*/ +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) +#define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) + +#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4) +#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3) +#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2) +#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1) +#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0) +/** @} */ + +/** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ +#define VC4_BIN_CONFIG_DB_NON_MS BIT(7) + +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 + +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 +#define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 + +#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2) +#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1) +#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0) +/** @} */ + +/** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ +#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12) +#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11) +#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10) +#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9) +#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8) + +/** The values of the field are VC4_TILING_FORMAT_* */ +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 + +#define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) +#define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) + +#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) +#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 +#define VC4_RENDER_CONFIG_FORMAT_BGR565 2 + +#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1) +#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0) + +#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) + +enum vc4_texture_data_type { + VC4_TEXTURE_TYPE_RGBA8888 = 0, + VC4_TEXTURE_TYPE_RGBX8888 = 1, + VC4_TEXTURE_TYPE_RGBA4444 = 2, + VC4_TEXTURE_TYPE_RGBA5551 = 3, + VC4_TEXTURE_TYPE_RGB565 = 4, + VC4_TEXTURE_TYPE_LUMINANCE = 5, + VC4_TEXTURE_TYPE_ALPHA = 6, + VC4_TEXTURE_TYPE_LUMALPHA = 7, + VC4_TEXTURE_TYPE_ETC1 = 8, + VC4_TEXTURE_TYPE_S16F = 9, + VC4_TEXTURE_TYPE_S8 = 10, + VC4_TEXTURE_TYPE_S16 = 11, + VC4_TEXTURE_TYPE_BW1 = 12, + VC4_TEXTURE_TYPE_A4 = 13, + VC4_TEXTURE_TYPE_A1 = 14, + VC4_TEXTURE_TYPE_RGBA64 = 15, + VC4_TEXTURE_TYPE_RGBA32R = 16, + VC4_TEXTURE_TYPE_YUV422R = 17, +}; + +#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) +#define VC4_TEX_P0_OFFSET_SHIFT 12 +#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) +#define VC4_TEX_P0_CSWIZ_SHIFT 10 +#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) +#define VC4_TEX_P0_CMMODE_SHIFT 9 +#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) 
+#define VC4_TEX_P0_FLIPY_SHIFT 8 +#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) +#define VC4_TEX_P0_TYPE_SHIFT 4 +#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) +#define VC4_TEX_P0_MIPLVLS_SHIFT 0 + +#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) +#define VC4_TEX_P1_TYPE4_SHIFT 31 +#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) +#define VC4_TEX_P1_HEIGHT_SHIFT 20 +#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) +#define VC4_TEX_P1_ETCFLIP_SHIFT 19 +#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) +#define VC4_TEX_P1_WIDTH_SHIFT 8 + +#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) +#define VC4_TEX_P1_MAGFILT_SHIFT 7 +# define VC4_TEX_P1_MAGFILT_LINEAR 0 +# define VC4_TEX_P1_MAGFILT_NEAREST 1 + +#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) +#define VC4_TEX_P1_MINFILT_SHIFT 4 +# define VC4_TEX_P1_MINFILT_LINEAR 0 +# define VC4_TEX_P1_MINFILT_NEAREST 1 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 +# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 +# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 + +#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) +#define VC4_TEX_P1_WRAP_T_SHIFT 2 +#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) +#define VC4_TEX_P1_WRAP_S_SHIFT 0 +# define VC4_TEX_P1_WRAP_REPEAT 0 +# define VC4_TEX_P1_WRAP_CLAMP 1 +# define VC4_TEX_P1_WRAP_MIRROR 2 +# define VC4_TEX_P1_WRAP_BORDER 3 + +#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) +#define VC4_TEX_P2_PTYPE_SHIFT 30 +# define VC4_TEX_P2_PTYPE_IGNORED 0 +# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 + +/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ +#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) +#define VC4_TEX_P2_CMST_SHIFT 12 +#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) +#define VC4_TEX_P2_BSLOD_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ +#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CHEIGHT_SHIFT 12 +#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CWIDTH_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ +#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CYOFF_SHIFT 12 +#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CXOFF_SHIFT 0 + +#endif /* VC4_PACKET_H */ diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 887f3caad0be..0addbad15832 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -29,6 +29,14 @@ struct vc4_plane_state { u32 *dlist; u32 dlist_size; /* Number of dwords allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ + + /* Offset in the dlist to pointer word 0. */ + u32 pw0_offset; + + /* Offset where the plane's dlist was last stored in the + hardware at vc4_crtc_atomic_flush() time. + */ + u32 *hw_dlist; }; static inline struct vc4_plane_state * @@ -207,6 +215,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); + vc4_state->pw0_offset = vc4_state->dlist_count; + /* Pointer Word 0: RGB / Y Pointer */ vc4_dlist_write(vc4_state, bo->paddr + offset); @@ -258,6 +268,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); int i; + vc4_state->hw_dlist = dlist; + /* Can't memcpy_toio() because it needs to be 32-bit writes.
*/ for (i = 0; i < vc4_state->dlist_count; i++) writel(vc4_state->dlist[i], &dlist[i]); @@ -272,6 +284,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state) return vc4_state->dlist_count; } +/* Updates the plane to immediately (well, once the FIFO needs + * refilling) scan out from a new framebuffer. + */ +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); + uint32_t addr; + + /* We're skipping the address adjustment for negative origin, + * because this is only called on the primary plane. + */ + WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); + addr = bo->paddr + fb->offsets[0]; + + /* Write the new address into the hardware immediately. The + * scanout will start from this address as soon as the FIFO + * needs to refill with pixels. + */ + writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); + + /* Also update the CPU-side dlist copy, so that any later + * atomic updates that don't do a new modeset on our plane + * also use our updated address. + */ + vc4_state->dlist[vc4_state->pw0_offset] = addr; +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .prepare_fb = NULL, .cleanup_fb = NULL, @@ -317,7 +357,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, ret = drm_universal_plane_init(dev, plane, 0xff, &vc4_plane_funcs, formats, ARRAY_SIZE(formats), - type); + type, NULL); drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h new file mode 100644 index 000000000000..d5c2f3c85ebb --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h @@ -0,0 +1,264 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#ifndef VC4_QPU_DEFINES_H +#define VC4_QPU_DEFINES_H + +enum qpu_op_add { + QPU_A_NOP, + QPU_A_FADD, + QPU_A_FSUB, + QPU_A_FMIN, + QPU_A_FMAX, + QPU_A_FMINABS, + QPU_A_FMAXABS, + QPU_A_FTOI, + QPU_A_ITOF, + QPU_A_ADD = 12, + QPU_A_SUB, + QPU_A_SHR, + QPU_A_ASR, + QPU_A_ROR, + QPU_A_SHL, + QPU_A_MIN, + QPU_A_MAX, + QPU_A_AND, + QPU_A_OR, + QPU_A_XOR, + QPU_A_NOT, + QPU_A_CLZ, + QPU_A_V8ADDS = 30, + QPU_A_V8SUBS = 31, +}; + +enum qpu_op_mul { + QPU_M_NOP, + QPU_M_FMUL, + QPU_M_MUL24, + QPU_M_V8MULD, + QPU_M_V8MIN, + QPU_M_V8MAX, + QPU_M_V8ADDS, + QPU_M_V8SUBS, +}; + +enum qpu_raddr { + QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ + /* 0-31 are the plain regfile a or b fields */ + QPU_R_UNIF = 32, + QPU_R_VARY = 35, + QPU_R_ELEM_QPU = 38, + QPU_R_NOP, + QPU_R_XY_PIXEL_COORD = 41, + QPU_R_MS_REV_FLAGS = 42, + QPU_R_VPM = 48, + QPU_R_VPM_LD_BUSY, + QPU_R_VPM_LD_WAIT, + QPU_R_MUTEX_ACQUIRE, +}; + +enum qpu_waddr { + /* 0-31 are the plain regfile a or b fields */ + QPU_W_ACC0 = 32, /* aka r0 */ + QPU_W_ACC1, + QPU_W_ACC2, + QPU_W_ACC3, + QPU_W_TMU_NOSWAP, + QPU_W_ACC5, + QPU_W_HOST_INT, + QPU_W_NOP, + QPU_W_UNIFORMS_ADDRESS, + QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */ + QPU_W_MS_FLAGS = 42, + QPU_W_REV_FLAG = 42, + QPU_W_TLB_STENCIL_SETUP = 43, + QPU_W_TLB_Z, + QPU_W_TLB_COLOR_MS, + QPU_W_TLB_COLOR_ALL, + QPU_W_TLB_ALPHA_MASK, + QPU_W_VPM, + QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ + QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ + QPU_W_MUTEX_RELEASE, + QPU_W_SFU_RECIP, + QPU_W_SFU_RECIPSQRT, + QPU_W_SFU_EXP, + QPU_W_SFU_LOG, + QPU_W_TMU0_S, + QPU_W_TMU0_T, + QPU_W_TMU0_R, + QPU_W_TMU0_B, + QPU_W_TMU1_S, + QPU_W_TMU1_T, + QPU_W_TMU1_R, + QPU_W_TMU1_B, +}; + +enum qpu_sig_bits { + QPU_SIG_SW_BREAKPOINT, + QPU_SIG_NONE, + QPU_SIG_THREAD_SWITCH, + QPU_SIG_PROG_END, + QPU_SIG_WAIT_FOR_SCOREBOARD, + QPU_SIG_SCOREBOARD_UNLOCK, + QPU_SIG_LAST_THREAD_SWITCH, + QPU_SIG_COVERAGE_LOAD, + QPU_SIG_COLOR_LOAD, + QPU_SIG_COLOR_LOAD_END, + QPU_SIG_LOAD_TMU0, + QPU_SIG_LOAD_TMU1, + QPU_SIG_ALPHA_MASK_LOAD, + QPU_SIG_SMALL_IMM, + QPU_SIG_LOAD_IMM, + QPU_SIG_BRANCH +}; + +enum qpu_mux { + /* hardware mux values */ + QPU_MUX_R0, + QPU_MUX_R1, + QPU_MUX_R2, + QPU_MUX_R3, + QPU_MUX_R4, + QPU_MUX_R5, + QPU_MUX_A, + QPU_MUX_B, + + /* non-hardware mux values */ + QPU_MUX_IMM, +}; + +enum qpu_cond { + QPU_COND_NEVER, + QPU_COND_ALWAYS, + QPU_COND_ZS, + QPU_COND_ZC, + QPU_COND_NS, + QPU_COND_NC, + QPU_COND_CS, + QPU_COND_CC, +}; + +enum qpu_pack_mul { + QPU_PACK_MUL_NOP, + /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_MUL_8888 = 3, + QPU_PACK_MUL_8A, + QPU_PACK_MUL_8B, + QPU_PACK_MUL_8C, + QPU_PACK_MUL_8D, +}; + +enum qpu_pack_a { + QPU_PACK_A_NOP, + /* convert to 16 bit float if float input, or to int16. */ + QPU_PACK_A_16A, + QPU_PACK_A_16B, + /* replicated to each 8 bits of the 32-bit dst. */ + QPU_PACK_A_8888, + /* Convert to 8-bit unsigned int. */ + QPU_PACK_A_8A, + QPU_PACK_A_8B, + QPU_PACK_A_8C, + QPU_PACK_A_8D, + + /* Saturating variants of the previous instructions.
*/ + QPU_PACK_A_32_SAT, /* int-only */ + QPU_PACK_A_16A_SAT, /* int or float */ + QPU_PACK_A_16B_SAT, + QPU_PACK_A_8888_SAT, + QPU_PACK_A_8A_SAT, + QPU_PACK_A_8B_SAT, + QPU_PACK_A_8C_SAT, + QPU_PACK_A_8D_SAT, +}; + +enum qpu_unpack_r4 { + QPU_UNPACK_R4_NOP, + QPU_UNPACK_R4_F16A_TO_F32, + QPU_UNPACK_R4_F16B_TO_F32, + QPU_UNPACK_R4_8D_REP, + QPU_UNPACK_R4_8A, + QPU_UNPACK_R4_8B, + QPU_UNPACK_R4_8C, + QPU_UNPACK_R4_8D, +}; + +#define QPU_MASK(high, low) \ + ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low)) + +#define QPU_GET_FIELD(word, field) \ + ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_SIG_SHIFT 60 +#define QPU_SIG_MASK QPU_MASK(63, 60) + +#define QPU_UNPACK_SHIFT 57 +#define QPU_UNPACK_MASK QPU_MASK(59, 57) + +/** + * If set, the pack field means PACK_MUL or R4 packing, instead of normal + * regfile a packing. + */ +#define QPU_PM ((uint64_t)1 << 56) + +#define QPU_PACK_SHIFT 52 +#define QPU_PACK_MASK QPU_MASK(55, 52) + +#define QPU_COND_ADD_SHIFT 49 +#define QPU_COND_ADD_MASK QPU_MASK(51, 49) +#define QPU_COND_MUL_SHIFT 46 +#define QPU_COND_MUL_MASK QPU_MASK(48, 46) + +#define QPU_SF ((uint64_t)1 << 45) + +#define QPU_WADDR_ADD_SHIFT 38 +#define QPU_WADDR_ADD_MASK QPU_MASK(43, 38) +#define QPU_WADDR_MUL_SHIFT 32 +#define QPU_WADDR_MUL_MASK QPU_MASK(37, 32) + +#define QPU_OP_MUL_SHIFT 29 +#define QPU_OP_MUL_MASK QPU_MASK(31, 29) + +#define QPU_RADDR_A_SHIFT 18 +#define QPU_RADDR_A_MASK QPU_MASK(23, 18) +#define QPU_RADDR_B_SHIFT 12 +#define QPU_RADDR_B_MASK QPU_MASK(17, 12) +#define QPU_SMALL_IMM_SHIFT 12 +#define QPU_SMALL_IMM_MASK QPU_MASK(17, 12) + +#define QPU_ADD_A_SHIFT 9 +#define QPU_ADD_A_MASK QPU_MASK(11, 9) +#define QPU_ADD_B_SHIFT 6 +#define QPU_ADD_B_MASK QPU_MASK(8, 6) +#define QPU_MUL_A_SHIFT 3 +#define QPU_MUL_A_MASK QPU_MASK(5, 3) +#define QPU_MUL_B_SHIFT 0 +#define QPU_MUL_B_MASK QPU_MASK(2, 0) + +#define QPU_WS ((uint64_t)1 << 44) + +#define QPU_OP_ADD_SHIFT 24 +#define QPU_OP_ADD_MASK QPU_MASK(28, 24) + +#endif /* VC4_QPU_DEFINES_H */ diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 9e4e904c668e..4e52a0a88551 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -154,7 +154,7 @@ #define V3D_PCTRS14 0x006f4 #define V3D_PCTR15 0x006f8 #define V3D_PCTRS15 0x006fc -#define V3D_BGE 0x00f00 +#define V3D_DBGE 0x00f00 #define V3D_FDBGO 0x00f04 #define V3D_FDBGB 0x00f08 #define V3D_FDBGR 0x00f0c diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c new file mode 100644 index 000000000000..8a2a312e2c1b --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -0,0 +1,634 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * DOC: Render command list generation + * + * In the VC4 driver, render command list generation is performed by the + * kernel instead of userspace. We do this because validating a + * user-submitted command list is hard to get right and has high CPU overhead, + * while the number of valid configurations for render command lists is + * actually fairly low. + */ + +#include "uapi/drm/vc4_drm.h" +#include "vc4_drv.h" +#include "vc4_packet.h" + +struct vc4_rcl_setup { + struct drm_gem_cma_object *color_read; + struct drm_gem_cma_object *color_write; + struct drm_gem_cma_object *zs_read; + struct drm_gem_cma_object *zs_write; + struct drm_gem_cma_object *msaa_color_write; + struct drm_gem_cma_object *msaa_zs_write; + + struct drm_gem_cma_object *rcl; + u32 next_offset; +}; + +static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val) +{ + *(u8 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 1; +} + +static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val) +{ + *(u16 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 2; +} + +static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val) +{ + *(u32 *)(setup->rcl->vaddr + setup->next_offset) = val; + setup->next_offset += 4; +} + +/* + * Emits a no-op STORE_TILE_BUFFER_GENERAL. + * + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of + * some sort before another load is triggered. + */ +static void vc4_store_before_load(struct vc4_rcl_setup *setup) +{ + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, + VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE, + VC4_LOADSTORE_TILE_BUFFER_BUFFER) | + VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR); + rcl_u32(setup, 0); /* no address, since we're in None mode */ +} + +/* + * Calculates the physical address of the start of a tile in an RCL surface. + * + * Unlike the other load/store packets, + * VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile + * coordinates packet, and instead just use the address given. + */ +static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec, + struct drm_gem_cma_object *bo, + struct drm_vc4_submit_rcl_surface *surf, + uint8_t x, uint8_t y) +{ + return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE * + (DIV_ROUND_UP(exec->args->width, 32) * y + x); +} + +/* + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. + * + * The tile coordinates packet triggers a pending load if there is one, is + * used for clipping during rendering, and determines where loads/stores + * happen relative to their base address.
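+ * + * As an illustrative sketch of what emit_tile() below produces (assuming a + * color read, a bin list, and a color store), the per-tile packet order is + * roughly: + * + * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (load is queued) + * VC4_PACKET_TILE_COORDINATES (queued load triggers here) + * VC4_PACKET_BRANCH_TO_SUB_LIST (run the tile's bin list) + * VC4_PACKET_STORE_MS_TILE_BUFFER (write the tile back out) + * + * (the very first tile also gets a WAIT_ON_SEMAPHORE before the branch).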
+ */ +static void vc4_tile_coordinates(struct vc4_rcl_setup *setup, + uint32_t x, uint32_t y) +{ + rcl_u8(setup, VC4_PACKET_TILE_COORDINATES); + rcl_u8(setup, x); + rcl_u8(setup, y); +} + +static void emit_tile(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, + uint8_t x, uint8_t y, bool first, bool last) +{ + struct drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + + /* Note that the load doesn't actually occur until the + * tile coords packet is processed, and only one load + * may be outstanding at a time. + */ + if (setup->color_read) { + if (args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->color_read, + &args->color_read, x, y) | + VC4_LOADSTORE_FULL_RES_DISABLE_ZS); + } else { + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, args->color_read.bits); + rcl_u32(setup, setup->color_read->paddr + + args->color_read.offset); + } + } + + if (setup->zs_read) { + if (args->zs_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->zs_read, + &args->zs_read, x, y) | + VC4_LOADSTORE_FULL_RES_DISABLE_COLOR); + } else { + if (setup->color_read) { + /* Exec previous load. */ + vc4_tile_coordinates(setup, x, y); + vc4_store_before_load(setup); + } + + rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); + rcl_u16(setup, args->zs_read.bits); + rcl_u32(setup, setup->zs_read->paddr + + args->zs_read.offset); + } + } + + /* Clipping depends on tile coordinates having been + * emitted, so we always need one here. + */ + vc4_tile_coordinates(setup, x, y); + + /* Wait for the binner before jumping to the first + * tile's lists. + */ + if (first && has_bin) + rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE); + + if (has_bin) { + rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); + rcl_u32(setup, (exec->tile_bo->paddr + + exec->tile_alloc_offset + + (y * exec->bin_tiles_x + x) * 32)); + } + + if (setup->msaa_color_write) { + bool last_tile_write = (!setup->msaa_zs_write && + !setup->zs_write && + !setup->color_write); + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS; + + if (!last_tile_write) + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; + else if (last) + bits |= VC4_LOADSTORE_FULL_RES_EOF; + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->msaa_color_write, + &args->msaa_color_write, x, y) | + bits); + } + + if (setup->msaa_zs_write) { + bool last_tile_write = (!setup->zs_write && + !setup->color_write); + uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR; + + if (setup->msaa_color_write) + vc4_tile_coordinates(setup, x, y); + if (!last_tile_write) + bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL; + else if (last) + bits |= VC4_LOADSTORE_FULL_RES_EOF; + rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER); + rcl_u32(setup, + vc4_full_res_offset(exec, setup->msaa_zs_write, + &args->msaa_zs_write, x, y) | + bits); + } + + if (setup->zs_write) { + bool last_tile_write = !setup->color_write; + + if (setup->msaa_color_write || setup->msaa_zs_write) + vc4_tile_coordinates(setup, x, y); + + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, args->zs_write.bits | + (last_tile_write ? + 0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR)); + rcl_u32(setup, + (setup->zs_write->paddr + args->zs_write.offset) | + ((last && last_tile_write) ? 
+ VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); + } + + if (setup->color_write) { + if (setup->msaa_color_write || setup->msaa_zs_write || + setup->zs_write) { + vc4_tile_coordinates(setup, x, y); + } + + if (last) + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF); + else + rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER); + } +} + +static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup) +{ + struct drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + uint8_t min_x_tile = args->min_x_tile; + uint8_t min_y_tile = args->min_y_tile; + uint8_t max_x_tile = args->max_x_tile; + uint8_t max_y_tile = args->max_y_tile; + uint8_t xtiles = max_x_tile - min_x_tile + 1; + uint8_t ytiles = max_y_tile - min_y_tile + 1; + uint8_t x, y; + uint32_t size, loop_body_size; + + size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE; + loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE; + + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + size += VC4_PACKET_CLEAR_COLORS_SIZE + + VC4_PACKET_TILE_COORDINATES_SIZE + + VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + + if (setup->color_read) { + if (args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; + } else { + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } + } + if (setup->zs_read) { + if (args->zs_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE; + } else { + if (setup->color_read && + !(args->color_read.flags & + VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) { + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE; + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + } + loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE; + } + } + + if (has_bin) { + size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE; + loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE; + } + + if (setup->msaa_color_write) + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; + if (setup->msaa_zs_write) + loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE; + + if (setup->zs_write) + loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE; + if (setup->color_write) + loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE; + + /* We need a VC4_PACKET_TILE_COORDINATES in between each store. */ + loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE * + ((setup->msaa_color_write != NULL) + + (setup->msaa_zs_write != NULL) + + (setup->color_write != NULL) + + (setup->zs_write != NULL) - 1); + + size += xtiles * ytiles * loop_body_size; + + setup->rcl = &vc4_bo_create(dev, size, true)->base; + if (!setup->rcl) + return -ENOMEM; + list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head, + &exec->unref_list); + + rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); + rcl_u32(setup, + (setup->color_write ? (setup->color_write->paddr + + args->color_write.offset) : + 0)); + rcl_u16(setup, args->width); + rcl_u16(setup, args->height); + rcl_u16(setup, args->color_write.bits); + + /* The tile buffer gets cleared when the previous tile is stored. If + * the clear values changed between frames, then the tile buffer has + * stale clear values in it, so we have to do a store in None mode (no + * writes) so that we trigger the tile buffer clear. 
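+ * + * Concretely, when VC4_SUBMIT_CL_USE_CLEAR_COLOR is set, the stream emitted + * just below looks like: + * + * VC4_PACKET_CLEAR_COLORS <colors, z, stencil> + * VC4_PACKET_TILE_COORDINATES (0, 0) + * VC4_PACKET_STORE_TILE_BUFFER_GENERAL (buffer = NONE, address 0)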
+ */ + if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) { + rcl_u8(setup, VC4_PACKET_CLEAR_COLORS); + rcl_u32(setup, args->clear_color[0]); + rcl_u32(setup, args->clear_color[1]); + rcl_u32(setup, args->clear_z); + rcl_u8(setup, args->clear_s); + + vc4_tile_coordinates(setup, 0, 0); + + rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); + rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE); + rcl_u32(setup, 0); /* no address, since we're in None mode */ + } + + for (y = min_y_tile; y <= max_y_tile; y++) { + for (x = min_x_tile; x <= max_x_tile; x++) { + bool first = (x == min_x_tile && y == min_y_tile); + bool last = (x == max_x_tile && y == max_y_tile); + + emit_tile(exec, setup, x, y, first, last); + } + } + + BUG_ON(setup->next_offset != size); + exec->ct1ca = setup->rcl->paddr; + exec->ct1ea = setup->rcl->paddr + setup->next_offset; + + return 0; +} + +static int vc4_full_res_bounds_check(struct vc4_exec_info *exec, + struct drm_gem_cma_object *obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + struct drm_vc4_submit_cl *args = exec->args; + u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32); + + if (surf->offset > obj->base.size) { + DRM_ERROR("surface offset %d > BO size %zd\n", + surf->offset, obj->base.size); + return -EINVAL; + } + + if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE < + render_tiles_stride * args->max_y_tile + args->max_x_tile) { + DRM_ERROR("MSAA tile %d, %d out of bounds " + "(bo size %zd, offset %d).\n", + args->max_x_tile, args->max_y_tile, + obj->base.size, + surf->offset); + return -EINVAL; + } + + return 0; +} + +static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + if (surf->flags != 0 || surf->bits != 0) { + DRM_ERROR("MSAA surface had nonzero flags/bits\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + *obj = vc4_use_bo(exec, surf->hindex); + if (!*obj) + return -EINVAL; + + if (surf->offset & 0xf) { + DRM_ERROR("MSAA write must be 16b aligned.\n"); + return -EINVAL; + } + + return vc4_full_res_bounds_check(exec, *obj, surf); +} + +static int vc4_rcl_surface_setup(struct vc4_exec_info *exec, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_TILING); + uint8_t buffer = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_LOADSTORE_TILE_BUFFER_FORMAT); + int cpp; + int ret; + + if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + DRM_ERROR("Extra flags set\n"); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + *obj = vc4_use_bo(exec, surf->hindex); + if (!*obj) + return -EINVAL; + + if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) { + if (surf == &exec->args->zs_write) { + DRM_ERROR("general zs write may not be a full-res.\n"); + return -EINVAL; + } + + if (surf->bits != 0) { + DRM_ERROR("load/store general bits set with " + "full res load/store.\n"); + return -EINVAL; + } + + ret = vc4_full_res_bounds_check(exec, *obj, surf); + if (ret) + return ret; + + return 0; + } + + if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK | + VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK | + VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) { + DRM_ERROR("Unknown bits in load/store: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + if (buffer ==
VC4_LOADSTORE_TILE_BUFFER_ZS) { + if (format != 0) { + DRM_ERROR("No color format should be set for ZS\n"); + return -EINVAL; + } + cpp = 4; + } else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) { + switch (format) { + case VC4_LOADSTORE_TILE_BUFFER_BGR565: + case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER: + cpp = 2; + break; + case VC4_LOADSTORE_TILE_BUFFER_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + } else { + DRM_ERROR("Bad load/store buffer %d.\n", buffer); + return -EINVAL; + } + + if (surf->offset & 0xf) { + DRM_ERROR("load/store buffer must be 16b aligned.\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +static int +vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec, + struct vc4_rcl_setup *setup, + struct drm_gem_cma_object **obj, + struct drm_vc4_submit_rcl_surface *surf) +{ + uint8_t tiling = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_MEMORY_FORMAT); + uint8_t format = VC4_GET_FIELD(surf->bits, + VC4_RENDER_CONFIG_FORMAT); + int cpp; + + if (surf->flags != 0) { + DRM_ERROR("No flags supported on render config.\n"); + return -EINVAL; + } + + if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK | + VC4_RENDER_CONFIG_FORMAT_MASK | + VC4_RENDER_CONFIG_MS_MODE_4X | + VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) { + DRM_ERROR("Unknown bits in render config: 0x%04x\n", + surf->bits); + return -EINVAL; + } + + if (surf->hindex == ~0) + return 0; + + *obj = vc4_use_bo(exec, surf->hindex); + if (!*obj) + return -EINVAL; + + if (tiling > VC4_TILING_FORMAT_LT) { + DRM_ERROR("Bad tiling format\n"); + return -EINVAL; + } + + switch (format) { + case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED: + case VC4_RENDER_CONFIG_FORMAT_BGR565: + cpp = 2; + break; + case VC4_RENDER_CONFIG_FORMAT_RGBA8888: + cpp = 4; + break; + default: + DRM_ERROR("Bad tile buffer format\n"); + return -EINVAL; + } + + if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling, + exec->args->width, exec->args->height, cpp)) { + return -EINVAL; + } + + return 0; +} + +int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec) +{ + struct vc4_rcl_setup setup = {0}; + struct drm_vc4_submit_cl *args = exec->args; + bool has_bin = args->bin_cl_size != 0; + int ret; + + if (args->min_x_tile > args->max_x_tile || + args->min_y_tile > args->max_y_tile) { + DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n", + args->min_x_tile, args->min_y_tile, + args->max_x_tile, args->max_y_tile); + return -EINVAL; + } + + if (has_bin && + (args->max_x_tile > exec->bin_tiles_x || + args->max_y_tile > exec->bin_tiles_y)) { + DRM_ERROR("Render tiles (%d,%d) outside of bin config " + "(%d,%d)\n", + args->max_x_tile, args->max_y_tile, + exec->bin_tiles_x, exec->bin_tiles_y); + return -EINVAL; + } + + ret = vc4_rcl_render_config_surface_setup(exec, &setup, + &setup.color_write, + &args->color_write); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read); + if (ret) + return ret; + + ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write); + if (ret) + return ret; + + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write, + &args->msaa_color_write); + if (ret) + return ret; + + ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write, + &args->msaa_zs_write); + if (ret) + return ret; + 
+ /* We shouldn't even have the job submitted to us if there's no + * surface to write out. + */ + if (!setup.color_write && !setup.zs_write && + !setup.msaa_color_write && !setup.msaa_zs_write) { + DRM_ERROR("RCL requires color or Z/S write\n"); + return -EINVAL; + } + + return vc4_create_rcl_bo(dev, exec, &setup); +} diff --git a/drivers/gpu/drm/vc4/vc4_trace.h b/drivers/gpu/drm/vc4/vc4_trace.h new file mode 100644 index 000000000000..ad7b1ea720c2 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_trace.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2015 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#if !defined(_VC4_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _VC4_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vc4 +#define TRACE_INCLUDE_FILE vc4_trace + +TRACE_EVENT(vc4_wait_for_seqno_begin, + TP_PROTO(struct drm_device *dev, uint64_t seqno, uint64_t timeout), + TP_ARGS(dev, seqno, timeout), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + __field(u64, timeout) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + __entry->timeout = timeout; + ), + + TP_printk("dev=%u, seqno=%llu, timeout=%llu", + __entry->dev, __entry->seqno, __entry->timeout) +); + +TRACE_EVENT(vc4_wait_for_seqno_end, + TP_PROTO(struct drm_device *dev, uint64_t seqno), + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u64, seqno) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->seqno = seqno; + ), + + TP_printk("dev=%u, seqno=%llu", + __entry->dev, __entry->seqno) +); + +#endif /* _VC4_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/vc4/vc4_trace_points.c b/drivers/gpu/drm/vc4/vc4_trace_points.c new file mode 100644 index 000000000000..e6278f25716b --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_trace_points.c @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2015 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "vc4_drv.h" + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "vc4_trace.h" +#endif diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c new file mode 100644 index 000000000000..424d515ffcda --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2014 The Linux Foundation. All rights reserved. + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "linux/component.h" +#include "vc4_drv.h" +#include "vc4_regs.h" + +#ifdef CONFIG_DEBUG_FS +#define REGDEF(reg) { reg, #reg } +static const struct { + uint32_t reg; + const char *name; +} vc4_reg_defs[] = { + REGDEF(V3D_IDENT0), + REGDEF(V3D_IDENT1), + REGDEF(V3D_IDENT2), + REGDEF(V3D_SCRATCH), + REGDEF(V3D_L2CACTL), + REGDEF(V3D_SLCACTL), + REGDEF(V3D_INTCTL), + REGDEF(V3D_INTENA), + REGDEF(V3D_INTDIS), + REGDEF(V3D_CT0CS), + REGDEF(V3D_CT1CS), + REGDEF(V3D_CT0EA), + REGDEF(V3D_CT1EA), + REGDEF(V3D_CT0CA), + REGDEF(V3D_CT1CA), + REGDEF(V3D_CT00RA0), + REGDEF(V3D_CT01RA0), + REGDEF(V3D_CT0LC), + REGDEF(V3D_CT1LC), + REGDEF(V3D_CT0PC), + REGDEF(V3D_CT1PC), + REGDEF(V3D_PCS), + REGDEF(V3D_BFC), + REGDEF(V3D_RFC), + REGDEF(V3D_BPCA), + REGDEF(V3D_BPCS), + REGDEF(V3D_BPOA), + REGDEF(V3D_BPOS), + REGDEF(V3D_BXCF), + REGDEF(V3D_SQRSV0), + REGDEF(V3D_SQRSV1), + REGDEF(V3D_SQCNTL), + REGDEF(V3D_SRQPC), + REGDEF(V3D_SRQUA), + REGDEF(V3D_SRQUL), + REGDEF(V3D_SRQCS), + REGDEF(V3D_VPACNTL), + REGDEF(V3D_VPMBASE), + REGDEF(V3D_PCTRC), + REGDEF(V3D_PCTRE), + REGDEF(V3D_PCTR0), + REGDEF(V3D_PCTRS0), + REGDEF(V3D_PCTR1), + REGDEF(V3D_PCTRS1), + REGDEF(V3D_PCTR2), + REGDEF(V3D_PCTRS2), + REGDEF(V3D_PCTR3), + REGDEF(V3D_PCTRS3), + REGDEF(V3D_PCTR4), + REGDEF(V3D_PCTRS4), + REGDEF(V3D_PCTR5), + REGDEF(V3D_PCTRS5), + REGDEF(V3D_PCTR6), + REGDEF(V3D_PCTRS6), + REGDEF(V3D_PCTR7), + REGDEF(V3D_PCTRS7), + REGDEF(V3D_PCTR8), + REGDEF(V3D_PCTRS8), + REGDEF(V3D_PCTR9), + REGDEF(V3D_PCTRS9), + REGDEF(V3D_PCTR10), + REGDEF(V3D_PCTRS10), + REGDEF(V3D_PCTR11), + REGDEF(V3D_PCTRS11), + REGDEF(V3D_PCTR12), + REGDEF(V3D_PCTRS12), + REGDEF(V3D_PCTR13), + REGDEF(V3D_PCTRS13), + REGDEF(V3D_PCTR14), + REGDEF(V3D_PCTRS14), + REGDEF(V3D_PCTR15), + REGDEF(V3D_PCTRS15), + REGDEF(V3D_DBGE), + REGDEF(V3D_FDBGO), + REGDEF(V3D_FDBGB), + REGDEF(V3D_FDBGR), + REGDEF(V3D_FDBGS), + REGDEF(V3D_ERRSTAT), +}; + +int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + int i; + + for (i = 0; i < ARRAY_SIZE(vc4_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + vc4_reg_defs[i].name, vc4_reg_defs[i].reg, + V3D_READ(vc4_reg_defs[i].reg)); + } + + return 0; +} + +int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + uint32_t ident1 = V3D_READ(V3D_IDENT1); + uint32_t nslc = VC4_GET_FIELD(ident1, V3D_IDENT1_NSLC); + uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS); + uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS); + + seq_printf(m, "Revision: %d\n", + VC4_GET_FIELD(ident1, V3D_IDENT1_REV)); + seq_printf(m, "Slices: %d\n", nslc); + seq_printf(m, "TMUs: %d\n", nslc * tups); + seq_printf(m, "QPUs: %d\n", nslc * qups); + seq_printf(m, "Semaphores: %d\n", + VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM)); + + return 0; +} +#endif /* CONFIG_DEBUG_FS */ + +/* + * Asks the firmware to turn on power to the V3D engine. + * + * This may be doable with just the clocks interface, though this + * packet does some other register setup from the firmware, too. 
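+ * + * (As implemented below this goes through the generic PM helpers for the + * V3D power domain rather than a firmware mailbox call: "on" resumes the + * domain and "off" powers it down.)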
+ */ +int +vc4_v3d_set_power(struct vc4_dev *vc4, bool on) +{ + if (on) + return pm_generic_resume(&vc4->v3d->pdev->dev); + else + return pm_generic_poweroff(&vc4->v3d->pdev->dev); +} + +static void vc4_v3d_init_hw(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + /* Take all the memory that would have been reserved for user + * QPU programs, since we don't have an interface for running + * them, anyway. + */ + V3D_WRITE(V3D_VPMBASE, 0); +} + +static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_v3d *v3d = NULL; + int ret; + + v3d = devm_kzalloc(&pdev->dev, sizeof(*v3d), GFP_KERNEL); + if (!v3d) + return -ENOMEM; + + v3d->pdev = pdev; + + v3d->regs = vc4_ioremap_regs(pdev, 0); + if (IS_ERR(v3d->regs)) + return PTR_ERR(v3d->regs); + + vc4->v3d = v3d; + + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { + DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", + V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); + return -EINVAL; + } + + /* Reset the binner overflow address/size at setup, to be sure + * we don't reuse an old one. + */ + V3D_WRITE(V3D_BPOA, 0); + V3D_WRITE(V3D_BPOS, 0); + + vc4_v3d_init_hw(drm); + + ret = drm_irq_install(drm, platform_get_irq(pdev, 0)); + if (ret) { + DRM_ERROR("Failed to install IRQ handler\n"); + return ret; + } + + return 0; +} + +static void vc4_v3d_unbind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + + drm_irq_uninstall(drm); + + /* Disable the binner's overflow memory address, so the next + * driver probe (if any) doesn't try to reuse our old + * allocation. + */ + V3D_WRITE(V3D_BPOA, 0); + V3D_WRITE(V3D_BPOS, 0); + + vc4->v3d = NULL; +} + +static const struct component_ops vc4_v3d_ops = { + .bind = vc4_v3d_bind, + .unbind = vc4_v3d_unbind, +}; + +static int vc4_v3d_dev_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &vc4_v3d_ops); +} + +static int vc4_v3d_dev_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &vc4_v3d_ops); + return 0; +} + +static const struct of_device_id vc4_v3d_dt_match[] = { + { .compatible = "brcm,vc4-v3d" }, + {} +}; + +struct platform_driver vc4_v3d_driver = { + .probe = vc4_v3d_dev_probe, + .remove = vc4_v3d_dev_remove, + .driver = { + .name = "vc4_v3d", + .of_match_table = vc4_v3d_dt_match, + }, +}; diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c new file mode 100644 index 000000000000..0fb5b994b9dd --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -0,0 +1,900 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * Command list validator for VC4. + * + * The VC4 has no IOMMU between it and system memory. So, a user with + * access to execute command lists could escalate privilege by + * overwriting system memory (drawing to it as a framebuffer) or + * reading system memory it shouldn't (reading it as a texture, or + * uniform data, or vertex data). + * + * This validates command lists to ensure that all accesses are within + * the bounds of the GEM objects referenced. It explicitly whitelists + * packets, and looks at the offsets in any address fields to make + * sure they're constrained within the BOs they reference. + * + * Note that because of the validation that's happening anyway, this + * is where GEM relocation processing happens. + */ + +#include "uapi/drm/vc4_drm.h" +#include "vc4_drv.h" +#include "vc4_packet.h" + +#define VALIDATE_ARGS \ + struct vc4_exec_info *exec, \ + void *validated, \ + void *untrusted + +/** Return the width in pixels of a 64-byte microtile. */ +static uint32_t +utile_width(int cpp) +{ + switch (cpp) { + case 1: + case 2: + return 8; + case 4: + return 4; + case 8: + return 2; + default: + DRM_ERROR("unknown cpp: %d\n", cpp); + return 1; + } +} + +/** Return the height in pixels of a 64-byte microtile. */ +static uint32_t +utile_height(int cpp) +{ + switch (cpp) { + case 1: + return 8; + case 2: + case 4: + case 8: + return 4; + default: + DRM_ERROR("unknown cpp: %d\n", cpp); + return 1; + } +} + +/** + * The texture unit decides what tiling format a particular miplevel uses + * according to the rule implemented in this function, so we lay out our + * miptrees accordingly. + */ +static bool +size_is_lt(uint32_t width, uint32_t height, int cpp) +{ + return (width <= 4 * utile_width(cpp) || + height <= 4 * utile_height(cpp)); +} + +struct drm_gem_cma_object * +vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) +{ + struct drm_gem_cma_object *obj; + struct vc4_bo *bo; + + if (hindex >= exec->bo_count) { + DRM_ERROR("BO index %d greater than BO count %d\n", + hindex, exec->bo_count); + return NULL; + } + obj = exec->bo[hindex]; + bo = to_vc4_bo(&obj->base); + + if (bo->validated_shader) { + DRM_ERROR("Trying to use shader BO as something other than " + "a shader\n"); + return NULL; + } + + return obj; +} + +static struct drm_gem_cma_object * +vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index) +{ + return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]); +} + +static bool +validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos) +{ + /* Note that the untrusted pointer passed to these functions is + * incremented past the packet byte.
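+ * So for a packet expected at byte offset "pos" in the bin CL, the + * handler sees untrusted == exec->bin_u + pos + 1, which is why the + * check below compares untrusted - 1 against exec->bin_u + pos.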
+ */ + return (untrusted - 1 == exec->bin_u + pos); +} + +static uint32_t +gl_shader_rec_size(uint32_t pointer_bits) +{ + uint32_t attribute_count = pointer_bits & 7; + bool extended = pointer_bits & 8; + + if (attribute_count == 0) + attribute_count = 8; + + if (extended) + return 100 + attribute_count * 4; + else + return 36 + attribute_count * 8; +} + +bool +vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, + uint32_t offset, uint8_t tiling_format, + uint32_t width, uint32_t height, uint8_t cpp) +{ + uint32_t aligned_width, aligned_height, stride, size; + uint32_t utile_w = utile_width(cpp); + uint32_t utile_h = utile_height(cpp); + + /* The shaded vertex format stores signed 12.4 fixed point + * (-2048,2047) offsets from the viewport center, so we should + * never have a render target larger than 4096. The texture + * unit can only sample from 2048x2048, so it's even more + * restricted. This lets us avoid worrying about overflow in + * our math. + */ + if (width > 4096 || height > 4096) { + DRM_ERROR("Surface dimensions (%d,%d) too large\n", width, height); + return false; + } + + switch (tiling_format) { + case VC4_TILING_FORMAT_LINEAR: + aligned_width = round_up(width, utile_w); + aligned_height = height; + break; + case VC4_TILING_FORMAT_T: + aligned_width = round_up(width, utile_w * 8); + aligned_height = round_up(height, utile_h * 8); + break; + case VC4_TILING_FORMAT_LT: + aligned_width = round_up(width, utile_w); + aligned_height = round_up(height, utile_h); + break; + default: + DRM_ERROR("buffer tiling %d unsupported\n", tiling_format); + return false; + } + + stride = aligned_width * cpp; + size = stride * aligned_height; + + if (size + offset < size || + size + offset > fbo->base.size) { + DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n", + width, height, + aligned_width, aligned_height, + size, offset, fbo->base.size); + return false; + } + + return true; +} + +static int +validate_flush(VALIDATE_ARGS) +{ + if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) { + DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n"); + return -EINVAL; + } + exec->found_flush = true; + + return 0; +} + +static int +validate_start_tile_binning(VALIDATE_ARGS) +{ + if (exec->found_start_tile_binning_packet) { + DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n"); + return -EINVAL; + } + exec->found_start_tile_binning_packet = true; + + if (!exec->found_tile_binning_mode_config_packet) { + DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); + return -EINVAL; + } + + return 0; +} + +static int +validate_increment_semaphore(VALIDATE_ARGS) +{ + if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) { + DRM_ERROR("Bin CL must end with " + "VC4_PACKET_INCREMENT_SEMAPHORE\n"); + return -EINVAL; + } + exec->found_increment_semaphore_packet = true; + + return 0; +} + +static int +validate_indexed_prim_list(VALIDATE_ARGS) +{ + struct drm_gem_cma_object *ib; + uint32_t length = *(uint32_t *)(untrusted + 1); + uint32_t offset = *(uint32_t *)(untrusted + 5); + uint32_t max_index = *(uint32_t *)(untrusted + 9); + uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ?
2 : 1; + struct vc4_shader_state *shader_state; + + /* Check overflow condition */ + if (exec->shader_state_count == 0) { + DRM_ERROR("shader state must precede primitives\n"); + return -EINVAL; + } + shader_state = &exec->shader_state[exec->shader_state_count - 1]; + + if (max_index > shader_state->max_index) + shader_state->max_index = max_index; + + ib = vc4_use_handle(exec, 0); + if (!ib) + return -EINVAL; + + if (offset > ib->base.size || + (ib->base.size - offset) / index_size < length) { + DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n", + offset, length, index_size, ib->base.size); + return -EINVAL; + } + + *(uint32_t *)(validated + 5) = ib->paddr + offset; + + return 0; +} + +static int +validate_gl_array_primitive(VALIDATE_ARGS) +{ + uint32_t length = *(uint32_t *)(untrusted + 1); + uint32_t base_index = *(uint32_t *)(untrusted + 5); + uint32_t max_index; + struct vc4_shader_state *shader_state; + + /* Check overflow condition */ + if (exec->shader_state_count == 0) { + DRM_ERROR("shader state must precede primitives\n"); + return -EINVAL; + } + shader_state = &exec->shader_state[exec->shader_state_count - 1]; + + if (length + base_index < length) { + DRM_ERROR("primitive vertex count overflow\n"); + return -EINVAL; + } + max_index = length + base_index - 1; + + if (max_index > shader_state->max_index) + shader_state->max_index = max_index; + + return 0; +} + +static int +validate_gl_shader_state(VALIDATE_ARGS) +{ + uint32_t i = exec->shader_state_count++; + + if (i >= exec->shader_state_size) { + DRM_ERROR("More requests for shader states than declared\n"); + return -EINVAL; + } + + exec->shader_state[i].addr = *(uint32_t *)untrusted; + exec->shader_state[i].max_index = 0; + + if (exec->shader_state[i].addr & ~0xf) { + DRM_ERROR("high bits set in GL shader rec reference\n"); + return -EINVAL; + } + + *(uint32_t *)validated = (exec->shader_rec_p + + exec->shader_state[i].addr); + + exec->shader_rec_p += + roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16); + + return 0; +} + +static int +validate_tile_binning_config(VALIDATE_ARGS) +{ + struct drm_device *dev = exec->exec_bo->base.dev; + struct vc4_bo *tile_bo; + uint8_t flags; + uint32_t tile_state_size, tile_alloc_size; + uint32_t tile_count; + + if (exec->found_tile_binning_mode_config_packet) { + DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); + return -EINVAL; + } + exec->found_tile_binning_mode_config_packet = true; + + exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); + exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); + tile_count = exec->bin_tiles_x * exec->bin_tiles_y; + flags = *(uint8_t *)(untrusted + 14); + + if (exec->bin_tiles_x == 0 || + exec->bin_tiles_y == 0) { + DRM_ERROR("Tile binning config of %dx%d too small\n", + exec->bin_tiles_x, exec->bin_tiles_y); + return -EINVAL; + } + + if (flags & (VC4_BIN_CONFIG_DB_NON_MS | + VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { + DRM_ERROR("unsupported binning config flags 0x%02x\n", flags); + return -EINVAL; + } + + /* The tile state data array is 48 bytes per tile, and we put it at + * the start of a BO containing both it and the tile alloc. + */ + tile_state_size = 48 * tile_count; + + /* Since the tile alloc array will follow us, align. 
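+ * + * The resulting BO layout (per the sizes computed in this function) is: + * + * [0, 48 * tile_count) tile state data array + * [roundup(that, 4096), + tile_alloc_size) tile allocation memory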
*/ + exec->tile_alloc_offset = roundup(tile_state_size, 4096); + + *(uint8_t *)(validated + 14) = + ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | + VC4_BIN_CONFIG_AUTO_INIT_TSDA | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, + VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | + VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, + VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); + + /* Initial block size. */ + tile_alloc_size = 32 * tile_count; + + /* + * The initial allocation gets rounded to the next 256 bytes before + * the hardware starts fulfilling further allocations. + */ + tile_alloc_size = roundup(tile_alloc_size, 256); + + /* Add space for the extra allocations. This is what gets used first, + * before overflow memory. It must have at least 4096 bytes, but we + * want to avoid overflow memory usage if possible. + */ + tile_alloc_size += 1024 * 1024; + + tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, + true); + exec->tile_bo = &tile_bo->base; + if (!exec->tile_bo) + return -ENOMEM; + list_add_tail(&tile_bo->unref_head, &exec->unref_list); + + /* tile alloc address. */ + *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + + exec->tile_alloc_offset); + /* tile alloc size. */ + *(uint32_t *)(validated + 4) = tile_alloc_size; + /* tile state address. */ + *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; + + return 0; +} + +static int +validate_gem_handles(VALIDATE_ARGS) +{ + memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); + return 0; +} + +#define VC4_DEFINE_PACKET(packet, func) \ + [packet] = { packet ## _SIZE, #packet, func } + +static const struct cmd_info { + uint16_t len; + const char *name; + int (*func)(struct vc4_exec_info *exec, void *validated, + void *untrusted); +} cmd_info[] = { + VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, + validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, + validate_increment_semaphore), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, + validate_indexed_prim_list), + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, + validate_gl_array_primitive), + + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), + + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), + /* Note: The docs say this was also 105, but it was 106 in the + * initial userland code drop. 
+ */ + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, + validate_tile_binning_config), + + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles), +}; + +int +vc4_validate_bin_cl(struct drm_device *dev, + void *validated, + void *unvalidated, + struct vc4_exec_info *exec) +{ + uint32_t len = exec->args->bin_cl_size; + uint32_t dst_offset = 0; + uint32_t src_offset = 0; + + while (src_offset < len) { + void *dst_pkt = validated + dst_offset; + void *src_pkt = unvalidated + src_offset; + u8 cmd = *(uint8_t *)src_pkt; + const struct cmd_info *info; + + if (cmd >= ARRAY_SIZE(cmd_info)) { + DRM_ERROR("0x%08x: packet %d out of bounds\n", + src_offset, cmd); + return -EINVAL; + } + + info = &cmd_info[cmd]; + if (!info->name) { + DRM_ERROR("0x%08x: packet %d invalid\n", + src_offset, cmd); + return -EINVAL; + } + + if (src_offset + info->len > len) { + DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x " + "exceeds bounds (0x%08x)\n", + src_offset, cmd, info->name, info->len, + src_offset + len); + return -EINVAL; + } + + if (cmd != VC4_PACKET_GEM_HANDLES) + memcpy(dst_pkt, src_pkt, info->len); + + if (info->func && info->func(exec, + dst_pkt + 1, + src_pkt + 1)) { + DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n", + src_offset, cmd, info->name); + return -EINVAL; + } + + src_offset += info->len; + /* GEM handle loading doesn't produce HW packets. */ + if (cmd != VC4_PACKET_GEM_HANDLES) + dst_offset += info->len; + + /* When the CL hits halt, it'll stop reading anything else. */ + if (cmd == VC4_PACKET_HALT) + break; + } + + exec->ct0ea = exec->ct0ca + dst_offset; + + if (!exec->found_start_tile_binning_packet) { + DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); + return -EINVAL; + } + + /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The + * semaphore is used to trigger the render CL to start up, and the + * FLUSH is what caps the bin lists with + * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main + * render CL when they get called to) and actually triggers the queued + * semaphore increment. + */ + if (!exec->found_increment_semaphore_packet || !exec->found_flush) { + DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + " + "VC4_PACKET_FLUSH\n"); + return -EINVAL; + } + + return 0; +} + +static bool +reloc_tex(struct vc4_exec_info *exec, + void *uniform_data_u, + struct vc4_texture_sample_info *sample, + uint32_t texture_handle_index) + +{ + struct drm_gem_cma_object *tex; + uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); + uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]); + uint32_t p2 = (sample->p_offset[2] != ~0 ? + *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0); + uint32_t p3 = (sample->p_offset[3] != ~0 ? 
+ *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); + uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; + uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; + uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); + uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); + uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); + uint32_t cpp, tiling_format, utile_w, utile_h; + uint32_t i; + uint32_t cube_map_stride = 0; + enum vc4_texture_data_type type; + + tex = vc4_use_bo(exec, texture_handle_index); + if (!tex) + return false; + + if (sample->is_direct) { + uint32_t remaining_size = tex->base.size - p0; + + if (p0 > tex->base.size - 4) { + DRM_ERROR("UBO offset greater than UBO size\n"); + goto fail; + } + if (p1 > remaining_size - 4) { + DRM_ERROR("UBO clamp would allow reads " + "outside of UBO\n"); + goto fail; + } + *validated_p0 = tex->paddr + p0; + return true; + } + + if (width == 0) + width = 2048; + if (height == 0) + height = 2048; + + if (p0 & VC4_TEX_P0_CMMODE_MASK) { + if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) + cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; + if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { + if (cube_map_stride) { + DRM_ERROR("Cube map stride set twice\n"); + goto fail; + } + + cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; + } + if (!cube_map_stride) { + DRM_ERROR("Cube map stride not set\n"); + goto fail; + } + } + + type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | + (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); + + switch (type) { + case VC4_TEXTURE_TYPE_RGBA8888: + case VC4_TEXTURE_TYPE_RGBX8888: + case VC4_TEXTURE_TYPE_RGBA32R: + cpp = 4; + break; + case VC4_TEXTURE_TYPE_RGBA4444: + case VC4_TEXTURE_TYPE_RGBA5551: + case VC4_TEXTURE_TYPE_RGB565: + case VC4_TEXTURE_TYPE_LUMALPHA: + case VC4_TEXTURE_TYPE_S16F: + case VC4_TEXTURE_TYPE_S16: + cpp = 2; + break; + case VC4_TEXTURE_TYPE_LUMINANCE: + case VC4_TEXTURE_TYPE_ALPHA: + case VC4_TEXTURE_TYPE_S8: + cpp = 1; + break; + case VC4_TEXTURE_TYPE_ETC1: + case VC4_TEXTURE_TYPE_BW1: + case VC4_TEXTURE_TYPE_A4: + case VC4_TEXTURE_TYPE_A1: + case VC4_TEXTURE_TYPE_RGBA64: + case VC4_TEXTURE_TYPE_YUV422R: + default: + DRM_ERROR("Texture format %d unsupported\n", type); + goto fail; + } + utile_w = utile_width(cpp); + utile_h = utile_height(cpp); + + if (type == VC4_TEXTURE_TYPE_RGBA32R) { + tiling_format = VC4_TILING_FORMAT_LINEAR; + } else { + if (size_is_lt(width, height, cpp)) + tiling_format = VC4_TILING_FORMAT_LT; + else + tiling_format = VC4_TILING_FORMAT_T; + } + + if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, + tiling_format, width, height, cpp)) { + goto fail; + } + + /* The mipmap levels are stored before the base of the texture. Make + * sure there is actually space in the BO. + */ + for (i = 1; i <= miplevels; i++) { + uint32_t level_width = max(width >> i, 1u); + uint32_t level_height = max(height >> i, 1u); + uint32_t aligned_width, aligned_height; + uint32_t level_size; + + /* Once the levels get small enough, they drop from T to LT. 
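+ * For example (per size_is_lt()), a 256x256 RGBA8888 texture (cpp 4, so + * a 4x4 utile) stays in T format until a level is at most 16 pixels in + * either dimension, i.e. from miplevel 4 onwards.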
*/ + if (tiling_format == VC4_TILING_FORMAT_T && + size_is_lt(level_width, level_height, cpp)) { + tiling_format = VC4_TILING_FORMAT_LT; + } + + switch (tiling_format) { + case VC4_TILING_FORMAT_T: + aligned_width = round_up(level_width, utile_w * 8); + aligned_height = round_up(level_height, utile_h * 8); + break; + case VC4_TILING_FORMAT_LT: + aligned_width = round_up(level_width, utile_w); + aligned_height = round_up(level_height, utile_h); + break; + default: + aligned_width = round_up(level_width, utile_w); + aligned_height = level_height; + break; + } + + level_size = aligned_width * cpp * aligned_height; + + if (offset < level_size) { + DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db " + "overflowed buffer bounds (offset %d)\n", + i, level_width, level_height, + aligned_width, aligned_height, + level_size, offset); + goto fail; + } + + offset -= level_size; + } + + *validated_p0 = tex->paddr + p0; + + return true; + fail: + DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); + DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1); + DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2); + DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3); + return false; +} + +static int +validate_gl_shader_rec(struct drm_device *dev, + struct vc4_exec_info *exec, + struct vc4_shader_state *state) +{ + uint32_t *src_handles; + void *pkt_u, *pkt_v; + static const uint32_t shader_reloc_offsets[] = { + 4, /* fs */ + 16, /* vs */ + 28, /* cs */ + }; + uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets); + struct drm_gem_cma_object *bo[shader_reloc_count + 8]; + uint32_t nr_attributes, nr_relocs, packet_size; + int i; + + nr_attributes = state->addr & 0x7; + if (nr_attributes == 0) + nr_attributes = 8; + packet_size = gl_shader_rec_size(state->addr); + + nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes; + if (nr_relocs * 4 > exec->shader_rec_size) { + DRM_ERROR("overflowed shader recs reading %d handles " + "from %d bytes left\n", + nr_relocs, exec->shader_rec_size); + return -EINVAL; + } + src_handles = exec->shader_rec_u; + exec->shader_rec_u += nr_relocs * 4; + exec->shader_rec_size -= nr_relocs * 4; + + if (packet_size > exec->shader_rec_size) { + DRM_ERROR("overflowed shader recs copying %db packet " + "from %d bytes left\n", + packet_size, exec->shader_rec_size); + return -EINVAL; + } + pkt_u = exec->shader_rec_u; + pkt_v = exec->shader_rec_v; + memcpy(pkt_v, pkt_u, packet_size); + exec->shader_rec_u += packet_size; + /* Shader recs have to be aligned to 16 bytes (due to the attribute + * flags being in the low bytes), so round the next validated shader + * rec address up. This should be safe, since we've got so many + * relocations in a shader rec packet. 
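+ * + * "Safe" because rounding up to 16 consumes at most 15 bytes, while the + * nr_relocs * 4 bytes of handles already skipped in the validated copy + * (nr_relocs is at least 4 here) more than cover that; the BUG_ON below + * asserts exactly this.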
+ */ + BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4); + exec->shader_rec_v += roundup(packet_size, 16); + exec->shader_rec_size -= packet_size; + + if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) { + DRM_ERROR("Multi-threaded fragment shaders not supported.\n"); + return -EINVAL; + } + + for (i = 0; i < shader_reloc_count; i++) { + if (src_handles[i] > exec->bo_count) { + DRM_ERROR("Shader handle %d too big\n", src_handles[i]); + return -EINVAL; + } + + bo[i] = exec->bo[src_handles[i]]; + if (!bo[i]) + return -EINVAL; + } + for (i = shader_reloc_count; i < nr_relocs; i++) { + bo[i] = vc4_use_bo(exec, src_handles[i]); + if (!bo[i]) + return -EINVAL; + } + + for (i = 0; i < shader_reloc_count; i++) { + struct vc4_validated_shader_info *validated_shader; + uint32_t o = shader_reloc_offsets[i]; + uint32_t src_offset = *(uint32_t *)(pkt_u + o); + uint32_t *texture_handles_u; + void *uniform_data_u; + uint32_t tex; + + *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; + + if (src_offset != 0) { + DRM_ERROR("Shaders must be at offset 0 of " + "the BO.\n"); + return -EINVAL; + } + + validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; + if (!validated_shader) + return -EINVAL; + + if (validated_shader->uniforms_src_size > + exec->uniforms_size) { + DRM_ERROR("Uniforms src buffer overflow\n"); + return -EINVAL; + } + + texture_handles_u = exec->uniforms_u; + uniform_data_u = (texture_handles_u + + validated_shader->num_texture_samples); + + memcpy(exec->uniforms_v, uniform_data_u, + validated_shader->uniforms_size); + + for (tex = 0; + tex < validated_shader->num_texture_samples; + tex++) { + if (!reloc_tex(exec, + uniform_data_u, + &validated_shader->texture_samples[tex], + texture_handles_u[tex])) { + return -EINVAL; + } + } + + *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; + + exec->uniforms_u += validated_shader->uniforms_src_size; + exec->uniforms_v += validated_shader->uniforms_size; + exec->uniforms_p += validated_shader->uniforms_size; + } + + for (i = 0; i < nr_attributes; i++) { + struct drm_gem_cma_object *vbo = + bo[ARRAY_SIZE(shader_reloc_offsets) + i]; + uint32_t o = 36 + i * 8; + uint32_t offset = *(uint32_t *)(pkt_u + o + 0); + uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; + uint32_t stride = *(uint8_t *)(pkt_u + o + 5); + uint32_t max_index; + + if (state->addr & 0x8) + stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; + + if (vbo->base.size < offset || + vbo->base.size - offset < attr_size) { + DRM_ERROR("BO offset overflow (%d + %d > %d)\n", + offset, attr_size, vbo->base.size); + return -EINVAL; + } + + if (stride != 0) { + max_index = ((vbo->base.size - offset - attr_size) / + stride); + if (state->max_index > max_index) { + DRM_ERROR("primitives use index %d out of " + "supplied %d\n", + state->max_index, max_index); + return -EINVAL; + } + } + + *(uint32_t *)(pkt_v + o) = vbo->paddr + offset; + } + + return 0; +} + +int +vc4_validate_shader_recs(struct drm_device *dev, + struct vc4_exec_info *exec) +{ + uint32_t i; + int ret = 0; + + for (i = 0; i < exec->shader_state_count; i++) { + ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); + if (ret) + return ret; + } + + return ret; +} diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c new file mode 100644 index 000000000000..f67124b4c534 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -0,0 +1,513 @@ +/* + * Copyright © 2014 Broadcom + * + * Permission is hereby granted, free of charge, to 
any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * DOC: Shader validator for VC4. + * + * The VC4 has no IOMMU between it and system memory, so a user with + * access to execute shaders could escalate privilege by overwriting + * system memory (using the VPM write address register in the + * general-purpose DMA mode) or reading system memory it shouldn't + * (reading it as a texture, or uniform data, or vertex data). + * + * This walks over a shader BO, ensuring that its accesses are + * appropriately bounded, and recording how many texture accesses are + * made and where so that we can do relocations for them in the + * uniform stream. + */ + +#include "vc4_drv.h" +#include "vc4_qpu_defines.h" + +struct vc4_shader_validation_state { + struct vc4_texture_sample_info tmu_setup[2]; + int tmu_write_count[2]; + + /* For registers that were last written to by a MIN instruction with + * one argument being a uniform, the address of the uniform. + * Otherwise, ~0. + * + * This is used for the validation of direct address memory reads. 
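+ *
+ * A sketch of the QPU pattern this tracking recognizes (illustrative
+ * pseudo-assembly, register names invented):
+ *
+ *   r0 = max(x, 0)           ; small immediate 0, sets live_max_clamp_regs
+ *   r1 = min(r0, unif)       ; uniform (UBO size), its offset recorded here
+ *   tmu0_s = add(r1, unif)   ; uniform (UBO base), direct read validated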
+ */ + uint32_t live_min_clamp_offsets[32 + 32 + 4]; + bool live_max_clamp_regs[32 + 32 + 4]; +}; + +static uint32_t +waddr_to_live_reg_index(uint32_t waddr, bool is_b) +{ + if (waddr < 32) { + if (is_b) + return 32 + waddr; + else + return waddr; + } else if (waddr <= QPU_W_ACC3) { + return 64 + waddr - QPU_W_ACC0; + } else { + return ~0; + } +} + +static uint32_t +raddr_add_a_to_live_reg_index(uint64_t inst) +{ + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + + if (add_a == QPU_MUX_A) + return raddr_a; + else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) + return 32 + raddr_b; + else if (add_a <= QPU_MUX_R3) + return 64 + add_a; + else + return ~0; +} + +static bool +is_tmu_submit(uint32_t waddr) +{ + return (waddr == QPU_W_TMU0_S || + waddr == QPU_W_TMU1_S); +} + +static bool +is_tmu_write(uint32_t waddr) +{ + return (waddr >= QPU_W_TMU0_S && + waddr <= QPU_W_TMU1_B); +} + +static bool +record_texture_sample(struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + int tmu) +{ + uint32_t s = validated_shader->num_texture_samples; + int i; + struct vc4_texture_sample_info *temp_samples; + + temp_samples = krealloc(validated_shader->texture_samples, + (s + 1) * sizeof(*temp_samples), + GFP_KERNEL); + if (!temp_samples) + return false; + + memcpy(&temp_samples[s], + &validation_state->tmu_setup[tmu], + sizeof(*temp_samples)); + + validated_shader->num_texture_samples = s + 1; + validated_shader->texture_samples = temp_samples; + + for (i = 0; i < 4; i++) + validation_state->tmu_setup[tmu].p_offset[i] = ~0; + + return true; +} + +static bool +check_tmu_write(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + bool is_mul) +{ + uint32_t waddr = (is_mul ? + QPU_GET_FIELD(inst, QPU_WADDR_MUL) : + QPU_GET_FIELD(inst, QPU_WADDR_ADD)); + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + int tmu = waddr > QPU_W_TMU0_B; + bool submit = is_tmu_submit(waddr); + bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0; + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + + if (is_direct) { + uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); + uint32_t clamp_reg, clamp_offset; + + if (sig == QPU_SIG_SMALL_IMM) { + DRM_ERROR("direct TMU read used small immediate\n"); + return false; + } + + /* Make sure that this texture load is an add of the base + * address of the UBO to a clamped offset within the UBO. + */ + if (is_mul || + QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { + DRM_ERROR("direct TMU load wasn't an add\n"); + return false; + } + + /* We assert that the clamped address is the first + * argument, and the UBO base address is the second argument. + * This is arbitrary, but simpler than supporting flipping the + * two either way. + */ + clamp_reg = raddr_add_a_to_live_reg_index(inst); + if (clamp_reg == ~0) { + DRM_ERROR("direct TMU load wasn't clamped\n"); + return false; + } + + clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; + if (clamp_offset == ~0) { + DRM_ERROR("direct TMU load wasn't clamped\n"); + return false; + } + + /* Store the clamp value's offset in p1 (see reloc_tex() in + * vc4_validate.c).
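+ * reloc_tex() reads that uniform back out of the uniform stream as p1
+ * and range-checks the direct read against the UBO's size with it.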
+ */ + validation_state->tmu_setup[tmu].p_offset[1] = + clamp_offset; + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { + DRM_ERROR("direct TMU load didn't add to a uniform\n"); + return false; + } + + validation_state->tmu_setup[tmu].is_direct = true; + } else { + if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM && + raddr_b == QPU_R_UNIF)) { + DRM_ERROR("uniform read in the same instruction as " + "texture setup.\n"); + return false; + } + } + + if (validation_state->tmu_write_count[tmu] >= 4) { + DRM_ERROR("TMU%d got too many parameters before dispatch\n", + tmu); + return false; + } + validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] = + validated_shader->uniforms_size; + validation_state->tmu_write_count[tmu]++; + /* Since direct uses a RADDR uniform reference, it will get counted in + * check_instruction_reads() + */ + if (!is_direct) + validated_shader->uniforms_size += 4; + + if (submit) { + if (!record_texture_sample(validated_shader, + validation_state, tmu)) { + return false; + } + + validation_state->tmu_write_count[tmu] = 0; + } + + return true; +} + +static bool +check_reg_write(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state, + bool is_mul) +{ + uint32_t waddr = (is_mul ? + QPU_GET_FIELD(inst, QPU_WADDR_MUL) : + QPU_GET_FIELD(inst, QPU_WADDR_ADD)); + + switch (waddr) { + case QPU_W_UNIFORMS_ADDRESS: + /* XXX: We'll probably need to support this for reladdr, but + * it's definitely a security-related one. + */ + DRM_ERROR("uniforms address load unsupported\n"); + return false; + + case QPU_W_TLB_COLOR_MS: + case QPU_W_TLB_COLOR_ALL: + case QPU_W_TLB_Z: + /* These only interact with the tile buffer, not main memory, + * so they're safe. + */ + return true; + + case QPU_W_TMU0_S: + case QPU_W_TMU0_T: + case QPU_W_TMU0_R: + case QPU_W_TMU0_B: + case QPU_W_TMU1_S: + case QPU_W_TMU1_T: + case QPU_W_TMU1_R: + case QPU_W_TMU1_B: + return check_tmu_write(inst, validated_shader, validation_state, + is_mul); + + case QPU_W_HOST_INT: + case QPU_W_TMU_NOSWAP: + case QPU_W_TLB_ALPHA_MASK: + case QPU_W_MUTEX_RELEASE: + /* XXX: I haven't thought about these, so don't support them + * for now. + */ + DRM_ERROR("Unsupported waddr %d\n", waddr); + return false; + + case QPU_W_VPM_ADDR: + DRM_ERROR("General VPM DMA unsupported\n"); + return false; + + case QPU_W_VPM: + case QPU_W_VPMVCD_SETUP: + /* We allow VPM setup in general, even including VPM DMA + * configuration setup, because the (unsafe) DMA can only be + * triggered by QPU_W_VPM_ADDR writes. 
+ */ + return true; + + case QPU_W_TLB_STENCIL_SETUP: + return true; + } + + return true; +} + +static void +track_live_clamps(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state) +{ + uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); + uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + bool ws = inst & QPU_WS; + uint32_t lri_add_a, lri_add, lri_mul; + bool add_a_is_min_0; + + /* Check whether OP_ADD's A argument comes from a live MAX(x, 0), + * before we clear previous live state. + */ + lri_add_a = raddr_add_a_to_live_reg_index(inst); + add_a_is_min_0 = (lri_add_a != ~0 && + validation_state->live_max_clamp_regs[lri_add_a]); + + /* Clear live state for registers written by our instruction. */ + lri_add = waddr_to_live_reg_index(waddr_add, ws); + lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); + if (lri_mul != ~0) { + validation_state->live_max_clamp_regs[lri_mul] = false; + validation_state->live_min_clamp_offsets[lri_mul] = ~0; + } + if (lri_add != ~0) { + validation_state->live_max_clamp_regs[lri_add] = false; + validation_state->live_min_clamp_offsets[lri_add] = ~0; + } else { + /* Nothing further to do for live tracking, since only ADDs + * generate new live clamp registers. + */ + return; + } + + /* Now, handle remaining live clamp tracking for the ADD operation. */ + + if (cond_add != QPU_COND_ALWAYS) + return; + + if (op_add == QPU_A_MAX) { + /* Track live clamps of a value to a minimum of 0 (in either + * arg). + */ + if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || + (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { + return; + } + + validation_state->live_max_clamp_regs[lri_add] = true; + } else if (op_add == QPU_A_MIN) { + /* Track live clamps of a value clamped to a minimum of 0 and + * a maximum of some uniform's offset.
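+ * This is the second half of the min(max(x, 0), unif) idiom that
+ * the direct TMU path in check_tmu_write() depends on.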
+ */ + if (!add_a_is_min_0) + return; + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && + sig != QPU_SIG_SMALL_IMM)) { + return; + } + + validation_state->live_min_clamp_offsets[lri_add] = + validated_shader->uniforms_size; + } +} + +static bool +check_instruction_writes(uint64_t inst, + struct vc4_validated_shader_info *validated_shader, + struct vc4_shader_validation_state *validation_state) +{ + uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + bool ok; + + if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { + DRM_ERROR("ADD and MUL both set up textures\n"); + return false; + } + + ok = (check_reg_write(inst, validated_shader, validation_state, + false) && + check_reg_write(inst, validated_shader, validation_state, + true)); + + track_live_clamps(inst, validated_shader, validation_state); + + return ok; +} + +static bool +check_instruction_reads(uint64_t inst, + struct vc4_validated_shader_info *validated_shader) +{ + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + + if (raddr_a == QPU_R_UNIF || + (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { + /* This can't overflow the uint32_t, because we're reading 8 + * bytes of instruction to increment by 4 here, so we'd + * already be OOM. + */ + validated_shader->uniforms_size += 4; + } + + return true; +} + +struct vc4_validated_shader_info * +vc4_validate_shader(struct drm_gem_cma_object *shader_obj) +{ + bool found_shader_end = false; + int shader_end_ip = 0; + uint32_t ip, max_ip; + uint64_t *shader; + struct vc4_validated_shader_info *validated_shader; + struct vc4_shader_validation_state validation_state; + int i; + + memset(&validation_state, 0, sizeof(validation_state)); + + for (i = 0; i < 8; i++) + validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; + for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) + validation_state.live_min_clamp_offsets[i] = ~0; + + shader = shader_obj->vaddr; + max_ip = shader_obj->base.size / sizeof(uint64_t); + + validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); + if (!validated_shader) + return NULL; + + for (ip = 0; ip < max_ip; ip++) { + uint64_t inst = shader[ip]; + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + + switch (sig) { + case QPU_SIG_NONE: + case QPU_SIG_WAIT_FOR_SCOREBOARD: + case QPU_SIG_SCOREBOARD_UNLOCK: + case QPU_SIG_COLOR_LOAD: + case QPU_SIG_LOAD_TMU0: + case QPU_SIG_LOAD_TMU1: + case QPU_SIG_PROG_END: + case QPU_SIG_SMALL_IMM: + if (!check_instruction_writes(inst, validated_shader, + &validation_state)) { + DRM_ERROR("Bad write at ip %d\n", ip); + goto fail; + } + + if (!check_instruction_reads(inst, validated_shader)) + goto fail; + + if (sig == QPU_SIG_PROG_END) { + found_shader_end = true; + shader_end_ip = ip; + } + + break; + + case QPU_SIG_LOAD_IMM: + if (!check_instruction_writes(inst, validated_shader, + &validation_state)) { + DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip); + goto fail; + } + break; + + default: + DRM_ERROR("Unsupported QPU signal %d at " + "instruction %d\n", sig, ip); + goto fail; + } + + /* There are two delay slots after program end is signaled + * that are still executed, then we're finished. 
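+ * (With PROG_END at instruction N, instructions N+1 and N+2 still
+ * execute, hence the shader_end_ip + 2 check below.)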
+ */ + if (found_shader_end && ip == shader_end_ip + 2) + break; + } + + if (ip == max_ip) { + DRM_ERROR("shader failed to terminate before " + "shader BO end at %zd\n", + shader_obj->base.size); + goto fail; + } + + /* Again, no chance of integer overflow here because the worst case + * scenario is 8 bytes of uniforms plus handles per 8-byte + * instruction. + */ + validated_shader->uniforms_src_size = + (validated_shader->uniforms_size + + 4 * validated_shader->num_texture_samples); + + return validated_shader; + +fail: + if (validated_shader) { + kfree(validated_shader->texture_samples); + kfree(validated_shader); + } + return NULL; +} diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 306a7df7d013..a165f03eaa79 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -374,16 +374,6 @@ static const struct drm_connector_helper_funcs virtio_gpu_conn_helper_funcs = { .best_encoder = virtio_gpu_best_encoder, }; -static void virtio_gpu_conn_save(struct drm_connector *connector) -{ - DRM_DEBUG("\n"); -} - -static void virtio_gpu_conn_restore(struct drm_connector *connector) -{ - DRM_DEBUG("\n"); -} - static enum drm_connector_status virtio_gpu_conn_detect( struct drm_connector *connector, bool force) @@ -409,8 +399,6 @@ static void virtio_gpu_conn_destroy(struct drm_connector *connector) static const struct drm_connector_funcs virtio_gpu_connector_funcs = { .dpms = drm_atomic_helper_connector_dpms, - .save = virtio_gpu_conn_save, - .restore = virtio_gpu_conn_restore, .detect = virtio_gpu_conn_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = virtio_gpu_conn_destroy, @@ -443,7 +431,7 @@ static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) if (IS_ERR(plane)) return PTR_ERR(plane); drm_crtc_init_with_planes(dev, crtc, plane, NULL, - &virtio_gpu_crtc_funcs); + &virtio_gpu_crtc_funcs, NULL); drm_mode_crtc_set_gamma_size(crtc, 256); drm_crtc_helper_add(crtc, &virtio_gpu_crtc_helper_funcs); plane->crtc = crtc; @@ -453,7 +441,7 @@ static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) drm_connector_helper_add(connector, &virtio_gpu_conn_helper_funcs); drm_encoder_init(dev, encoder, &virtio_gpu_enc_funcs, - DRM_MODE_ENCODER_VIRTUAL); + DRM_MODE_ENCODER_VIRTUAL, NULL); drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs); encoder->possible_crtcs = 1 << index; diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index 4a74129c5708..572fb351feab 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -107,7 +107,7 @@ struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev, &virtio_gpu_plane_funcs, virtio_gpu_formats, ARRAY_SIZE(virtio_gpu_formats), - DRM_PLANE_TYPE_PRIMARY); + DRM_PLANE_TYPE_PRIMARY, NULL); if (ret) goto err_plane_init; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index e38db35132ed..162f188969a7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -470,7 +470,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, } -static struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { +static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { .destroy = vmw_framebuffer_surface_destroy, .dirty = vmw_framebuffer_surface_dirty, }; @@ -647,7 +647,7 @@ static int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer 
*framebuffer, return ret; } -static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = { +static const struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = { .destroy = vmw_framebuffer_dmabuf_destroy, .dirty = vmw_framebuffer_dmabuf_dirty, }; @@ -1331,14 +1331,6 @@ static int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num, return 0; } -void vmw_du_crtc_save(struct drm_crtc *crtc) -{ -} - -void vmw_du_crtc_restore(struct drm_crtc *crtc) -{ -} - void vmw_du_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start, uint32_t size) @@ -1360,14 +1352,6 @@ int vmw_du_connector_dpms(struct drm_connector *connector, int mode) return 0; } -void vmw_du_connector_save(struct drm_connector *connector) -{ -} - -void vmw_du_connector_restore(struct drm_connector *connector) -{ -} - enum drm_connector_status vmw_du_connector_detect(struct drm_connector *connector, bool force) { @@ -1554,7 +1538,7 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector, drm_mode_probed_add(connector, mode); } - drm_mode_connector_list_update(connector, true); + drm_mode_connector_list_update(connector); /* Move the prefered mode first, help apps pick the right mode. */ drm_mode_sort(&connector->modes); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index bb63e4d795fa..2def684e61a4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -294,9 +294,7 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set) return vmw_ldu_commit_list(dev_priv); } -static struct drm_crtc_funcs vmw_legacy_crtc_funcs = { - .save = vmw_du_crtc_save, - .restore = vmw_du_crtc_restore, +static const struct drm_crtc_funcs vmw_legacy_crtc_funcs = { .cursor_set = vmw_du_crtc_cursor_set, .cursor_move = vmw_du_crtc_cursor_move, .gamma_set = vmw_du_crtc_gamma_set, @@ -314,7 +312,7 @@ static void vmw_ldu_encoder_destroy(struct drm_encoder *encoder) vmw_ldu_destroy(vmw_encoder_to_ldu(encoder)); } -static struct drm_encoder_funcs vmw_legacy_encoder_funcs = { +static const struct drm_encoder_funcs vmw_legacy_encoder_funcs = { .destroy = vmw_ldu_encoder_destroy, }; @@ -327,10 +325,8 @@ static void vmw_ldu_connector_destroy(struct drm_connector *connector) vmw_ldu_destroy(vmw_connector_to_ldu(connector)); } -static struct drm_connector_funcs vmw_legacy_connector_funcs = { +static const struct drm_connector_funcs vmw_legacy_connector_funcs = { .dpms = vmw_du_connector_dpms, - .save = vmw_du_connector_save, - .restore = vmw_du_connector_restore, .detect = vmw_du_connector_detect, .fill_modes = vmw_du_connector_fill_modes, .set_property = vmw_du_connector_set_property, @@ -367,7 +363,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) connector->status = vmw_du_connector_detect(connector, true); drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, - DRM_MODE_ENCODER_VIRTUAL); + DRM_MODE_ENCODER_VIRTUAL, NULL); drm_mode_connector_attach_encoder(connector, encoder); encoder->possible_crtcs = (1 << unit); encoder->possible_clones = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index b96d1ab610c5..ecac70af032a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -530,9 +530,7 @@ out_no_fence: return ret; } -static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = { - .save = vmw_du_crtc_save, - .restore = vmw_du_crtc_restore, +static const struct drm_crtc_funcs vmw_screen_object_crtc_funcs = { 
.cursor_set = vmw_du_crtc_cursor_set, .cursor_move = vmw_du_crtc_cursor_move, .gamma_set = vmw_du_crtc_gamma_set, @@ -550,7 +548,7 @@ static void vmw_sou_encoder_destroy(struct drm_encoder *encoder) vmw_sou_destroy(vmw_encoder_to_sou(encoder)); } -static struct drm_encoder_funcs vmw_screen_object_encoder_funcs = { +static const struct drm_encoder_funcs vmw_screen_object_encoder_funcs = { .destroy = vmw_sou_encoder_destroy, }; @@ -563,12 +561,8 @@ static void vmw_sou_connector_destroy(struct drm_connector *connector) vmw_sou_destroy(vmw_connector_to_sou(connector)); } -static struct drm_connector_funcs vmw_sou_connector_funcs = { +static const struct drm_connector_funcs vmw_sou_connector_funcs = { .dpms = vmw_du_connector_dpms, - .save = vmw_du_connector_save, - .restore = vmw_du_connector_restore, - .detect = vmw_du_connector_detect, - .fill_modes = vmw_du_connector_fill_modes, .set_property = vmw_du_connector_set_property, .destroy = vmw_sou_connector_destroy, }; @@ -603,7 +597,7 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) connector->status = vmw_du_connector_detect(connector, true); drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs, - DRM_MODE_ENCODER_VIRTUAL); + DRM_MODE_ENCODER_VIRTUAL, NULL); drm_mode_connector_attach_encoder(connector, encoder); encoder->possible_crtcs = (1 << unit); encoder->possible_clones = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index b1fc1c02792d..87fc00af8d28 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1040,9 +1040,7 @@ out_finish: /* * Screen Target CRTC dispatch table */ -static struct drm_crtc_funcs vmw_stdu_crtc_funcs = { - .save = vmw_du_crtc_save, - .restore = vmw_du_crtc_restore, +static const struct drm_crtc_funcs vmw_stdu_crtc_funcs = { .cursor_set = vmw_du_crtc_cursor_set, .cursor_move = vmw_du_crtc_cursor_move, .gamma_set = vmw_du_crtc_gamma_set, @@ -1072,7 +1070,7 @@ static void vmw_stdu_encoder_destroy(struct drm_encoder *encoder) vmw_stdu_destroy(vmw_encoder_to_stdu(encoder)); } -static struct drm_encoder_funcs vmw_stdu_encoder_funcs = { +static const struct drm_encoder_funcs vmw_stdu_encoder_funcs = { .destroy = vmw_stdu_encoder_destroy, }; @@ -1099,10 +1097,8 @@ static void vmw_stdu_connector_destroy(struct drm_connector *connector) -static struct drm_connector_funcs vmw_stdu_connector_funcs = { +static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, - .save = vmw_du_connector_save, - .restore = vmw_du_connector_restore, .detect = vmw_du_connector_detect, .fill_modes = vmw_du_connector_fill_modes, .set_property = vmw_du_connector_set_property, @@ -1149,7 +1145,7 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) connector->status = vmw_du_connector_detect(connector, false); drm_encoder_init(dev, encoder, &vmw_stdu_encoder_funcs, - DRM_MODE_ENCODER_VIRTUAL); + DRM_MODE_ENCODER_VIRTUAL, NULL); drm_mode_connector_attach_encoder(connector, encoder); encoder->possible_crtcs = (1 << unit); encoder->possible_clones = 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 7d620e82e000..c2a721a8cef9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -771,7 +771,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets), GFP_KERNEL); - if (unlikely(srf->sizes == NULL)) { + 
if (unlikely(srf->offsets == NULL)) { ret = -ENOMEM; goto out_no_offsets; } @@ -815,11 +815,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, srf->sizes[0].height == 64 && srf->format == SVGA3D_A8R8G8B8) { - srf->snooper.image = kmalloc(64 * 64 * 4, GFP_KERNEL); - /* clear the image */ - if (srf->snooper.image) { - memset(srf->snooper.image, 0x00, 64 * 64 * 4); - } else { + srf->snooper.image = kzalloc(64 * 64 * 4, GFP_KERNEL); + if (!srf->snooper.image) { DRM_ERROR("Failed to allocate cursor_image\n"); ret = -ENOMEM; goto out_no_copy; diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index c1189f004441..a1d9974cfcb5 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -10,6 +10,7 @@ host1x-y = \ mipi.o \ hw/host1x01.o \ hw/host1x02.o \ - hw/host1x04.o + hw/host1x04.o \ + hw/host1x05.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 4a99c6416e6a..da462afcb225 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -538,6 +538,8 @@ EXPORT_SYMBOL(host1x_driver_register_full); void host1x_driver_unregister(struct host1x_driver *driver) { + driver_unregister(&driver->driver); + mutex_lock(&drivers_lock); list_del_init(&driver->list); mutex_unlock(&drivers_lock); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 53d3d1d45b48..314bf3718cc7 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -35,6 +35,7 @@ #include "hw/host1x01.h" #include "hw/host1x02.h" #include "hw/host1x04.h" +#include "hw/host1x05.h" void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) { @@ -87,7 +88,17 @@ static const struct host1x_info host1x04_info = { .sync_offset = 0x2100, }; +static const struct host1x_info host1x05_info = { + .nb_channels = 14, + .nb_pts = 192, + .nb_mlocks = 16, + .nb_bases = 64, + .init = host1x05_init, + .sync_offset = 0x2100, +}; + static struct of_device_id host1x_of_match[] = { + { .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, }, { .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, }, { .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, }, { .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, }, @@ -212,6 +223,11 @@ static struct platform_driver tegra_host1x_driver = { .remove = host1x_remove, }; +static struct platform_driver * const drivers[] = { + &tegra_host1x_driver, + &tegra_mipi_driver, +}; + static int __init tegra_host1x_init(void) { int err; @@ -220,28 +236,17 @@ static int __init tegra_host1x_init(void) if (err < 0) return err; - err = platform_driver_register(&tegra_host1x_driver); - if (err < 0) - goto unregister_bus; - - err = platform_driver_register(&tegra_mipi_driver); + err = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (err < 0) - goto unregister_host1x; + bus_unregister(&host1x_bus_type); - return 0; - -unregister_host1x: - platform_driver_unregister(&tegra_host1x_driver); -unregister_bus: - bus_unregister(&host1x_bus_type); return err; } module_init(tegra_host1x_init); static void __exit tegra_host1x_exit(void) { - platform_driver_unregister(&tegra_mipi_driver); - platform_driver_unregister(&tegra_host1x_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); bus_unregister(&host1x_bus_type); } module_exit(tegra_host1x_exit); diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c new file mode 100644 index 000000000000..047097ce3bad --- /dev/null +++ 
b/drivers/gpu/host1x/hw/host1x05.c @@ -0,0 +1,42 @@ +/* + * Host1x init for Tegra210 SoCs + * + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/* include hw specification */ +#include "host1x05.h" +#include "host1x05_hardware.h" + +/* include code */ +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x05_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h new file mode 100644 index 000000000000..a306d9c05cd5 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x05.h @@ -0,0 +1,26 @@ +/* + * Host1x init for Tegra210 SoCs + * + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HOST1X_HOST1X05_H +#define HOST1X_HOST1X05_H + +struct host1x; + +int host1x05_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h new file mode 100644 index 000000000000..2937ebb6be11 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x05_hardware.h @@ -0,0 +1,142 @@ +/* + * Tegra host1x Register Offsets for Tegra210 + * + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef __HOST1X_HOST1X05_HARDWARE_H +#define __HOST1X_HOST1X05_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x05_channel.h" +#include "hw_host1x05_sync.h" +#include "hw_host1x05_uclass.h" + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +/* cdma opcodes */ +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h new file mode 100644 index 000000000000..fce6e2c1ff4c --- 
/dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. 
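+ *
+ * For example, with the definitions below:
+ * host1x_channel_dmaput_r() returns the DMAPUT register offset (0x18),
+ * and host1x_channel_fifostat_cfempty_v(r) extracts the CFEMPTY bit
+ * (bit 11) from a FIFOSTAT register value r.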
+ */ + +#ifndef HOST1X_HW_HOST1X05_CHANNEL_H +#define HOST1X_HW_HOST1X05_CHANNEL_H + +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 11) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h new file mode 100644 index 000000000000..ca10eee5045c --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. 
+ * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X05_SYNC_H +#define HOST1X_HW_HOST1X05_SYNC_H + +#define REGISTER_STRIDE 4 + +static inline u32 host1x_sync_syncpt_r(unsigned int id) +{ + return 0xf80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT(id) \ + host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0xe80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0xf00 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0xf20 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0xc00 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_v(u32 r) +{ + return (r >> 8) & 0xf; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \ + host1x_sync_mlock_owner_chid_v(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x1380 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) 
\ + host1x_sync_syncpt_int_thresh_r(id) +static inline u32 host1x_sync_syncpt_base_r(unsigned int id) +{ + return 0x600 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_BASE(id) \ + host1x_sync_syncpt_base_r(id) +static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) +{ + return 0xf60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ + host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0xc80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0xf) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0xcc0 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h new file mode 100644 index 000000000000..0c411da6bc41 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2015 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. 
+ * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X05_UCLASS_H +#define HOST1X_HW_HOST1X05_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + 
host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_RWN_READ_V \ + host1x_uclass_indoff_rwn_read_v() + +#endif diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index ba47b30d28fa..f2e13eb8339f 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -28,6 +28,7 @@ #include <linux/irqchip/chained_irq.h> #include <linux/irqdomain.h> #include <linux/of_device.h> +#include <linux/of_graph.h> #include <drm/drm_fourcc.h> @@ -993,12 +994,26 @@ static void platform_device_unregister_children(struct platform_device *pdev) struct ipu_platform_reg { struct ipu_client_platformdata pdata; const char *name; - int reg_offset; }; +/* These must be in the order of the corresponding device tree port nodes */ static const struct ipu_platform_reg client_reg[] = { { .pdata = { + .csi = 0, + .dma[0] = IPUV3_CHANNEL_CSI0, + .dma[1] = -EINVAL, + }, + .name = "imx-ipuv3-camera", + }, { + .pdata = { + .csi = 1, + .dma[0] = IPUV3_CHANNEL_CSI1, + .dma[1] = -EINVAL, + }, + .name = "imx-ipuv3-camera", + }, { + .pdata = { .di = 0, .dc = 5, .dp = IPU_DP_FLOW_SYNC_BG, @@ -1015,22 +1030,6 @@ static const struct ipu_platform_reg client_reg[] = { .dma[1] = -EINVAL, }, .name = "imx-ipuv3-crtc", - }, { - .pdata = { - .csi = 0, - .dma[0] = IPUV3_CHANNEL_CSI0, - .dma[1] = -EINVAL, - }, - .reg_offset = IPU_CM_CSI0_REG_OFS, - .name = "imx-ipuv3-camera", - }, { - .pdata = { - .csi = 1, - .dma[0] = IPUV3_CHANNEL_CSI1, - .dma[1] = -EINVAL, - }, - .reg_offset = IPU_CM_CSI1_REG_OFS, - .name = "imx-ipuv3-camera", }, }; @@ -1051,22 +1050,30 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) for (i = 0; i < ARRAY_SIZE(client_reg); i++) { const struct ipu_platform_reg *reg = &client_reg[i]; struct platform_device *pdev; - struct resource res; - - if (reg->reg_offset) { - memset(&res, 0, sizeof(res)); - res.flags = IORESOURCE_MEM; - res.start = ipu_base + ipu->devtype->cm_ofs + reg->reg_offset; - res.end = res.start + PAGE_SIZE - 1; - pdev = platform_device_register_resndata(dev, reg->name, - id++, &res, 1, &reg->pdata, sizeof(reg->pdata)); - } else { - pdev = platform_device_register_data(dev, reg->name, - id++, &reg->pdata, sizeof(reg->pdata)); + + pdev
= platform_device_alloc(reg->name, id++); + if (!pdev) { + ret = -ENOMEM; + goto err_register; + } + + pdev->dev.parent = dev; + + /* Associate subdevice with the corresponding port node */ + pdev->dev.of_node = of_graph_get_port_by_id(dev->of_node, i); + if (!pdev->dev.of_node) { + dev_err(dev, "missing port@%d node in %s\n", i, + dev->of_node->full_name); + ret = -ENODEV; + goto err_register; } - if (IS_ERR(pdev)) { - ret = PTR_ERR(pdev); + ret = platform_device_add_data(pdev, ®->pdata, + sizeof(reg->pdata)); + if (!ret) + ret = platform_device_add(pdev); + if (ret) { + platform_device_put(pdev); goto err_register; } } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ac1feea51be3..9024a3de4032 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -609,6 +609,7 @@ #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f #define USB_DEVICE_ID_LOGITECH_HARMONY_PS3 0x0306 +#define USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS 0xc24d #define USB_DEVICE_ID_LOGITECH_MOUSE_C01A 0xc01a #define USB_DEVICE_ID_LOGITECH_MOUSE_C05A 0xc05a #define USB_DEVICE_ID_LOGITECH_MOUSE_C06A 0xc06a diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index c20ac76c0a8c..c690fae02cf8 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -665,8 +665,9 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) struct lg_drv_data *drv_data; int ret; - /* Only work with the 1st interface (G29 presents multiple) */ - if (iface_num != 0) { + /* G29 only work with the 1st interface */ + if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && + (iface_num != 0)) { dbg_hid("%s: ignoring ifnum %d\n", __func__, iface_num); return -ENODEV; } diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 94bb137abe32..2324520b006d 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -84,6 +84,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C01A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A, HID_QUIRK_ALWAYS_POLL }, diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index b33f53b3ca93..bf04d2a3cf4a 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1896,7 +1896,7 @@ static void EChannel_proc_rcv(struct hisax_d_if *d_if) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 4a4825528188..90449e1e91e5 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -901,7 +901,7 @@ Begin: ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", total - 3); } diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c index 
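[Re ipu-common.c: the client_reg[] array index now doubles as the OF graph port id handed to of_graph_get_port_by_id(), which is why the two imx-ipuv3-camera entries move to the head of the array (presumably port@0/port@1 are the CSI inputs and the display interfaces follow) and why the new comment insists on matching the device-tree port order. The switch from platform_device_register_*data() to the open-coded alloc/add sequence exists so dev.parent and dev.of_node can be set before the device is added; note the error path correctly uses platform_device_put() for a device that was never successfully added. Re hid-lg.c: the comment should read "G29 only works with the 1st interface"; the change itself narrows the interface-0 restriction to the G29, so other multi-interface Logitech devices keep probing all interfaces.]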
b1fad81f0722..13b2151c10f5 100644 --- a/drivers/isdn/hisax/hfc_sx.c +++ b/drivers/isdn/hisax/hfc_sx.c @@ -674,7 +674,7 @@ receive_emsg(struct IsdnCardState *cs) ptr--; *ptr++ = '\n'; *ptr = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogEcho: ", "warning Frame too big (%d)", skb->len); } diff --git a/drivers/isdn/hisax/q931.c b/drivers/isdn/hisax/q931.c index b420f8bd862e..ba4beb25d872 100644 --- a/drivers/isdn/hisax/q931.c +++ b/drivers/isdn/hisax/q931.c @@ -1179,7 +1179,7 @@ LogFrame(struct IsdnCardState *cs, u_char *buf, int size) dp--; *dp++ = '\n'; *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } else HiSax_putstatus(cs, "LogFrame: ", "warning Frame too big (%d)", size); } @@ -1246,7 +1246,7 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) } if (finish) { *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); return; } if ((0xfe & buf[0]) == PROTO_DIS_N0) { /* 1TR6 */ @@ -1509,5 +1509,5 @@ dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir) dp += sprintf(dp, "Unknown protocol %x!", buf[0]); } *dp = 0; - HiSax_putstatus(cs, NULL, "%s", cs->dlog); + HiSax_putstatus(cs, NULL, cs->dlog); } diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index 5178645ac42b..86ce887b2ed6 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -123,6 +123,26 @@ void nvm_unregister_mgr(struct nvmm_type *mt) } EXPORT_SYMBOL(nvm_unregister_mgr); +/* register with device with a supported manager */ +static int register_mgr(struct nvm_dev *dev) +{ + struct nvmm_type *mt; + int ret = 0; + + list_for_each_entry(mt, &nvm_mgrs, list) { + ret = mt->register_mgr(dev); + if (ret > 0) { + dev->mt = mt; + break; /* successfully initialized */ + } + } + + if (!ret) + pr_info("nvm: no compatible nvm manager found.\n"); + + return ret; +} + static struct nvm_dev *nvm_find_nvm_dev(const char *name) { struct nvm_dev *dev; @@ -221,7 +241,6 @@ static void nvm_free(struct nvm_dev *dev) static int nvm_init(struct nvm_dev *dev) { - struct nvmm_type *mt; int ret = -EINVAL; if (!dev->q || !dev->ops) @@ -252,21 +271,13 @@ static int nvm_init(struct nvm_dev *dev) goto err; } - /* register with device with a supported manager */ - list_for_each_entry(mt, &nvm_mgrs, list) { - ret = mt->register_mgr(dev); - if (ret < 0) - goto err; /* initialization failed */ - if (ret > 0) { - dev->mt = mt; - break; /* successfully initialized */ - } - } - - if (!ret) { - pr_info("nvm: no compatible manager found.\n"); + down_write(&nvm_lock); + ret = register_mgr(dev); + up_write(&nvm_lock); + if (ret < 0) + goto err; + if (!ret) return 0; - } pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", dev->name, dev->sec_per_pg, dev->nr_planes, @@ -308,6 +319,12 @@ int nvm_register(struct request_queue *q, char *disk_name, if (ret) goto err_init; + if (dev->ops->max_phys_sect > 256) { + pr_info("nvm: max sectors supported is 256.\n"); + ret = -EINVAL; + goto err_init; + } + if (dev->ops->max_phys_sect > 1) { dev->ppalist_pool = dev->ops->create_dma_pool(dev->q, "ppalist"); @@ -316,10 +333,6 @@ int nvm_register(struct request_queue *q, char *disk_name, ret = -ENOMEM; goto err_init; } - } else if (dev->ops->max_phys_sect > 256) { - pr_info("nvm: max sectors supported is 256.\n"); - ret = -EINVAL; - goto err_init; } down_write(&nvm_lock); @@ -335,15 +348,17 @@ EXPORT_SYMBOL(nvm_register); void nvm_unregister(char *disk_name) { - struct nvm_dev *dev 
= nvm_find_nvm_dev(disk_name); + struct nvm_dev *dev; + down_write(&nvm_lock); + dev = nvm_find_nvm_dev(disk_name); if (!dev) { pr_err("nvm: could not find device %s to unregister\n", disk_name); + up_write(&nvm_lock); return; } - down_write(&nvm_lock); list_del(&dev->devices); up_write(&nvm_lock); @@ -361,38 +376,30 @@ static int nvm_create_target(struct nvm_dev *dev, { struct nvm_ioctl_create_simple *s = &create->conf.s; struct request_queue *tqueue; - struct nvmm_type *mt; struct gendisk *tdisk; struct nvm_tgt_type *tt; struct nvm_target *t; void *targetdata; int ret = 0; + down_write(&nvm_lock); if (!dev->mt) { - /* register with device with a supported NVM manager */ - list_for_each_entry(mt, &nvm_mgrs, list) { - ret = mt->register_mgr(dev); - if (ret < 0) - return ret; /* initialization failed */ - if (ret > 0) { - dev->mt = mt; - break; /* successfully initialized */ - } - } - - if (!ret) { - pr_info("nvm: no compatible nvm manager found.\n"); - return -ENODEV; + ret = register_mgr(dev); + if (!ret) + ret = -ENODEV; + if (ret < 0) { + up_write(&nvm_lock); + return ret; } } tt = nvm_find_target_type(create->tgttype); if (!tt) { pr_err("nvm: target type %s not found\n", create->tgttype); + up_write(&nvm_lock); return -EINVAL; } - down_write(&nvm_lock); list_for_each_entry(t, &dev->online_targets, list) { if (!strcmp(create->tgtname, t->disk->disk_name)) { pr_err("nvm: target name already exists.\n"); @@ -476,7 +483,9 @@ static int __nvm_configure_create(struct nvm_ioctl_create *create) struct nvm_dev *dev; struct nvm_ioctl_create_simple *s; + down_write(&nvm_lock); dev = nvm_find_nvm_dev(create->dev); + up_write(&nvm_lock); if (!dev) { pr_err("nvm: device not found\n"); return -EINVAL; @@ -535,7 +544,9 @@ static int nvm_configure_show(const char *val) return -EINVAL; } + down_write(&nvm_lock); dev = nvm_find_nvm_dev(devname); + up_write(&nvm_lock); if (!dev) { pr_err("nvm: device not found\n"); return -EINVAL; @@ -680,8 +691,10 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) info->tgtsize = tgt_iter; up_write(&nvm_lock); - if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) + if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) { + kfree(info); return -EFAULT; + } kfree(info); return 0; @@ -724,8 +737,11 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) devices->nr_devices = i; - if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices))) + if (copy_to_user(arg, devices, + sizeof(struct nvm_ioctl_get_devices))) { + kfree(devices); return -EFAULT; + } kfree(devices); return 0; diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index e20e74ec6b91..35dde84b71e9 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -75,7 +75,6 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, struct nvm_block *blk; int i; - ppa = dev_to_generic_addr(gn->dev, ppa); lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun]; for (i = 0; i < nr_blocks; i++) { @@ -187,7 +186,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) ppa.g.lun = lun->vlun.id; ppa = generic_to_dev_addr(dev, ppa); - ret = dev->ops->get_bb_tbl(dev->q, ppa, + ret = dev->ops->get_bb_tbl(dev, ppa, dev->blks_per_lun, gennvm_block_bb, gn); if (ret) @@ -207,6 +206,14 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) return 0; } +static void gennvm_free(struct nvm_dev *dev) +{ + gennvm_blocks_free(dev); + gennvm_luns_free(dev); + kfree(dev->mp); + dev->mp = NULL; +} + 
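[Re lightnvm/core.c: two independent fixes here. First, the media-manager registration loop is factored into register_mgr() and every nvm_find_nvm_dev()/register_mgr() caller now runs under nvm_lock, closing races between device registration, target creation and unregistration. Second, the ioctl paths used to leak their temporary buffer when copy_to_user() failed; kfree() is now performed on both branches. The duplicated kfree could equally be funneled through a single exit, e.g. (untested sketch, same behaviour):

	ret = 0;
	if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info)))
		ret = -EFAULT;	/* report the fault after freeing */
	kfree(info);
	return ret;

Re gennvm.c: factoring out gennvm_free() also fixes the error path in gennvm_register(), which previously freed only the gen_nvm struct and leaked the already-allocated lun and block arrays.]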
static int gennvm_register(struct nvm_dev *dev) { struct gen_nvm *gn; @@ -234,16 +241,13 @@ static int gennvm_register(struct nvm_dev *dev) return 1; err: - kfree(gn); + gennvm_free(dev); return ret; } static void gennvm_unregister(struct nvm_dev *dev) { - gennvm_blocks_free(dev); - gennvm_luns_free(dev); - kfree(dev->mp); - dev->mp = NULL; + gennvm_free(dev); } static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev, diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 57dadd52b428..1deb8ff90a89 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -501,8 +501,6 @@ static int bfin_can_err(struct net_device *dev, u16 isrc, u16 status) cf->data[2] |= CAN_ERR_PROT_FORM; else if (status & SER) cf->data[2] |= CAN_ERR_PROT_STUFF; - else - cf->data[2] |= CAN_ERR_PROT_UNSPEC; } priv->can.state = state; diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 5d214d135332..f91b094288da 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -962,7 +962,6 @@ static int c_can_handle_bus_err(struct net_device *dev, * type of the last error to occur on the CAN bus */ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; switch (lec_type) { case LEC_STUFF_ERROR: @@ -975,8 +974,7 @@ static int c_can_handle_bus_err(struct net_device *dev, break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); @@ -988,8 +986,7 @@ static int c_can_handle_bus_err(struct net_device *dev, break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; default: break; diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 70a8cbb29e75..1e37313054f3 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -578,7 +578,7 @@ static int cc770_err(struct net_device *dev, u8 status) cf->data[2] |= CAN_ERR_PROT_BIT0; break; case STAT_LEC_CRC: - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; } } diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 868fe945e35a..41c0fc9f3b14 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -535,13 +535,13 @@ static void do_bus_err(struct net_device *dev, if (reg_esr & FLEXCAN_ESR_ACK_ERR) { netdev_dbg(dev, "ACK_ERR irq\n"); cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; tx_errors = 1; } if (reg_esr & FLEXCAN_ESR_CRC_ERR) { netdev_dbg(dev, "CRC_ERR irq\n"); cf->data[2] |= CAN_ERR_PROT_BIT; - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; rx_errors = 1; } if (reg_esr & FLEXCAN_ESR_FRM_ERR) { diff --git a/drivers/net/can/janz-ican3.c b/drivers/net/can/janz-ican3.c index c1e85368a198..5d04f5464faf 100644 --- a/drivers/net/can/janz-ican3.c +++ b/drivers/net/can/janz-ican3.c @@ -1096,7 +1096,6 @@ static int ican3_handle_cevtind(struct ican3_dev *mod, struct ican3_msg *msg) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & ECC_SEG; break; } diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ef655177bb5e..39cf911f7a1e 100644 --- a/drivers/net/can/m_can/m_can.c +++ 
b/drivers/net/can/m_can/m_can.c @@ -487,7 +487,6 @@ static int m_can_handle_lec_err(struct net_device *dev, * type of the last error to occur on the CAN bus */ cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; switch (lec_type) { case LEC_STUFF_ERROR: @@ -500,8 +499,7 @@ static int m_can_handle_lec_err(struct net_device *dev, break; case LEC_ACK_ERROR: netdev_dbg(dev, "ack error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; break; case LEC_BIT1_ERROR: netdev_dbg(dev, "bit1 error\n"); @@ -513,8 +511,7 @@ static int m_can_handle_lec_err(struct net_device *dev, break; case LEC_CRC_ERROR: netdev_dbg(dev, "CRC error\n"); - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; break; default: break; diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index e187ca783da0..c1317889d3d8 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -559,8 +559,7 @@ static void pch_can_error(struct net_device *ndev, u32 status) stats->rx_errors++; break; case PCH_CRC_ERR: - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; priv->can.can_stats.bus_error++; stats->rx_errors++; break; diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c index 7bd54191f962..bc46be39549d 100644 --- a/drivers/net/can/rcar_can.c +++ b/drivers/net/can/rcar_can.c @@ -241,17 +241,16 @@ static void rcar_can_error(struct net_device *ndev) u8 ecsr; netdev_dbg(priv->ndev, "Bus error interrupt:\n"); - if (skb) { + if (skb) cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - cf->data[2] = CAN_ERR_PROT_UNSPEC; - } + ecsr = readb(&priv->regs->ecsr); if (ecsr & RCAR_CAN_ECSR_ADEF) { netdev_dbg(priv->ndev, "ACK Delimiter Error\n"); tx_errors++; writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr); if (skb) - cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_ACK_DEL; } if (ecsr & RCAR_CAN_ECSR_BE0F) { netdev_dbg(priv->ndev, "Bit Error (dominant)\n"); @@ -272,7 +271,7 @@ static void rcar_can_error(struct net_device *ndev) rx_errors++; writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr); if (skb) - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } if (ecsr & RCAR_CAN_ECSR_AEF) { netdev_dbg(priv->ndev, "ACK Error\n"); @@ -280,7 +279,7 @@ static void rcar_can_error(struct net_device *ndev) writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr); if (skb) { cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } if (ecsr & RCAR_CAN_ECSR_FEF) { diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 7b92e911a616..8dda3b703d39 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev) priv->write_reg(priv, SJA1000_RXERR, 0x0); priv->read_reg(priv, SJA1000_ECC); + /* clear interrupt flags */ + priv->read_reg(priv, SJA1000_IR); + /* leave reset mode */ set_normal_mode(dev); } @@ -446,7 +449,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & ECC_SEG; break; } diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index d9a42c646783..68ef0a4cd821 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c 
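[Re the CAN driver changes in this series (bfin_can, c_can, cc770, flexcan, janz-ican3, m_can, pch_can, rcar_can and sja1000 above; sun4i_can, ti_hecc, the USB adapters and xilinx_can below): CAN_ERR_PROT_UNSPEC is defined as 0x00 in <uapi/linux/can/error.h>, so OR-ing it into cf->data[2] was always a no-op and its removal is purely cosmetic. The cf->data[3] changes are functional, though: the CAN_ERR_PROT_LOC_* values are location codes drawn from a single enumeration, not independent flag bits, so OR-ing two of them cannot report both locations — the field holds exactly one code, which the fixes now assign directly. The sja1000 hunk also picks up a separate fix: SJA1000_IR is read before leaving reset mode so stale interrupt flags are cleared during restart.]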
@@ -575,7 +575,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = (ecc & SUN4I_STA_ERR_SEG_CODE) >> 16; break; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index cf345cbfe819..680d1ff07a55 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -722,7 +722,6 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, if (err_status & HECC_BUS_ERROR) { ++priv->can.can_stats.bus_error; cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; if (err_status & HECC_CANES_FE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE); cf->data[2] |= CAN_ERR_PROT_FORM; @@ -737,13 +736,11 @@ static int ti_hecc_error(struct net_device *ndev, int int_status, } if (err_status & HECC_CANES_CRCE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE); - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } if (err_status & HECC_CANES_ACKE) { hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE); - cf->data[3] |= CAN_ERR_PROT_LOC_ACK | - CAN_ERR_PROT_LOC_ACK_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 2d390384ef3b..fc5b75675cd8 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -377,7 +377,6 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg) cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & SJA1000_ECC_SEG; break; } diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 0e5a4493ba4f..113e64fcd73b 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -282,7 +282,6 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, cf->data[2] |= CAN_ERR_PROT_STUFF; break; default: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; cf->data[3] = ecc & SJA1000_ECC_SEG; break; } diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 8b17a9065b0b..022bfa13ebfa 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -944,10 +944,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev, cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT; if (es->leaf.error_factor & M16C_EF_ACKE) - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK); + cf->data[3] = CAN_ERR_PROT_LOC_ACK; if (es->leaf.error_factor & M16C_EF_CRCE) - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL); + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; if (es->leaf.error_factor & M16C_EF_FORME) cf->data[2] |= CAN_ERR_PROT_FORM; if (es->leaf.error_factor & M16C_EF_STFE) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index de95b1ccba3e..a731720f1d13 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -401,9 +401,7 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv, tx_errors = 1; break; case USB_8DEV_STATUSMSG_CRC: - cf->data[2] |= CAN_ERR_PROT_UNSPEC; - cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; rx_errors = 1; break; case USB_8DEV_STATUSMSG_BIT0: diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index fc55e8e0351d..51670b322409 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -608,17 +608,15 @@ static void 
xcan_err_interrupt(struct net_device *ndev, u32 isr) /* Check for error interrupt */ if (isr & XCAN_IXR_ERROR_MASK) { - if (skb) { + if (skb) cf->can_id |= CAN_ERR_PROT | CAN_ERR_BUSERROR; - cf->data[2] |= CAN_ERR_PROT_UNSPEC; - } /* Check for Ack error interrupt */ if (err_status & XCAN_ESR_ACKER_MASK) { stats->tx_errors++; if (skb) { cf->can_id |= CAN_ERR_ACK; - cf->data[3] |= CAN_ERR_PROT_LOC_ACK; + cf->data[3] = CAN_ERR_PROT_LOC_ACK; } } @@ -654,8 +652,7 @@ static void xcan_err_interrupt(struct net_device *ndev, u32 isr) stats->rx_errors++; if (skb) { cf->can_id |= CAN_ERR_PROT; - cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ | - CAN_ERR_PROT_LOC_CRC_DEL; + cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ; } } priv->can.can_stats.bus_error++; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 955d06b9cdba..31c5e476fd64 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -29,6 +29,7 @@ source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" +source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" source "drivers/net/ethernet/adi/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 4a2ee98738f0..071f84eb6f3f 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ +obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_CADENCE) += cadence/ obj-$(CONFIG_NET_BFIN) += adi/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 991412ce6f48..9147a0107c44 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -450,12 +450,12 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - pdata->ring_ops->wr_cmd(tx_ring, count); skb_tx_timestamp(skb); pdata->stats.tx_packets++; pdata->stats.tx_bytes += skb->len; + pdata->ring_ops->wr_cmd(tx_ring, count); return NETDEV_TX_OK; } @@ -688,10 +688,10 @@ static int xgene_enet_open(struct net_device *ndev) mac_ops->tx_enable(pdata); mac_ops->rx_enable(pdata); + xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); if (ret) return ret; - xgene_enet_napi_enable(pdata); if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) phy_start(pdata->phy_dev); @@ -715,13 +715,13 @@ static int xgene_enet_close(struct net_device *ndev) else cancel_delayed_work_sync(&pdata->link_work); - xgene_enet_napi_disable(pdata); - xgene_enet_free_irq(ndev); - xgene_enet_process_ring(pdata->rx_ring, -1); - mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); + xgene_enet_free_irq(ndev); + xgene_enet_napi_disable(pdata); + xgene_enet_process_ring(pdata->rx_ring, -1); + return 0; } @@ -1474,15 +1474,15 @@ static int xgene_enet_probe(struct platform_device *pdev) } ndev->hw_features = ndev->features; - ret = register_netdev(ndev); + ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { - netdev_err(ndev, "Failed to register netdev\n"); + netdev_err(ndev, "No usable DMA configuration\n"); goto err; } - ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64)); + ret = 
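[Re xgene_enet_main.c: these are ordering fixes. In xgene_enet_start_xmit() the descriptor doorbell (wr_cmd) now comes after skb_tx_timestamp() and the stats updates, since once the hardware owns the descriptors the completion path may free the skb. In open/close, NAPI is enabled before the IRQ is requested and the teardown mirrors the setup in reverse. In probe, the DMA mask is configured before register_netdev() so no traffic can start with an unusable DMA setup, and an xgene_enet_mdio_config() failure now unwinds through the error path instead of returning with the netdev still registered.]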
register_netdev(ndev); if (ret) { - netdev_err(ndev, "No usable DMA configuration\n"); + netdev_err(ndev, "Failed to register netdev\n"); goto err; } @@ -1490,14 +1490,17 @@ static int xgene_enet_probe(struct platform_device *pdev) if (ret) goto err; - xgene_enet_napi_add(pdata); mac_ops = pdata->mac_ops; - if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { ret = xgene_enet_mdio_config(pdata); - else + if (ret) + goto err; + } else { INIT_DELAYED_WORK(&pdata->link_work, mac_ops->link_state); + } - return ret; + xgene_enet_napi_add(pdata); + return 0; err: unregister_netdev(ndev); free_netdev(ndev); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c8af3ce3ea38..bd377a6b067d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1534,6 +1534,8 @@ static const struct pci_device_id alx_pci_tbl[] = { .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2200), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, + { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400), + .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162), .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG }, { PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) }, diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h index af006b44b2a6..0959e6824cb6 100644 --- a/drivers/net/ethernet/atheros/alx/reg.h +++ b/drivers/net/ethernet/atheros/alx/reg.h @@ -37,6 +37,7 @@ #define ALX_DEV_ID_AR8161 0x1091 #define ALX_DEV_ID_E2200 0xe091 +#define ALX_DEV_ID_E2400 0xe0a1 #define ALX_DEV_ID_AR8162 0x1090 #define ALX_DEV_ID_AR8171 0x10A1 #define ALX_DEV_ID_AR8172 0x10A0 diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig new file mode 100644 index 000000000000..a3c7106fdf85 --- /dev/null +++ b/drivers/net/ethernet/aurora/Kconfig @@ -0,0 +1,20 @@ +config NET_VENDOR_AURORA + bool "Aurora VLSI devices" + help + If you have a network (Ethernet) device belonging to this class, + say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + questions about Aurora devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AURORA + +config AURORA_NB8800 + tristate "Aurora AU-NB8800 support" + select PHYLIB + help + Support for the AU-NB8800 gigabit Ethernet controller. + +endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile new file mode 100644 index 000000000000..6cb528a2fc26 --- /dev/null +++ b/drivers/net/ethernet/aurora/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c new file mode 100644 index 000000000000..ecc4a334c507 --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -0,0 +1,1552 @@ +/* + * Copyright (C) 2015 Mans Rullgard <mans@mansr.com> + * + * Mostly rewritten, based on driver from Sigma Designs. Original + * copyright notice below. + * + * + * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. 
+ * + * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/ethtool.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/of_device.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/dma-mapping.h> +#include <linux/phy.h> +#include <linux/cache.h> +#include <linux/jiffies.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <asm/barrier.h> + +#include "nb8800.h" + +static void nb8800_tx_done(struct net_device *dev); +static int nb8800_dma_stop(struct net_device *dev); + +static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) +{ + return readb_relaxed(priv->base + reg); +} + +static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) +{ + return readl_relaxed(priv->base + reg); +} + +static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) +{ + writeb_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) +{ + writew_relaxed(val, priv->base + reg); +} + +static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) +{ + writel_relaxed(val, priv->base + reg); +} + +static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readb(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writeb(priv, reg, new); +} + +static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, + u32 mask, u32 val) +{ + u32 old = nb8800_readl(priv, reg); + u32 new = (old & ~mask) | (val & mask); + + if (new != old) + nb8800_writel(priv, reg, new); +} + +static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, + bool set) +{ + nb8800_maskb(priv, reg, bits, set ? bits : 0); +} + +static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, bits); +} + +static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) +{ + nb8800_maskb(priv, reg, bits, 0); +} + +static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, + bool set) +{ + nb8800_maskl(priv, reg, bits, set ? 
bits : 0); +} + +static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, bits); +} + +static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) +{ + nb8800_maskl(priv, reg, bits, 0); +} + +static int nb8800_mdio_wait(struct mii_bus *bus) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + + return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, + val, !(val & MDIO_CMD_GO), 1, 1000); +} + +static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) +{ + struct nb8800_priv *priv = bus->priv; + int err; + + err = nb8800_mdio_wait(bus); + if (err) + return err; + + nb8800_writel(priv, NB8800_MDIO_CMD, cmd); + udelay(10); + nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); + + return nb8800_mdio_wait(bus); +} + +static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) +{ + struct nb8800_priv *priv = bus->priv; + u32 val; + int err; + + err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); + if (err) + return err; + + val = nb8800_readl(priv, NB8800_MDIO_STS); + if (val & MDIO_STS_ERR) + return 0xffff; + + return val & 0xffff; +} + +static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) +{ + u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | + MDIO_CMD_DATA(val) | MDIO_CMD_WR; + + return nb8800_mdio_cmd(bus, cmd); +} + +static void nb8800_mac_tx(struct net_device *dev, bool enable) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) + cpu_relax(); + + nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); +} + +static void nb8800_mac_rx(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); +} + +static void nb8800_mac_af(struct net_device *dev, bool enable) +{ + nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); +} + +static void nb8800_start_rx(struct net_device *dev) +{ + nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); +} + +static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; + int size = L1_CACHE_ALIGN(RX_BUF_SIZE); + dma_addr_t dma_addr; + struct page *page; + unsigned long offset; + void *data; + + data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); + if (!data) + return -ENOMEM; + + page = virt_to_head_page(data); + offset = data - page_address(page); + + dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, + DMA_FROM_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + skb_free_frag(data); + return -ENOMEM; + } + + rxb->page = page; + rxb->offset = offset; + rxd->desc.s_addr = dma_addr; + + return 0; +} + +static void nb8800_receive(struct net_device *dev, unsigned int i, + unsigned int len) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; + struct page *page = priv->rx_bufs[i].page; + int offset = priv->rx_bufs[i].offset; + void *data = page_address(page) + offset; + dma_addr_t dma = rxd->desc.s_addr; + struct sk_buff *skb; + unsigned int size; + int err; + + size = len <= RX_COPYBREAK ? 
len : RX_COPYHDR; + + skb = napi_alloc_skb(&priv->napi, size); + if (!skb) { + netdev_err(dev, "rx skb allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + if (len <= RX_COPYBREAK) { + dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); + memcpy(skb_put(skb, len), data, len); + dma_sync_single_for_device(&dev->dev, dma, len, + DMA_FROM_DEVICE); + } else { + err = nb8800_alloc_rx(dev, i, true); + if (err) { + netdev_err(dev, "rx buffer allocation failed\n"); + dev->stats.rx_dropped++; + return; + } + + dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); + memcpy(skb_put(skb, RX_COPYHDR), data, RX_COPYHDR); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset + RX_COPYHDR, len - RX_COPYHDR, + RX_BUF_SIZE); + } + + skb->protocol = eth_type_trans(skb, dev); + napi_gro_receive(&priv->napi, skb); +} + +static void nb8800_rx_error(struct net_device *dev, u32 report) +{ + if (report & RX_LENGTH_ERR) + dev->stats.rx_length_errors++; + + if (report & RX_FCS_ERR) + dev->stats.rx_crc_errors++; + + if (report & RX_FIFO_OVERRUN) + dev->stats.rx_fifo_errors++; + + if (report & RX_ALIGNMENT_ERROR) + dev->stats.rx_frame_errors++; + + dev->stats.rx_errors++; +} + +static int nb8800_poll(struct napi_struct *napi, int budget) +{ + struct net_device *dev = napi->dev; + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + unsigned int last = priv->rx_eoc; + unsigned int next; + int work = 0; + + nb8800_tx_done(dev); + +again: + while (work < budget) { + struct nb8800_rx_buf *rxb; + unsigned int len; + + next = (last + 1) % RX_DESC_COUNT; + + rxb = &priv->rx_bufs[next]; + rxd = &priv->rx_descs[next]; + + if (!rxd->report) + break; + + len = RX_BYTES_TRANSFERRED(rxd->report); + + if (IS_RX_ERROR(rxd->report)) + nb8800_rx_error(dev, rxd->report); + else + nb8800_receive(dev, next, len); + + dev->stats.rx_packets++; + dev->stats.rx_bytes += len; + + if (rxd->report & RX_MULTICAST_PKT) + dev->stats.multicast++; + + rxd->report = 0; + last = next; + work++; + } + + if (work) { + priv->rx_descs[last].desc.config |= DESC_EOC; + wmb(); /* ensure new EOC is written before clearing old */ + priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; + priv->rx_eoc = last; + nb8800_start_rx(dev); + } + + if (work < budget) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + /* If a packet arrived after we last checked but + * before writing RX_ITR, the interrupt will be + * delayed, so we retrieve it now. 
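[Re the nb8800 rx path above: frames up to RX_COPYBREAK bytes are memcpy'd into a freshly allocated skb and the DMA buffer stays mapped for reuse; larger frames copy only RX_COPYHDR bytes of header and attach the page as a fragment, but only after nb8800_alloc_rx() has provided a replacement buffer, so a failed allocation drops the frame rather than shrinking the ring. The poll loop moves the DESC_EOC marker forward and, before completing NAPI, re-checks the next descriptor to catch a frame that arrived while the interrupt threshold was being rearmed.]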
+ */ + if (priv->rx_descs[next].report) + goto again; + + napi_complete_done(napi, work); + } + + return work; +} + +static void __nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb; + u32 txc_cr; + + txb = &priv->tx_bufs[priv->tx_queue]; + if (!txb->ready) + return; + + txc_cr = nb8800_readl(priv, NB8800_TXC_CR); + if (txc_cr & TCR_EN) + return; + + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); /* ensure desc addr is written before starting DMA */ + nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); + + priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; +} + +static void nb8800_tx_dma_start(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock_irq(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock_irq(&priv->tx_lock); +} + +static void nb8800_tx_dma_start_irq(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + spin_lock(&priv->tx_lock); + __nb8800_tx_dma_start(dev); + spin_unlock(&priv->tx_lock); +} + +static int nb8800_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_desc *txd; + struct nb8800_tx_buf *txb; + struct nb8800_dma_desc *desc; + dma_addr_t dma_addr; + unsigned int dma_len; + unsigned int align; + unsigned int next; + + if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + return NETDEV_TX_BUSY; + } + + align = (8 - (uintptr_t)skb->data) & 7; + + dma_len = skb->len - align; + dma_addr = dma_map_single(&dev->dev, skb->data + align, + dma_len, DMA_TO_DEVICE); + + if (dma_mapping_error(&dev->dev, dma_addr)) { + netdev_err(dev, "tx dma mapping error\n"); + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { + netif_stop_queue(dev); + skb->xmit_more = 0; + } + + next = priv->tx_next; + txb = &priv->tx_bufs[next]; + txd = &priv->tx_descs[next]; + desc = &txd->desc[0]; + + next = (next + 1) % TX_DESC_COUNT; + + if (align) { + memcpy(txd->buf, skb->data, align); + + desc->s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); + desc->config = DESC_BTS(2) | DESC_DS | align; + + desc++; + } + + desc->s_addr = dma_addr; + desc->n_addr = priv->tx_bufs[next].dma_desc; + desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; + + if (!skb->xmit_more) + desc->config |= DESC_EOC; + + txb->skb = skb; + txb->dma_addr = dma_addr; + txb->dma_len = dma_len; + + if (!priv->tx_chain) { + txb->chain_len = 1; + priv->tx_chain = txb; + } else { + priv->tx_chain->chain_len++; + } + + netdev_sent_queue(dev, skb->len); + + priv->tx_next = next; + + if (!skb->xmit_more) { + smp_wmb(); + priv->tx_chain->ready = true; + priv->tx_chain = NULL; + nb8800_tx_dma_start(dev); + } + + return NETDEV_TX_OK; +} + +static void nb8800_tx_error(struct net_device *dev, u32 report) +{ + if (report & TX_LATE_COLLISION) + dev->stats.collisions++; + + if (report & TX_PACKET_DROPPED) + dev->stats.tx_dropped++; + + if (report & TX_FIFO_UNDERRUN) + dev->stats.tx_fifo_errors++; + + dev->stats.tx_errors++; +} + +static void nb8800_tx_done(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int limit = priv->tx_next; + unsigned int done = priv->tx_done; + unsigned int packets = 0; + unsigned int len = 0; + + while (done != limit) { + struct nb8800_tx_desc *txd = 
&priv->tx_descs[done]; + struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; + struct sk_buff *skb; + + if (!txd->report) + break; + + skb = txb->skb; + len += skb->len; + + dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, + DMA_TO_DEVICE); + + if (IS_TX_ERROR(txd->report)) { + nb8800_tx_error(dev, txd->report); + kfree_skb(skb); + } else { + consume_skb(skb); + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); + dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); + + txb->skb = NULL; + txb->ready = false; + txd->report = 0; + + done = (done + 1) % TX_DESC_COUNT; + packets++; + } + + if (packets) { + smp_mb__before_atomic(); + atomic_add(packets, &priv->tx_free); + netdev_completed_queue(dev, packets, len); + netif_wake_queue(dev); + priv->tx_done = done; + } +} + +static irqreturn_t nb8800_irq(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct nb8800_priv *priv = netdev_priv(dev); + irqreturn_t ret = IRQ_NONE; + u32 val; + + /* tx interrupt */ + val = nb8800_readl(priv, NB8800_TXC_SR); + if (val) { + nb8800_writel(priv, NB8800_TXC_SR, val); + + if (val & TSR_DI) + nb8800_tx_dma_start_irq(dev); + + if (val & TSR_TI) + napi_schedule_irqoff(&priv->napi); + + if (unlikely(val & TSR_DE)) + netdev_err(dev, "TX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & TSR_TO)) + netdev_err(dev, "TX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + /* rx interrupt */ + val = nb8800_readl(priv, NB8800_RXC_SR); + if (val) { + nb8800_writel(priv, NB8800_RXC_SR, val); + + if (likely(val & (RSR_RI | RSR_DI))) { + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); + napi_schedule_irqoff(&priv->napi); + } + + if (unlikely(val & RSR_DE)) + netdev_err(dev, "RX DMA error\n"); + + /* should never happen with automatic status retrieval */ + if (unlikely(val & RSR_RO)) + netdev_err(dev, "RX Status FIFO overflow\n"); + + ret = IRQ_HANDLED; + } + + return ret; +} + +static void nb8800_mac_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + bool gigabit = priv->speed == SPEED_1000; + u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; + u32 mac_mode = 0; + u32 slot_time; + u32 phy_clk; + u32 ict; + + if (!priv->duplex) + mac_mode |= HALF_DUPLEX; + + if (gigabit) { + if (priv->phy_mode == PHY_INTERFACE_MODE_RGMII) + mac_mode |= RGMII_MODE; + + mac_mode |= GMAC_MODE; + phy_clk = 125000000; + + /* Should be 512 but register is only 8 bits */ + slot_time = 255; + } else { + phy_clk = 25000000; + slot_time = 128; + } + + ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); + + nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); + nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); + nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); +} + +static void nb8800_pause_config(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + u32 rxcr; + + if (priv->pause_aneg) { + if (!phydev || !phydev->link) + return; + + priv->pause_rx = phydev->pause; + priv->pause_tx = phydev->pause ^ phydev->asym_pause; + } + + nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); + + rxcr = nb8800_readl(priv, NB8800_RXC_CR); + if (!!(rxcr & RCR_FL) == priv->pause_tx) + return; + + if (netif_running(dev)) { + napi_disable(&priv->napi); + netif_tx_lock_bh(dev); + nb8800_dma_stop(dev); + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + nb8800_start_rx(dev); + netif_tx_unlock_bh(dev); + 
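[Re the nb8800 tx path: transmit descriptors are chained via skb->xmit_more — DESC_EOC is only set on the last skb of a burst, and __nb8800_tx_dma_start() hands the whole chain to the DMA engine in one go; the TSR_DI interrupt then restarts the engine if another chain became ready in the meantime. nb8800_tx_done() pairs netdev_sent_queue() from the xmit path with netdev_completed_queue(), so the driver participates in byte queue limits, and smp_mb__before_atomic() orders the per-descriptor cleanup before the tx_free counter update becomes visible.]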
napi_enable(&priv->napi); + } else { + nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); + } +} + +static void nb8800_link_reconfigure(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct phy_device *phydev = priv->phydev; + int change = 0; + + if (phydev->link) { + if (phydev->speed != priv->speed) { + priv->speed = phydev->speed; + change = 1; + } + + if (phydev->duplex != priv->duplex) { + priv->duplex = phydev->duplex; + change = 1; + } + + if (change) + nb8800_mac_config(dev); + + nb8800_pause_config(dev); + } + + if (phydev->link != priv->link) { + priv->link = phydev->link; + change = 1; + } + + if (change) + phy_print_status(priv->phydev); +} + +static void nb8800_update_mac_addr(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); + + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); +} + +static int nb8800_set_mac_address(struct net_device *dev, void *addr) +{ + struct sockaddr *sock = addr; + + if (netif_running(dev)) + return -EBUSY; + + ether_addr_copy(dev->dev_addr, sock->sa_data); + nb8800_update_mac_addr(dev); + + return 0; +} + +static void nb8800_mc_init(struct net_device *dev, int val) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_MC_INIT, val); + readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, + 1, 1000); +} + +static void nb8800_set_rx_mode(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct netdev_hw_addr *ha; + int i; + + if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { + nb8800_mac_af(dev, false); + return; + } + + nb8800_mac_af(dev, true); + nb8800_mc_init(dev, 0); + + netdev_for_each_mc_addr(ha, dev) { + for (i = 0; i < ETH_ALEN; i++) + nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); + + nb8800_mc_init(dev, 0xff); + } +} + +#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) +#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) + +static void nb8800_dma_free(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int i; + + if (priv->rx_bufs) { + for (i = 0; i < RX_DESC_COUNT; i++) + if (priv->rx_bufs[i].page) + put_page(priv->rx_bufs[i].page); + + kfree(priv->rx_bufs); + priv->rx_bufs = NULL; + } + + if (priv->tx_bufs) { + for (i = 0; i < TX_DESC_COUNT; i++) + kfree_skb(priv->tx_bufs[i].skb); + + kfree(priv->tx_bufs); + priv->tx_bufs = NULL; + } + + if (priv->rx_descs) { + dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, + priv->rx_desc_dma); + priv->rx_descs = NULL; + } + + if (priv->tx_descs) { + dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, + priv->tx_desc_dma); + priv->tx_descs = NULL; + } +} + +static void nb8800_dma_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_rx_desc *rxd; + struct nb8800_tx_desc *txd; + unsigned int i; + + for (i = 0; i < RX_DESC_COUNT; i++) { + dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); + + rxd = &priv->rx_descs[i]; + rxd->desc.n_addr = rx_dma + sizeof(*rxd); + rxd->desc.r_addr = + rx_dma + offsetof(struct nb8800_rx_desc, report); + rxd->desc.config = priv->rx_dma_config; + rxd->report = 0; + } + + rxd->desc.n_addr = priv->rx_desc_dma; + rxd->desc.config |= DESC_EOC; + + priv->rx_eoc = RX_DESC_COUNT - 1; + + for (i = 0; i < TX_DESC_COUNT; i++) { + struct nb8800_tx_buf *txb = 
&priv->tx_bufs[i]; + dma_addr_t r_dma = txb->dma_desc + + offsetof(struct nb8800_tx_desc, report); + + txd = &priv->tx_descs[i]; + txd->desc[0].r_addr = r_dma; + txd->desc[1].r_addr = r_dma; + txd->report = 0; + } + + priv->tx_next = 0; + priv->tx_queue = 0; + priv->tx_done = 0; + atomic_set(&priv->tx_free, TX_DESC_COUNT); + + nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); + + wmb(); /* ensure all setup is written before starting */ +} + +static int nb8800_dma_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + unsigned int n_rx = RX_DESC_COUNT; + unsigned int n_tx = TX_DESC_COUNT; + unsigned int i; + int err; + + priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, + &priv->rx_desc_dma, GFP_KERNEL); + if (!priv->rx_descs) + goto err_out; + + priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); + if (!priv->rx_bufs) + goto err_out; + + for (i = 0; i < n_rx; i++) { + err = nb8800_alloc_rx(dev, i, false); + if (err) + goto err_out; + } + + priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, + &priv->tx_desc_dma, GFP_KERNEL); + if (!priv->tx_descs) + goto err_out; + + priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); + if (!priv->tx_bufs) + goto err_out; + + for (i = 0; i < n_tx; i++) + priv->tx_bufs[i].dma_desc = + priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); + + nb8800_dma_reset(dev); + + return 0; + +err_out: + nb8800_dma_free(dev); + + return -ENOMEM; +} + +static int nb8800_dma_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; + struct nb8800_tx_desc *txd = &priv->tx_descs[0]; + int retry = 5; + u32 txcr; + u32 rxcr; + int err; + unsigned int i; + + /* wait for tx to finish */ + err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, + !(txcr & TCR_EN) && + priv->tx_done == priv->tx_next, + 1000, 1000000); + if (err) + return err; + + /* The rx DMA only stops if it reaches the end of chain. + * To make this happen, we set the EOC flag on all rx + * descriptors, put the device in loopback mode, and send + * a few dummy frames. The interrupt handler will ignore + * these since NAPI is disabled and no real frames are in + * the tx queue. + */ + + for (i = 0; i < RX_DESC_COUNT; i++) + priv->rx_descs[i].desc.config |= DESC_EOC; + + txd->desc[0].s_addr = + txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); + txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; + memset(txd->buf, 0, sizeof(txd->buf)); + + nb8800_mac_af(dev, false); + nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + + do { + nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); + wmb(); + nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); + + err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, + rxcr, !(rxcr & RCR_EN), + 1000, 100000); + } while (err && --retry); + + nb8800_mac_af(dev, true); + nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); + nb8800_dma_reset(dev); + + return retry ? 
0 : -ETIMEDOUT; +} + +static void nb8800_pause_adv(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 adv = 0; + + if (!priv->phydev) + return; + + if (priv->pause_rx) + adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + if (priv->pause_tx) + adv ^= ADVERTISED_Asym_Pause; + + priv->phydev->supported |= adv; + priv->phydev->advertising |= adv; +} + +static int nb8800_open(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + /* clear any pending interrupts */ + nb8800_writel(priv, NB8800_RXC_SR, 0xf); + nb8800_writel(priv, NB8800_TXC_SR, 0xf); + + err = nb8800_dma_init(dev); + if (err) + return err; + + err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); + if (err) + goto err_free_dma; + + nb8800_mac_rx(dev, true); + nb8800_mac_tx(dev, true); + + priv->phydev = of_phy_connect(dev, priv->phy_node, + nb8800_link_reconfigure, 0, + priv->phy_mode); + if (!priv->phydev) + goto err_free_irq; + + nb8800_pause_adv(dev); + + netdev_reset_queue(dev); + napi_enable(&priv->napi); + netif_start_queue(dev); + + nb8800_start_rx(dev); + phy_start(priv->phydev); + + return 0; + +err_free_irq: + free_irq(dev->irq, dev); +err_free_dma: + nb8800_dma_free(dev); + + return err; +} + +static int nb8800_stop(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + phy_stop(priv->phydev); + + netif_stop_queue(dev); + napi_disable(&priv->napi); + + nb8800_dma_stop(dev); + nb8800_mac_rx(dev, false); + nb8800_mac_tx(dev, false); + + phy_disconnect(priv->phydev); + priv->phydev = NULL; + + free_irq(dev->irq, dev); + + nb8800_dma_free(dev); + + return 0; +} + +static int nb8800_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + return phy_mii_ioctl(priv->phydev, rq, cmd); +} + +static const struct net_device_ops nb8800_netdev_ops = { + .ndo_open = nb8800_open, + .ndo_stop = nb8800_stop, + .ndo_start_xmit = nb8800_xmit, + .ndo_set_mac_address = nb8800_set_mac_address, + .ndo_set_rx_mode = nb8800_set_rx_mode, + .ndo_do_ioctl = nb8800_ioctl, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + +static int nb8800_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_gset(priv->phydev, cmd); +} + +static int nb8800_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return phy_ethtool_sset(priv->phydev, cmd); +} + +static int nb8800_nway_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + if (!priv->phydev) + return -ENODEV; + + return genphy_restart_aneg(priv->phydev); +} + +static void nb8800_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + pp->autoneg = priv->pause_aneg; + pp->rx_pause = priv->pause_rx; + pp->tx_pause = priv->pause_tx; +} + +static int nb8800_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pp) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + priv->pause_aneg = pp->autoneg; + priv->pause_rx = pp->rx_pause; + priv->pause_tx = pp->tx_pause; + + nb8800_pause_adv(dev); + + if (!priv->pause_aneg) + nb8800_pause_config(dev); + else if (priv->phydev) + phy_start_aneg(priv->phydev); + + return 0; +} + +static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { + 
"rx_bytes_ok", + "rx_frames_ok", + "rx_undersize_frames", + "rx_fragment_frames", + "rx_64_byte_frames", + "rx_127_byte_frames", + "rx_255_byte_frames", + "rx_511_byte_frames", + "rx_1023_byte_frames", + "rx_max_size_frames", + "rx_oversize_frames", + "rx_bad_fcs_frames", + "rx_broadcast_frames", + "rx_multicast_frames", + "rx_control_frames", + "rx_pause_frames", + "rx_unsup_control_frames", + "rx_align_error_frames", + "rx_overrun_frames", + "rx_jabber_frames", + "rx_bytes", + "rx_frames", + + "tx_bytes_ok", + "tx_frames_ok", + "tx_64_byte_frames", + "tx_127_byte_frames", + "tx_255_byte_frames", + "tx_511_byte_frames", + "tx_1023_byte_frames", + "tx_max_size_frames", + "tx_oversize_frames", + "tx_broadcast_frames", + "tx_multicast_frames", + "tx_control_frames", + "tx_pause_frames", + "tx_underrun_frames", + "tx_single_collision_frames", + "tx_multi_collision_frames", + "tx_deferred_collision_frames", + "tx_late_collision_frames", + "tx_excessive_collision_frames", + "tx_bytes", + "tx_frames", + "tx_collisions", +}; + +#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) + +static int nb8800_get_sset_count(struct net_device *dev, int sset) +{ + if (sset == ETH_SS_STATS) + return NB8800_NUM_STATS; + + return -EOPNOTSUPP; +} + +static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) +{ + if (sset == ETH_SS_STATS) + memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); +} + +static u32 nb8800_read_stat(struct net_device *dev, int index) +{ + struct nb8800_priv *priv = netdev_priv(dev); + + nb8800_writeb(priv, NB8800_STAT_INDEX, index); + + return nb8800_readl(priv, NB8800_STAT_DATA); +} + +static void nb8800_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *estats, u64 *st) +{ + unsigned int i; + u32 rx, tx; + + for (i = 0; i < NB8800_NUM_STATS / 2; i++) { + rx = nb8800_read_stat(dev, i); + tx = nb8800_read_stat(dev, i | 0x80); + st[i] = rx; + st[i + NB8800_NUM_STATS / 2] = tx; + } +} + +static const struct ethtool_ops nb8800_ethtool_ops = { + .get_settings = nb8800_get_settings, + .set_settings = nb8800_set_settings, + .nway_reset = nb8800_nway_reset, + .get_link = ethtool_op_get_link, + .get_pauseparam = nb8800_get_pauseparam, + .set_pauseparam = nb8800_set_pauseparam, + .get_sset_count = nb8800_get_sset_count, + .get_strings = nb8800_get_strings, + .get_ethtool_stats = nb8800_get_ethtool_stats, +}; + +static int nb8800_hw_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 val; + + val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; + nb8800_writeb(priv, NB8800_TX_CTL1, val); + + /* Collision retry count */ + nb8800_writeb(priv, NB8800_TX_CTL2, 5); + + val = RX_PAD_STRIP | RX_AF_EN; + nb8800_writeb(priv, NB8800_RX_CTL, val); + + /* Chosen by fair dice roll */ + nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); + + /* TX cycles per deferral period */ + nb8800_writeb(priv, NB8800_TX_SDP, 12); + + /* The following three threshold values have been + * experimentally determined for good results. 
+ */ + + /* RX/TX FIFO threshold for partial empty (64-bit entries) */ + nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); + + /* RX/TX FIFO threshold for partial full (64-bit entries) */ + nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); + + /* Buffer size for transmit (64-bit entries) */ + nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); + + /* Configure tx DMA */ + + val = nb8800_readl(priv, NB8800_TXC_CR); + val &= TCR_LE; /* keep endian setting */ + val |= TCR_DM; /* DMA descriptor mode */ + val |= TCR_RS; /* automatically store tx status */ + val |= TCR_DIE; /* interrupt on DMA chain completion */ + val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ + val |= TCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_TXC_CR, val); + + /* TX complete interrupt after 10 ms or 7 frames (see above) */ + val = clk_get_rate(priv->clk) / 100; + nb8800_writel(priv, NB8800_TX_ITR, val); + + /* Configure rx DMA */ + + val = nb8800_readl(priv, NB8800_RXC_CR); + val &= RCR_LE; /* keep endian setting */ + val |= RCR_DM; /* DMA descriptor mode */ + val |= RCR_RS; /* automatically store rx status */ + val |= RCR_DIE; /* interrupt at end of DMA chain */ + val |= RCR_RFI(7); /* interrupt after 7 frames received */ + val |= RCR_BTS(2); /* 32-byte bus transaction size */ + nb8800_writel(priv, NB8800_RXC_CR, val); + + /* The rx interrupt can fire before the DMA has completed + * unless a small delay is added. 50 us is hopefully enough. + */ + priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; + + /* In NAPI poll mode we want to disable interrupts, but the + * hardware does not permit this. Delay 10 ms instead. + */ + priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; + + nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); + + priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; + + /* Flow control settings */ + + /* Pause time of 0.1 ms */ + val = 100000 / 512; + nb8800_writeb(priv, NB8800_PQ1, val >> 8); + nb8800_writeb(priv, NB8800_PQ2, val & 0xff); + + /* Auto-negotiate by default */ + priv->pause_aneg = true; + priv->pause_rx = true; + priv->pause_tx = true; + + nb8800_mc_init(dev, 0); + + return 0; +} + +static int nb8800_tangox_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + u32 pad_mode = PAD_MODE_MII; + + switch (priv->phy_mode) { + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + pad_mode = PAD_MODE_MII; + break; + + case PHY_INTERFACE_MODE_RGMII: + pad_mode = PAD_MODE_RGMII; + break; + + case PHY_INTERFACE_MODE_RGMII_TXID: + pad_mode = PAD_MODE_RGMII | PAD_MODE_GTX_CLK_DELAY; + break; + + default: + dev_err(dev->dev.parent, "unsupported phy mode %s\n", + phy_modes(priv->phy_mode)); + return -EINVAL; + } + + nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); + + return 0; +} + +static int nb8800_tangox_reset(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int clk_div; + + nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); + usleep_range(1000, 10000); + nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); + + wmb(); /* ensure reset is cleared before proceeding */ + + clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); + nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); + + return 0; +} + +static const struct nb8800_ops nb8800_tangox_ops = { + .init = nb8800_tangox_init, + .reset = nb8800_tangox_reset, +}; + +static int nb8800_tango4_init(struct net_device *dev) +{ + struct nb8800_priv *priv = netdev_priv(dev); + int err; + + err = nb8800_tangox_init(dev); + if (err) + return 
err; + + /* On tango4 interrupt on DMA completion per frame works and gives + * better performance despite generating more rx interrupts. + */ + + /* Disable unnecessary interrupt on rx completion */ + nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); + + /* Request interrupt on descriptor DMA completion */ + priv->rx_dma_config |= DESC_ID; + + return 0; +} + +static const struct nb8800_ops nb8800_tango4_ops = { + .init = nb8800_tango4_init, + .reset = nb8800_tangox_reset, +}; + +static const struct of_device_id nb8800_dt_ids[] = { + { + .compatible = "aurora,nb8800", + }, + { + .compatible = "sigma,smp8642-ethernet", + .data = &nb8800_tangox_ops, + }, + { + .compatible = "sigma,smp8734-ethernet", + .data = &nb8800_tango4_ops, + }, + { } +}; + +static int nb8800_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + const struct nb8800_ops *ops = NULL; + struct nb8800_priv *priv; + struct resource *res; + struct net_device *dev; + struct mii_bus *bus; + const unsigned char *mac; + void __iomem *base; + int irq; + int ret; + + match = of_match_device(nb8800_dt_ids, &pdev->dev); + if (match) + ops = match->data; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(&pdev->dev, "No IRQ\n"); + return -EINVAL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(base)) + return PTR_ERR(base); + + dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); + + dev = alloc_etherdev(sizeof(*priv)); + if (!dev) + return -ENOMEM; + + platform_set_drvdata(pdev, dev); + SET_NETDEV_DEV(dev, &pdev->dev); + + priv = netdev_priv(dev); + priv->base = base; + + priv->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (priv->phy_mode < 0) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII; + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + ret = PTR_ERR(priv->clk); + goto err_free_dev; + } + + ret = clk_prepare_enable(priv->clk); + if (ret) + goto err_free_dev; + + spin_lock_init(&priv->tx_lock); + + if (ops && ops->reset) { + ret = ops->reset(dev); + if (ret) + goto err_free_dev; + } + + bus = devm_mdiobus_alloc(&pdev->dev); + if (!bus) { + ret = -ENOMEM; + goto err_disable_clk; + } + + bus->name = "nb8800-mii"; + bus->read = nb8800_mdio_read; + bus->write = nb8800_mdio_write; + bus->parent = &pdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", + (unsigned long)res->start); + bus->priv = priv; + + ret = of_mdiobus_register(bus, pdev->dev.of_node); + if (ret) { + dev_err(&pdev->dev, "failed to register MII bus\n"); + goto err_disable_clk; + } + + priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!priv->phy_node) { + dev_err(&pdev->dev, "no PHY specified\n"); + ret = -ENODEV; + goto err_free_bus; + } + + priv->mii_bus = bus; + + ret = nb8800_hw_init(dev); + if (ret) + goto err_free_bus; + + if (ops && ops->init) { + ret = ops->init(dev); + if (ret) + goto err_free_bus; + } + + dev->netdev_ops = &nb8800_netdev_ops; + dev->ethtool_ops = &nb8800_ethtool_ops; + dev->flags |= IFF_MULTICAST; + dev->irq = irq; + + mac = of_get_mac_address(pdev->dev.of_node); + if (mac) + ether_addr_copy(dev->dev_addr, mac); + + if (!is_valid_ether_addr(dev->dev_addr)) + eth_hw_addr_random(dev); + + nb8800_update_mac_addr(dev); + + netif_carrier_off(dev); + + ret = register_netdev(dev); + if (ret) { + netdev_err(dev, "failed to register netdev\n"); + goto err_free_dma; + } + + netif_napi_add(dev, &priv->napi, nb8800_poll, 
NAPI_POLL_WEIGHT); + + netdev_info(dev, "MAC address %pM\n", dev->dev_addr); + + return 0; + +err_free_dma: + nb8800_dma_free(dev); +err_free_bus: + mdiobus_unregister(bus); +err_disable_clk: + clk_disable_unprepare(priv->clk); +err_free_dev: + free_netdev(dev); + + return ret; +} + +static int nb8800_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct nb8800_priv *priv = netdev_priv(ndev); + + unregister_netdev(ndev); + + mdiobus_unregister(priv->mii_bus); + + clk_disable_unprepare(priv->clk); + + nb8800_dma_free(ndev); + free_netdev(ndev); + + return 0; +} + +static struct platform_driver nb8800_driver = { + .driver = { + .name = "nb8800", + .of_match_table = nb8800_dt_ids, + }, + .probe = nb8800_probe, + .remove = nb8800_remove, +}; + +module_platform_driver(nb8800_driver); + +MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); +MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h new file mode 100644 index 000000000000..e5adbc2aac9f --- /dev/null +++ b/drivers/net/ethernet/aurora/nb8800.h @@ -0,0 +1,316 @@ +#ifndef _NB8800_H_ +#define _NB8800_H_ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/phy.h> +#include <linux/clk.h> +#include <linux/bitops.h> + +#define RX_DESC_COUNT 256 +#define TX_DESC_COUNT 256 + +#define NB8800_DESC_LOW 4 + +#define RX_BUF_SIZE 1552 + +#define RX_COPYBREAK 256 +#define RX_COPYHDR 128 + +#define MAX_MDC_CLOCK 2500000 + +/* Stargate Solutions SSN8800 core registers */ +#define NB8800_TX_CTL1 0x000 +#define TX_TPD BIT(5) +#define TX_APPEND_FCS BIT(4) +#define TX_PAD_EN BIT(3) +#define TX_RETRY_EN BIT(2) +#define TX_EN BIT(0) + +#define NB8800_TX_CTL2 0x001 + +#define NB8800_RX_CTL 0x004 +#define RX_BC_DISABLE BIT(7) +#define RX_RUNT BIT(6) +#define RX_AF_EN BIT(5) +#define RX_PAUSE_EN BIT(3) +#define RX_SEND_CRC BIT(2) +#define RX_PAD_STRIP BIT(1) +#define RX_EN BIT(0) + +#define NB8800_RANDOM_SEED 0x008 +#define NB8800_TX_SDP 0x14 +#define NB8800_TX_TPDP1 0x18 +#define NB8800_TX_TPDP2 0x19 +#define NB8800_SLOT_TIME 0x1c + +#define NB8800_MDIO_CMD 0x020 +#define MDIO_CMD_GO BIT(31) +#define MDIO_CMD_WR BIT(26) +#define MDIO_CMD_ADDR(x) ((x) << 21) +#define MDIO_CMD_REG(x) ((x) << 16) +#define MDIO_CMD_DATA(x) ((x) << 0) + +#define NB8800_MDIO_STS 0x024 +#define MDIO_STS_ERR BIT(31) + +#define NB8800_MC_ADDR(i) (0x028 + (i)) +#define NB8800_MC_INIT 0x02e +#define NB8800_UC_ADDR(i) (0x03c + (i)) + +#define NB8800_MAC_MODE 0x044 +#define RGMII_MODE BIT(7) +#define HALF_DUPLEX BIT(4) +#define BURST_EN BIT(3) +#define LOOPBACK_EN BIT(2) +#define GMAC_MODE BIT(0) + +#define NB8800_IC_THRESHOLD 0x050 +#define NB8800_PE_THRESHOLD 0x051 +#define NB8800_PF_THRESHOLD 0x052 +#define NB8800_TX_BUFSIZE 0x054 +#define NB8800_FIFO_CTL 0x056 +#define NB8800_PQ1 0x060 +#define NB8800_PQ2 0x061 +#define NB8800_SRC_ADDR(i) (0x06a + (i)) +#define NB8800_STAT_DATA 0x078 +#define NB8800_STAT_INDEX 0x07c +#define NB8800_STAT_CLEAR 0x07d + +#define NB8800_SLEEP_MODE 0x07e +#define SLEEP_MODE BIT(0) + +#define NB8800_WAKEUP 0x07f +#define WAKEUP BIT(0) + +/* Aurora NB8800 host interface registers */ +#define NB8800_TXC_CR 0x100 +#define TCR_LK BIT(12) +#define TCR_DS BIT(11) +#define TCR_BTS(x) (((x) & 0x7) << 8) +#define TCR_DIE BIT(7) +#define TCR_TFI(x) (((x) & 0x7) << 4) +#define TCR_LE BIT(3) +#define TCR_RS BIT(2) +#define TCR_DM BIT(1) +#define TCR_EN BIT(0) + +#define NB8800_TXC_SR 0x104 
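+/* Note: TCR_BTS() and TCR_TFI() above pack multi-bit fields rather
+ * than single flags. TCR_TFI(7) | TCR_BTS(2), as programmed in
+ * nb8800_hw_init() earlier, places 7 in bits 6:4 (interrupt after
+ * seven frames) and 2 in bits 10:8 (32-byte bus transactions).
+ */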
+#define TSR_DE BIT(3) +#define TSR_DI BIT(2) +#define TSR_TO BIT(1) +#define TSR_TI BIT(0) + +#define NB8800_TX_SAR 0x108 +#define NB8800_TX_DESC_ADDR 0x10c + +#define NB8800_TX_REPORT_ADDR 0x110 +#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) +#define TX_FIRST_DEFERRAL BIT(7) +#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) +#define TX_LATE_COLLISION BIT(2) +#define TX_PACKET_DROPPED BIT(1) +#define TX_FIFO_UNDERRUN BIT(0) +#define IS_TX_ERROR(r) ((r) & 0x07) + +#define NB8800_TX_FIFO_SR 0x114 +#define NB8800_TX_ITR 0x118 + +#define NB8800_RXC_CR 0x200 +#define RCR_FL BIT(13) +#define RCR_LK BIT(12) +#define RCR_DS BIT(11) +#define RCR_BTS(x) (((x) & 7) << 8) +#define RCR_DIE BIT(7) +#define RCR_RFI(x) (((x) & 7) << 4) +#define RCR_LE BIT(3) +#define RCR_RS BIT(2) +#define RCR_DM BIT(1) +#define RCR_EN BIT(0) + +#define NB8800_RXC_SR 0x204 +#define RSR_DE BIT(3) +#define RSR_DI BIT(2) +#define RSR_RO BIT(1) +#define RSR_RI BIT(0) + +#define NB8800_RX_SAR 0x208 +#define NB8800_RX_DESC_ADDR 0x20c + +#define NB8800_RX_REPORT_ADDR 0x210 +#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) +#define RX_MULTICAST_PKT BIT(9) +#define RX_BROADCAST_PKT BIT(8) +#define RX_LENGTH_ERR BIT(7) +#define RX_FCS_ERR BIT(6) +#define RX_RUNT_PKT BIT(5) +#define RX_FIFO_OVERRUN BIT(4) +#define RX_LATE_COLLISION BIT(3) +#define RX_ALIGNMENT_ERROR BIT(2) +#define RX_ERROR_MASK 0xfc +#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) + +#define NB8800_RX_FIFO_SR 0x214 +#define NB8800_RX_ITR 0x218 + +/* Sigma Designs SMP86xx additional registers */ +#define NB8800_TANGOX_PAD_MODE 0x400 +#define PAD_MODE_MASK 0x7 +#define PAD_MODE_MII 0x0 +#define PAD_MODE_RGMII 0x1 +#define PAD_MODE_GTX_CLK_INV BIT(3) +#define PAD_MODE_GTX_CLK_DELAY BIT(4) + +#define NB8800_TANGOX_MDIO_CLKDIV 0x420 +#define NB8800_TANGOX_RESET 0x424 + +/* Hardware DMA descriptor */ +struct nb8800_dma_desc { + u32 s_addr; /* start address */ + u32 n_addr; /* next descriptor address */ + u32 r_addr; /* report address */ + u32 config; +} __aligned(8); + +#define DESC_ID BIT(23) +#define DESC_EOC BIT(22) +#define DESC_EOF BIT(21) +#define DESC_LK BIT(20) +#define DESC_DS BIT(19) +#define DESC_BTS(x) (((x) & 0x7) << 16) + +/* DMA descriptor and associated data for rx. + * Allocated from coherent memory. + */ +struct nb8800_rx_desc { + /* DMA descriptor */ + struct nb8800_dma_desc desc; + + /* Status report filled in by hardware */ + u32 report; +}; + +/* Address of buffer on rx ring */ +struct nb8800_rx_buf { + struct page *page; + unsigned long offset; +}; + +/* DMA descriptors and associated data for tx. + * Allocated from coherent memory. + */ +struct nb8800_tx_desc { + /* DMA descriptor. The second descriptor is used if packet + * data is unaligned. + */ + struct nb8800_dma_desc desc[2]; + + /* Status report filled in by hardware */ + u32 report; + + /* Bounce buffer for initial unaligned part of packet */ + u8 buf[8] __aligned(8); +}; + +/* Packet in tx queue */ +struct nb8800_tx_buf { + /* Currently queued skb */ + struct sk_buff *skb; + + /* DMA address of the first descriptor */ + dma_addr_t dma_desc; + + /* DMA address of packet data */ + dma_addr_t dma_addr; + + /* Length of DMA mapping, less than skb->len if alignment + * buffer is used. 
+ */ + unsigned int dma_len; + + /* Number of packets in chain starting here */ + unsigned int chain_len; + + /* Packet chain ready to be submitted to hardware */ + bool ready; +}; + +struct nb8800_priv { + struct napi_struct napi; + + void __iomem *base; + + /* RX DMA descriptors */ + struct nb8800_rx_desc *rx_descs; + + /* RX buffers referenced by DMA descriptors */ + struct nb8800_rx_buf *rx_bufs; + + /* Current end of chain */ + u32 rx_eoc; + + /* Value for rx interrupt time register in NAPI interrupt mode */ + u32 rx_itr_irq; + + /* Value for rx interrupt time register in NAPI poll mode */ + u32 rx_itr_poll; + + /* Value for config field of rx DMA descriptors */ + u32 rx_dma_config; + + /* TX DMA descriptors */ + struct nb8800_tx_desc *tx_descs; + + /* TX packet queue */ + struct nb8800_tx_buf *tx_bufs; + + /* Number of free tx queue entries */ + atomic_t tx_free; + + /* First free tx queue entry */ + u32 tx_next; + + /* Next buffer to transmit */ + u32 tx_queue; + + /* Start of current packet chain */ + struct nb8800_tx_buf *tx_chain; + + /* Next buffer to reclaim */ + u32 tx_done; + + /* Lock for DMA activation */ + spinlock_t tx_lock; + + struct mii_bus *mii_bus; + struct device_node *phy_node; + struct phy_device *phydev; + + /* PHY connection type from DT */ + int phy_mode; + + /* Current link status */ + int speed; + int duplex; + int link; + + /* Pause settings */ + bool pause_aneg; + bool pause_rx; + bool pause_tx; + + /* DMA base address of rx descriptors, see rx_descs above */ + dma_addr_t rx_desc_dma; + + /* DMA base address of tx descriptors, see tx_descs above */ + dma_addr_t tx_desc_dma; + + struct clk *clk; +}; + +struct nb8800_ops { + int (*init)(struct net_device *dev); + int (*reset)(struct net_device *dev); +}; + +#endif /* _NB8800_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c9b036789184..2e611dc5f162 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10139,8 +10139,8 @@ static void __bnx2x_del_vxlan_port(struct bnx2x *bp, u16 port) DP(BNX2X_MSG_SP, "Invalid vxlan port\n"); return; } - bp->vxlan_dst_port--; - if (bp->vxlan_dst_port) + bp->vxlan_dst_port_count--; + if (bp->vxlan_dst_port_count) return; if (netif_running(bp->dev)) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index db15c5ee09c5..bdf094fb6ef9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3625,6 +3625,7 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); memcpy(pf->mac_addr, resp->perm_mac_address, ETH_ALEN); + memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN); pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); pf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); pf->max_tx_rings = le16_to_cpu(resp->max_tx_rings); @@ -3648,8 +3649,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->fw_fid = le16_to_cpu(resp->fid); memcpy(vf->mac_addr, resp->perm_mac_address, ETH_ALEN); - if (!is_valid_ether_addr(vf->mac_addr)) - random_ether_addr(vf->mac_addr); + if (is_valid_ether_addr(vf->mac_addr)) + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + else + random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ 
-3880,6 +3884,8 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #endif } +static int bnxt_cfg_rx_mode(struct bnxt *); + static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { int rc = 0; @@ -3946,11 +3952,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) bp->vnic_info[0].rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; - rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); - if (rc) { - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + rc = bnxt_cfg_rx_mode(bp); + if (rc) goto err_out; - } rc = bnxt_hwrm_set_coal(bp); if (rc) @@ -4865,7 +4869,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) } } -static void bnxt_cfg_rx_mode(struct bnxt *bp) +static int bnxt_cfg_rx_mode(struct bnxt *bp) { struct net_device *dev = bp->dev; struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; @@ -4914,6 +4918,7 @@ static void bnxt_cfg_rx_mode(struct bnxt *bp) netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); vnic->uc_filter_count = i; + return rc; } } @@ -4922,6 +4927,8 @@ skip_uc: if (rc) netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", rc); + + return rc; } static netdev_features_t bnxt_fix_features(struct net_device *dev, @@ -5212,13 +5219,27 @@ init_err: static int bnxt_change_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; + struct bnxt *bp = netdev_priv(dev); + int rc = 0; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; +#ifdef CONFIG_BNXT_SRIOV + if (BNXT_VF(bp) && is_valid_ether_addr(bp->vf.mac_addr)) + return -EADDRNOTAVAIL; +#endif + + if (ether_addr_equal(addr->sa_data, dev->dev_addr)) + return 0; + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + if (netif_running(dev)) { + bnxt_close_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + } - return 0; + return rc; } /* rtnl_lock held */ @@ -5686,15 +5707,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); dflt_rings = netif_get_num_default_rss_queues(); - if (BNXT_PF(bp)) { - memcpy(dev->dev_addr, bp->pf.mac_addr, ETH_ALEN); + if (BNXT_PF(bp)) bp->pf.max_irqs = max_irqs; - } else { #if defined(CONFIG_BNXT_SRIOV) - memcpy(dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); + else bp->vf.max_irqs = max_irqs; #endif - } bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings); bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings); bp->tx_nr_rings_per_tc = min_t(int, dflt_rings, max_tx_rings); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index f4cf68861069..7a9af2887d8e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -804,10 +804,9 @@ void bnxt_update_vf_mac(struct bnxt *bp) if (!is_valid_ether_addr(resp->perm_mac_address)) goto update_vf_mac_exit; - if (ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) - goto update_vf_mac_exit; - - memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + if (!ether_addr_equal(resp->perm_mac_address, bp->vf.mac_addr)) + memcpy(bp->vf.mac_addr, resp->perm_mac_address, ETH_ALEN); + /* overwrite netdev dev_adr with admin VF MAC */ memcpy(bp->dev->dev_addr, bp->vf.mac_addr, ETH_ALEN); update_vf_mac_exit: mutex_unlock(&bp->hwrm_cmd_lock); diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 88c1e1a834f8..169059c92f80 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1682,6 +1682,8 @@ static void 
macb_init_hw(struct macb *bp) macb_set_hwaddr(bp); config = macb_mdc_clk_div(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + config |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); config |= MACB_BF(RBOF, NET_IP_ALIGN); /* Make eth data aligned */ config |= MACB_BIT(PAE); /* PAuse Enable */ config |= MACB_BIT(DRFCS); /* Discard Rx FCS */ @@ -2416,6 +2418,8 @@ static int macb_init(struct platform_device *pdev) /* Set MII management clock divider */ val = macb_mdc_clk_div(bp); val |= macb_dbw(bp); + if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) + val |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); macb_writel(bp, NCFGR, val); return 0; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 6e1faea00ca8..d83b0db77821 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -215,12 +215,17 @@ /* GEM specific NCFGR bitfields. */ #define GEM_GBE_OFFSET 10 /* Gigabit mode enable */ #define GEM_GBE_SIZE 1 +#define GEM_PCSSEL_OFFSET 11 +#define GEM_PCSSEL_SIZE 1 #define GEM_CLK_OFFSET 18 /* MDC clock division */ #define GEM_CLK_SIZE 3 #define GEM_DBW_OFFSET 21 /* Data bus width */ #define GEM_DBW_SIZE 2 #define GEM_RXCOEN_OFFSET 24 #define GEM_RXCOEN_SIZE 1 +#define GEM_SGMIIEN_OFFSET 27 +#define GEM_SGMIIEN_SIZE 1 + /* Constants for data bus width. */ #define GEM_DBW32 0 /* 32 bit AMBA AHB data bus width */ diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index d3950b20feb9..39ca6744a4e6 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -120,10 +120,9 @@ * Calculated for SCLK of 700Mhz * value written should be a 1/16th of what is expected * - * 1 tick per 0.05usec = value of 2.2 - * This 10% would be covered in CQ timer thresh value + * 1 tick per 0.025usec */ -#define NICPF_CLK_PER_INT_TICK 2 +#define NICPF_CLK_PER_INT_TICK 1 /* Time to wait before we decide that a SQ is stuck. 
* diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index c561fdcb79a7..4b7fd63ae57c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -37,6 +37,7 @@ struct nicpf { #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) u8 vf_lmac_map[MAX_LMAC]; + u8 lmac_cnt; struct delayed_work dwork; struct workqueue_struct *check_link; u8 link[MAX_LMAC]; @@ -279,6 +280,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) u64 lmac_credit; nic->num_vf_en = 0; + nic->lmac_cnt = 0; for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { if (!(bgx_map & (1 << bgx))) @@ -288,6 +290,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic->vf_lmac_map[next_bgx_lmac++] = NIC_SET_VF_LMAC_MAP(bgx, lmac); nic->num_vf_en += lmac_cnt; + nic->lmac_cnt += lmac_cnt; /* Program LMAC credits */ lmac_credit = (1ull << 1); /* channel credit enable */ @@ -715,6 +718,13 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_CFG_DONE: /* Last message of VF config msg sequence */ nic->vf_enabled[vf] = true; + if (vf >= nic->lmac_cnt) + goto unlock; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, true); goto unlock; case NIC_MBOX_MSG_SHUTDOWN: /* First msg in VF teardown sequence */ @@ -722,6 +732,14 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) if (vf >= nic->num_vf_en) nic->sqs_used[vf - nic->num_vf_en] = false; nic->pqs_vf[vf] = 0; + + if (vf >= nic->lmac_cnt) + break; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, false); break; case NIC_MBOX_MSG_ALLOC_SQS: nic_alloc_sqs(nic, &mbx.sqs_alloc); @@ -940,7 +958,7 @@ static void nic_poll_for_link(struct work_struct *work) mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; - for (vf = 0; vf < nic->num_vf_en; vf++) { + for (vf = 0; vf < nic->lmac_cnt; vf++) { /* Poll only if VF is UP */ if (!nic->vf_enabled[vf]) continue; @@ -1074,8 +1092,7 @@ static void nic_remove(struct pci_dev *pdev) if (nic->check_link) { /* Destroy work Queue */ - cancel_delayed_work(&nic->dwork); - flush_workqueue(nic->check_link); + cancel_delayed_work_sync(&nic->dwork); destroy_workqueue(nic->check_link); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index af54c10945c2..a12b2e38cf61 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -112,6 +112,13 @@ static int nicvf_get_settings(struct net_device *netdev, cmd->supported = 0; cmd->transceiver = XCVR_EXTERNAL; + + if (!nic->link_up) { + cmd->duplex = DUPLEX_UNKNOWN; + ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + return 0; + } + if (nic->speed <= 1000) { cmd->port = PORT_MII; cmd->autoneg = AUTONEG_ENABLE; @@ -125,6 +132,13 @@ static int nicvf_get_settings(struct net_device *netdev, return 0; } +static u32 nicvf_get_link(struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + + return nic->link_up; +} + static void nicvf_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -660,7 +674,7 @@ static int nicvf_set_channels(struct net_device *dev, static const struct ethtool_ops nicvf_ethtool_ops = { 
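/* nicvf_get_link() returns the driver's cached link_up flag, updated
 * from the PF's link-change mailbox messages, instead of the generic
 * carrier test in ethtool_op_get_link(); ethtool output therefore
 * agrees with the BGX link polling changes above.
 */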
.get_settings = nicvf_get_settings, - .get_link = ethtool_op_get_link, + .get_link = nicvf_get_link, .get_drvinfo = nicvf_get_drvinfo, .get_msglevel = nicvf_get_msglevel, .set_msglevel = nicvf_set_msglevel, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 7f709cbdcd87..dde8dc720cd3 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1057,6 +1057,7 @@ int nicvf_stop(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_stop_all_queues(nic->netdev); + nic->link_up = false; /* Teardown secondary qsets first */ if (!nic->sqs_mode) { @@ -1211,9 +1212,6 @@ int nicvf_open(struct net_device *netdev) nic->drv_stats.txq_stop = 0; nic->drv_stats.txq_wake = 0; - netif_carrier_on(netdev); - netif_tx_start_all_queues(netdev); - return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index e404ea837727..206b6a71a545 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -592,7 +592,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, /* Set threshold value for interrupt generation */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, - qidx, nic->cq_coalesce_usecs); + qidx, CMP_QUEUE_TIMER_THRESH); } /* Configures transmit queue */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index fb4957d09914..033e8306e91c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -76,7 +76,7 @@ #define CMP_QSIZE CMP_QUEUE_SIZE2 #define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) #define CMP_QUEUE_CQE_THRESH 0 -#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ +#define CMP_QUEUE_TIMER_THRESH 80 /* ~2usec */ #define RBDR_SIZE RBDR_SIZE0 #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 180aa9fabf48..9df26c2263bc 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -186,6 +186,23 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) } EXPORT_SYMBOL(bgx_set_lmac_mac); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + u64 cfg; + + if (!bgx) + return; + + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + if (enable) + cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; + else + cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); +} +EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); + static void bgx_sgmii_change_link_state(struct lmac *lmac) { struct bgx *bgx = lmac->bgx; @@ -612,6 +629,8 @@ static void bgx_poll_for_link(struct work_struct *work) lmac->last_duplex = 1; } else { lmac->link_up = 0; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; } if (lmac->last_link != lmac->link_up) { @@ -654,8 +673,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) } /* Enable lmac */ - bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, - CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN); + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); /* Restore 
default cfg, incase low level firmware changed it */ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); @@ -695,8 +713,7 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; if (lmac->check_link) { /* Destroy work queue */ - cancel_delayed_work(&lmac->dwork); - flush_workqueue(lmac->check_link); + cancel_delayed_work_sync(&lmac->dwork); destroy_workqueue(lmac->check_link); } @@ -1009,6 +1026,9 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct bgx *bgx = NULL; u8 lmac; + /* Load octeon mdio driver */ + octeon_mdiobus_force_mod_depencency(); + bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); if (!bgx) return -ENOMEM; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 07b7ec66c60d..149e179363a1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -182,6 +182,8 @@ enum MCAST_MODE { #define BCAST_ACCEPT 1 #define CAM_ACCEPT 1 +void octeon_mdiobus_force_mod_depencency(void); +void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable); void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); unsigned bgx_get_map(int node); int bgx_get_lmac_count(int node, int bgx); diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index ed41559bae77..b553409e04ad 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -98,8 +98,7 @@ static int csr0 = 0x01A00000 | 0x4800; #elif defined(__mips__) static int csr0 = 0x00200000 | 0x4000; #else -#warning Processor architecture undefined! -static int csr0 = 0x00A00000 | 0x4800; +static int csr0; #endif /* Operational parameters that usually are not changed. */ @@ -1982,6 +1981,12 @@ static int __init tulip_init (void) pr_info("%s", version); #endif + if (!csr0) { + pr_warn("tulip: unknown CPU architecture, using default csr0\n"); + /* default to 8 longword cache line alignment */ + csr0 = 0x00A00000 | 0x4800; + } + /* copy module parms into globals */ tulip_rx_copybreak = rx_copybreak; tulip_max_interrupt_work = max_interrupt_work; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 9beb3d34d4ba..3c0e4d5c5fef 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -907,7 +907,7 @@ static void init_registers(struct net_device *dev) #elif defined(CONFIG_SPARC) || defined (CONFIG_PARISC) || defined(CONFIG_ARM) i |= 0x4800; #else -#warning Processor architecture undefined + dev_warn(&dev->dev, "unknown CPU architecture, using default csr0 setting\n"); i |= 0x4800; #endif iowrite32(i, ioaddr + PCIBusCfg); diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index ff76d4e9dc1b..bee32a9d9876 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -7,7 +7,8 @@ config NET_VENDOR_FREESCALE default y depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \ M523x || M527x || M5272 || M528x || M520x || M532x || \ - ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) + ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \ + ARCH_LAYERSCAPE ---help--- If you have a network (Ethernet) card belonging to this class, say Y. 
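The two Cavium thunder hunks above (nic_main.c and thunder_bgx.c) replace the cancel_delayed_work() plus flush_workqueue() pair with cancel_delayed_work_sync(). The plain cancel only removes a pending item; a concurrently running, self-rearming work can re-queue itself via its timer, which flush_workqueue() does not wait for, and then fire after destroy_workqueue(). The sync variant waits for a running instance and handles self-requeueing, so the extra flush becomes redundant. A minimal sketch of the resulting teardown pattern, using a hypothetical example_priv structure:

#include <linux/workqueue.h>

struct example_priv {
	struct delayed_work dwork;           /* self-rearming link poll */
	struct workqueue_struct *check_link;
};

static void example_stop_link_poll(struct example_priv *priv)
{
	/* Waits for a running instance and prevents it from requeueing,
	 * so no flush_workqueue() is needed before destroying the queue.
	 */
	cancel_delayed_work_sync(&priv->dwork);
	destroy_workqueue(priv->check_link);
}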
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3e6b9b437497..7cf898455e60 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -647,9 +647,9 @@ static int gfar_parse_group(struct device_node *np, if (model && strcasecmp(model, "FEC")) { gfar_irq(grp, RX)->irq = irq_of_parse_and_map(np, 1); gfar_irq(grp, ER)->irq = irq_of_parse_and_map(np, 2); - if (gfar_irq(grp, TX)->irq == NO_IRQ || - gfar_irq(grp, RX)->irq == NO_IRQ || - gfar_irq(grp, ER)->irq == NO_IRQ) + if (!gfar_irq(grp, TX)->irq || + !gfar_irq(grp, RX)->irq || + !gfar_irq(grp, ER)->irq) return -EINVAL; } diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 664d0c261269..b40fba929d65 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -467,7 +467,7 @@ static int gianfar_ptp_probe(struct platform_device *dev) etsects->irq = platform_get_irq(dev, 0); - if (etsects->irq == NO_IRQ) { + if (etsects->irq < 0) { pr_err("irq not in device tree\n"); goto no_node; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 639263d5e833..7781e80896a6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -627,8 +627,10 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) /* verify the skb head is not shared */ err = skb_cow_head(skb, 0); - if (err) + if (err) { + dev_kfree_skb(skb); return NETDEV_TX_OK; + } /* locate vlan header */ vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e84c7f2634d3..ed622fa29dfa 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -36,7 +36,7 @@ /* Registers */ #define MVNETA_RXQ_CONFIG_REG(q) (0x1400 + ((q) << 2)) -#define MVNETA_RXQ_HW_BUF_ALLOC BIT(1) +#define MVNETA_RXQ_HW_BUF_ALLOC BIT(0) #define MVNETA_RXQ_PKT_OFFSET_ALL_MASK (0xf << 8) #define MVNETA_RXQ_PKT_OFFSET_MASK(offs) ((offs) << 8) #define MVNETA_RXQ_THRESHOLD_REG(q) (0x14c0 + ((q) << 2)) @@ -62,6 +62,7 @@ #define MVNETA_WIN_SIZE(w) (0x2204 + ((w) << 3)) #define MVNETA_WIN_REMAP(w) (0x2280 + ((w) << 2)) #define MVNETA_BASE_ADDR_ENABLE 0x2290 +#define MVNETA_ACCESS_PROTECT_ENABLE 0x2294 #define MVNETA_PORT_CONFIG 0x2400 #define MVNETA_UNI_PROMISC_MODE BIT(0) #define MVNETA_DEF_RXQ(q) ((q) << 1) @@ -159,7 +160,7 @@ #define MVNETA_INTR_ENABLE 0x25b8 #define MVNETA_TXQ_INTR_ENABLE_ALL_MASK 0x0000ff00 -#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0xff000000 // note: neta says it's 0x000000FF +#define MVNETA_RXQ_INTR_ENABLE_ALL_MASK 0x000000ff #define MVNETA_RXQ_CMD 0x2680 #define MVNETA_RXQ_DISABLE_SHIFT 8 @@ -242,6 +243,7 @@ #define MVNETA_VLAN_TAG_LEN 4 #define MVNETA_CPU_D_CACHE_LINE_SIZE 32 +#define MVNETA_TX_CSUM_DEF_SIZE 1600 #define MVNETA_TX_CSUM_MAX_SIZE 9800 #define MVNETA_ACC_MODE_EXT 1 @@ -1579,12 +1581,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - if (!skb) - goto err_drop_frame; + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. 
+ */ dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + rcvd_pkts++; rcvd_bytes += rx_bytes; @@ -3191,6 +3197,7 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp, } mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable); + mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect); } /* Power up the port */ @@ -3250,6 +3257,7 @@ static int mvneta_probe(struct platform_device *pdev) char hw_mac_addr[ETH_ALEN]; const char *mac_from; const char *managed; + int tx_csum_limit; int phy_mode; int err; int cpu; @@ -3350,8 +3358,21 @@ static int mvneta_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(dn, "marvell,armada-370-neta")) - pp->tx_csum_limit = 1600; + if (!of_property_read_u32(dn, "tx-csum-limit", &tx_csum_limit)) { + if (tx_csum_limit < 0 || + tx_csum_limit > MVNETA_TX_CSUM_MAX_SIZE) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + dev_info(&pdev->dev, + "Wrong TX csum limit in DT, set to %dB\n", + MVNETA_TX_CSUM_DEF_SIZE); + } + } else if (of_device_is_compatible(dn, "marvell,armada-370-neta")) { + tx_csum_limit = MVNETA_TX_CSUM_DEF_SIZE; + } else { + tx_csum_limit = MVNETA_TX_CSUM_MAX_SIZE; + } + + pp->tx_csum_limit = tx_csum_limit; pp->tx_ring_size = MVNETA_MAX_TXD; pp->rx_ring_size = MVNETA_MAX_RXD; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index b159ef8303cc..057665180f13 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1326,7 +1326,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) /* Get platform resources */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq = platform_get_irq(pdev, 0); - if ((!res) || (irq < 0) || (irq >= NR_IRQS)) { + if (!res || irq < 0) { dev_err(&pdev->dev, "error getting resources.\n"); ret = -ENXIO; goto err_exit; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index ee8d1ec61fab..ed5da4d47668 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1225,7 +1225,7 @@ static int ravb_open(struct net_device *ndev) /* Device init */ error = ravb_dmac_init(ndev); if (error) - goto out_free_irq; + goto out_free_irq2; ravb_emac_init(ndev); /* Initialise PTP Clock driver */ @@ -1243,9 +1243,11 @@ static int ravb_open(struct net_device *ndev) out_ptp_stop: /* Stop PTP Clock driver */ ravb_ptp_stop(ndev); +out_free_irq2: + if (priv->chip_id == RCAR_GEN3) + free_irq(priv->emac_irq, ndev); out_free_irq: free_irq(ndev->irq, ndev); - free_irq(priv->emac_irq, ndev); out_napi_off: napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 7f6f4a4fcc70..58c05acc2aab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -299,16 +299,17 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac, if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) { const char *rs; + dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; + err = of_property_read_string(np, "st,tx-retime-src", &rs); if (err < 0) { dev_warn(dev, "Use internal clock source\n"); - dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN; - } else if (!strcasecmp(rs, "clk_125")) { - dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; - } else if (!strcasecmp(rs, "txclk")) { - dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; + } else { + if 
(!strcasecmp(rs, "clk_125")) + dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125; + else if (!strcasecmp(rs, "txclk")) + dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK; } - dwmac->speed = SPEED_1000; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 64d8aa4e0cad..3c6549aee11d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -185,7 +185,7 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_100_150M; else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate < CSR_F_300M)) + else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; } } @@ -2232,6 +2232,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) frame_len = priv->hw->desc->get_rx_frame_len(p, coe); + /* check if frame_len fits the preallocated memory */ + if (frame_len > priv->dma_buf_sz) { + priv->dev->stats.rx_length_errors++; + break; + } + /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 * Type frames (LLC/LLC-SNAP) */ @@ -3102,6 +3108,7 @@ int stmmac_resume(struct net_device *ndev) init_dma_desc_rings(ndev, GFP_ATOMIC); stmmac_hw_setup(ndev, false); stmmac_init_tx_coalesce(priv); + stmmac_set_rx_mode(ndev); napi_enable(&priv->napi); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ebf6abc4853f..bba670c42e37 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -138,7 +138,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) #ifdef CONFIG_OF if (priv->device->of_node) { - int reset_gpio, active_low; if (data->reset_gpio < 0) { struct device_node *np = priv->device->of_node; @@ -154,24 +153,23 @@ int stmmac_mdio_reset(struct mii_bus *bus) "snps,reset-active-low"); of_property_read_u32_array(np, "snps,reset-delays-us", data->delays, 3); - } - reset_gpio = data->reset_gpio; - active_low = data->active_low; + if (gpio_request(data->reset_gpio, "mdio-reset")) + return 0; + } - if (!gpio_request(reset_gpio, "mdio-reset")) { - gpio_direction_output(reset_gpio, active_low ? 1 : 0); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpio_direction_output(data->reset_gpio, + data->active_low ? 1 : 0); + if (data->delays[0]) + msleep(DIV_ROUND_UP(data->delays[0], 1000)); - gpio_set_value(reset_gpio, active_low ? 0 : 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); + gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); + if (data->delays[1]) + msleep(DIV_ROUND_UP(data->delays[1], 1000)); - gpio_set_value(reset_gpio, active_low ? 1 : 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); - } + gpio_set_value(data->reset_gpio, data->active_low ? 
1 : 0); + if (data->delays[2]) + msleep(DIV_ROUND_UP(data->delays[2], 1000)); } #endif diff --git a/drivers/net/ethernet/ti/cpsw-common.c b/drivers/net/ethernet/ti/cpsw-common.c index c08be62bceba..1562ab4151e1 100644 --- a/drivers/net/ethernet/ti/cpsw-common.c +++ b/drivers/net/ethernet/ti/cpsw-common.c @@ -78,6 +78,9 @@ static int cpsw_am33xx_cm_get_macid(struct device *dev, u16 offset, int slave, int ti_cm_get_macid(struct device *dev, int slave, u8 *mac_addr) { + if (of_machine_is_compatible("ti,dm8148")) + return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); + if (of_machine_is_compatible("ti,am33xx")) return cpsw_am33xx_cm_get_macid(dev, 0x630, slave, mac_addr); diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 54036ae0a388..0fc521941c71 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -498,7 +498,7 @@ static void macvtap_sock_write_space(struct sock *sk) wait_queue_head_t *wqueue; if (!sock_writeable(sk) || - !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); @@ -585,7 +585,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(&q->sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && sock_writeable(&q->sk))) mask |= POLLOUT | POLLWRNORM; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 07a6119121c3..3ce5d9514623 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -614,7 +614,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5461, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, + { PHY_ID_BCM5481, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 48ce6ef400fe..47cd306dbb3c 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -448,7 +448,8 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd) mdiobus_write(phydev->bus, mii_data->phy_id, mii_data->reg_num, val); - if (mii_data->reg_num == MII_BMCR && + if (mii_data->phy_id == phydev->addr && + mii_data->reg_num == MII_BMCR && val & BMCR_RESET) return phy_init_hw(phydev); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b1878faea397..f0db770e8b2f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1040,7 +1040,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) mask |= POLLIN | POLLRDNORM; if (sock_writeable(sk) || - (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) && + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && sock_writeable(sk))) mask |= POLLOUT | POLLWRNORM; @@ -1488,7 +1488,7 @@ static void tun_sock_write_space(struct sock *sk) if (!sock_writeable(sk)) return; - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index a187f08113ec..3b1ba8237768 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -691,7 +691,6 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int 
drvflags) { - const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; struct usb_driver *driver; u8 *buf; @@ -725,15 +724,16 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* parse through descriptors associated with control interface */ cdc_parse_cdc_header(&hdr, intf, buf, len); - ctx->data = usb_ifnum_to_if(dev->udev, - hdr.usb_cdc_union_desc->bSlaveInterface0); + if (hdr.usb_cdc_union_desc) + ctx->data = usb_ifnum_to_if(dev->udev, + hdr.usb_cdc_union_desc->bSlaveInterface0); ctx->ether_desc = hdr.usb_cdc_ether_desc; ctx->func_desc = hdr.usb_cdc_ncm_desc; ctx->mbim_desc = hdr.usb_cdc_mbim_desc; ctx->mbim_extended_desc = hdr.usb_cdc_mbim_extended_desc; /* some buggy devices have an IAD but no CDC Union */ - if (!union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { + if (!hdr.usb_cdc_union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1); dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n"); } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 34799eaace41..9a5be8b85186 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -725,6 +725,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 899ea4288197..417903715437 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -587,6 +587,12 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, &adapter->pdev->dev, rbi->skb->data, rbi->len, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + dev_kfree_skb_any(rbi->skb); + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffer skipped by the device */ } @@ -605,13 +611,18 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, &adapter->pdev->dev, rbi->page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + rbi->dma_addr)) { + put_page(rbi->page); + rq->stats.rx_buf_alloc_failure++; + break; + } } else { /* rx buffers skipped by the device */ } val = VMXNET3_RXD_BTYPE_BODY << VMXNET3_RXD_BTYPE_SHIFT; } - BUG_ON(rbi->dma_addr == 0); gd->rxd.addr = cpu_to_le64(rbi->dma_addr); gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT) | val | rbi->len); @@ -655,7 +666,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, } -static void +static int vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, struct vmxnet3_tx_queue *tq, struct pci_dev *pdev, struct vmxnet3_adapter *adapter) @@ -715,6 +726,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, skb->data + buf_offset, buf_size, PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -755,6 +768,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, 
tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag, buf_offset, buf_size, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + return -EFAULT; tbi->len = buf_size; @@ -782,6 +797,8 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, /* set the last buf_info for the pkt */ tbi->skb = skb; tbi->sop_idx = ctx->sop_txd - tq->tx_ring.base; + + return 0; } @@ -1020,7 +1037,8 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } /* fill tx descs related to addr & len */ - vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter); + if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter)) + goto unlock_drop_pkt; /* setup the EOP desc */ ctx.eop_txd->dword[3] = cpu_to_le32(VMXNET3_TXD_CQ | VMXNET3_TXD_EOP); @@ -1231,6 +1249,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_rx_buf_info *rbi; struct sk_buff *skb, *new_skb = NULL; struct page *new_page = NULL; + dma_addr_t new_dma_addr; int num_to_alloc; struct Vmxnet3_RxDesc *rxd; u32 idx, ring_idx; @@ -1287,6 +1306,21 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skip_page_frags = true; goto rcd_done; } + new_dma_addr = dma_map_single(&adapter->pdev->dev, + new_skb->data, rbi->len, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + dev_kfree_skb(new_skb); + /* Skb allocation failed, do not handover this + * skb to stack. Reuse it. Drop the existing pkt + */ + rq->stats.rx_buf_alloc_failure++; + ctx->skb = NULL; + rq->stats.drop_total++; + skip_page_frags = true; + goto rcd_done; + } dma_unmap_single(&adapter->pdev->dev, rbi->dma_addr, rbi->len, @@ -1303,9 +1337,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, /* Immediate refill */ rbi->skb = new_skb; - rbi->dma_addr = dma_map_single(&adapter->pdev->dev, - rbi->skb->data, rbi->len, - PCI_DMA_FROMDEVICE); + rbi->dma_addr = new_dma_addr; rxd->addr = cpu_to_le64(rbi->dma_addr); rxd->len = rbi->len; if (adapter->version == 2 && @@ -1348,6 +1380,19 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, skip_page_frags = true; goto rcd_done; } + new_dma_addr = dma_map_page(&adapter->pdev->dev + , rbi->page, + 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&adapter->pdev->dev, + new_dma_addr)) { + put_page(new_page); + rq->stats.rx_buf_alloc_failure++; + dev_kfree_skb(ctx->skb); + ctx->skb = NULL; + skip_page_frags = true; + goto rcd_done; + } dma_unmap_page(&adapter->pdev->dev, rbi->dma_addr, rbi->len, @@ -1357,10 +1402,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, /* Immediate refill */ rbi->page = new_page; - rbi->dma_addr = dma_map_page(&adapter->pdev->dev - , rbi->page, - 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + rbi->dma_addr = new_dma_addr; rxd->addr = cpu_to_le64(rbi->dma_addr); rxd->len = rbi->len; } @@ -2167,7 +2209,8 @@ vmxnet3_set_mc(struct net_device *netdev) PCI_DMA_TODEVICE); } - if (new_table_pa) { + if (!dma_mapping_error(&adapter->pdev->dev, + new_table_pa)) { new_mode |= VMXNET3_RXM_MCAST; rxConf->mfTablePA = cpu_to_le64(new_table_pa); } else { @@ -3075,6 +3118,11 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { + dev_err(&pdev->dev, "Failed to map dma\n"); + err = -EFAULT; + goto err_dma_map; + } adapter->shared = dma_alloc_coherent( &adapter->pdev->dev, sizeof(struct Vmxnet3_DriverShared), @@ -3233,6 +3281,7 @@ err_alloc_queue_desc: 
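/* vmxnet3_probe_device() unwinds in reverse order of setup: the
 * err_dma_map label sits after the dma_unmap_single() of adapter_pa,
 * so a failed adapter mapping skips the unmap and goes straight to
 * free_netdev().
 */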
err_alloc_shared: dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); +err_dma_map: free_netdev(netdev); return err; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 92fa3e1ea65c..4f9748457f5a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -907,7 +907,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net_vrf *vrf = netdev_priv(dev); - int err; if (!data || !data[IFLA_VRF_TABLE]) return -EINVAL; @@ -916,15 +915,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_L3MDEV_MASTER; - err = register_netdevice(dev); - if (err < 0) - goto out_fail; - - return 0; - -out_fail: - free_netdev(dev); - return err; + return register_netdevice(dev); } static size_t vrf_nl_getsize(const struct net_device *dev) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index e92aaf615901..89541cc90e87 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1075,11 +1075,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) used = pvc_is_used(pvc); - if (type == ARPHRD_ETHER) { + if (type == ARPHRD_ETHER) dev = alloc_netdev(0, "pvceth%d", NET_NAME_UNKNOWN, ether_setup); - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - } else + else dev = alloc_netdev(0, "pvc%d", NET_NAME_UNKNOWN, pvc_setup); if (!dev) { @@ -1088,9 +1087,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) return -ENOBUFS; } - if (type == ARPHRD_ETHER) + if (type == ARPHRD_ETHER) { + dev->priv_flags &= ~IFF_TX_SKB_SHARING; eth_hw_addr_random(dev); - else { + } else { *(__be16*)dev->dev_addr = htons(dlci); dlci_to_q922(dev->broadcast, dlci); } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 5c47b011a9d7..cd39025d2abf 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -549,16 +549,12 @@ static void x25_asy_receive_buf(struct tty_struct *tty, static int x25_asy_open_tty(struct tty_struct *tty) { - struct x25_asy *sl = tty->disc_data; + struct x25_asy *sl; int err; if (tty->ops->write == NULL) return -EOPNOTSUPP; - /* First make sure we're not already connected. */ - if (sl && sl->magic == X25_ASY_MAGIC) - return -EEXIST; - /* OK. Find a free X.25 channel to use. 
*/ sl = x25_asy_alloc(); if (sl == NULL) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index aa9bd92ac4ed..0947cc271e69 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(rawmode, "Use raw 802.11 frame datapath"); static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = QCA988X_HW_2_0_VERSION, + .dev_id = QCA988X_2_0_DEVICE_ID, .name = "qca988x hw2.0", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, @@ -69,6 +70,25 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_2_1_VERSION, + .dev_id = QCA6164_2_1_DEVICE_ID, + .name = "qca6164 hw2.1", + .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, + .uart_pin = 6, + .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, + .fw = { + .dir = QCA6174_HW_2_1_FW_DIR, + .fw = QCA6174_HW_2_1_FW_FILE, + .otp = QCA6174_HW_2_1_OTP_FILE, + .board = QCA6174_HW_2_1_BOARD_DATA_FILE, + .board_size = QCA6174_BOARD_DATA_SZ, + .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, + }, + }, + { + .id = QCA6174_HW_2_1_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -86,6 +106,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_3_0_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw3.0", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -103,6 +124,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA6174_HW_3_2_VERSION, + .dev_id = QCA6174_2_1_DEVICE_ID, .name = "qca6174 hw3.2", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, @@ -121,6 +143,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA99X0_HW_2_0_DEV_VERSION, + .dev_id = QCA99X0_2_0_DEVICE_ID, .name = "qca99x0 hw2.0", .patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, @@ -139,10 +162,31 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { }, { .id = QCA9377_HW_1_0_DEV_VERSION, + .dev_id = QCA9377_1_0_DEVICE_ID, .name = "qca9377 hw1.0", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, - .uart_pin = 7, + .uart_pin = 6, .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, + .fw = { + .dir = QCA9377_HW_1_0_FW_DIR, + .fw = QCA9377_HW_1_0_FW_FILE, + .otp = QCA9377_HW_1_0_OTP_FILE, + .board = QCA9377_HW_1_0_BOARD_DATA_FILE, + .board_size = QCA9377_BOARD_DATA_SZ, + .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, + }, + }, + { + .id = QCA9377_HW_1_1_DEV_VERSION, + .dev_id = QCA9377_1_0_DEVICE_ID, + .name = "qca9377 hw1.1", + .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, + .uart_pin = 6, + .otp_exe_param = 0, + .channel_counters_freq_hz = 88000, + .max_probe_resp_desc_thres = 0, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .fw = QCA9377_HW_1_0_FW_FILE, @@ -1263,7 +1307,8 @@ static int ath10k_init_hw_params(struct ath10k *ar) for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) { hw_params = &ath10k_hw_params_list[i]; - if (hw_params->id == ar->target_version) + if (hw_params->id == ar->target_version && + hw_params->dev_id == ar->dev_id) break; } diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 018c64f4fd25..858d75f49a9f 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -636,6 +636,7 @@ struct 
ath10k { struct ath10k_hw_params { u32 id; + u16 dev_id; const char *name; u32 patch_load_addr; int uart_pin; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 39966a05c1cc..713c2bcea178 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -22,6 +22,12 @@ #define ATH10K_FW_DIR "ath10k" +#define QCA988X_2_0_DEVICE_ID (0x003c) +#define QCA6164_2_1_DEVICE_ID (0x0041) +#define QCA6174_2_1_DEVICE_ID (0x003e) +#define QCA99X0_2_0_DEVICE_ID (0x0040) +#define QCA9377_1_0_DEVICE_ID (0x0042) + /* QCA988X 1.0 definitions (unsupported) */ #define QCA988X_HW_1_0_CHIP_ID_REV 0x0 @@ -42,6 +48,10 @@ #define QCA6174_HW_3_0_VERSION 0x05020000 #define QCA6174_HW_3_2_VERSION 0x05030000 +/* QCA9377 target BMI version signatures */ +#define QCA9377_HW_1_0_DEV_VERSION 0x05020000 +#define QCA9377_HW_1_1_DEV_VERSION 0x05020001 + enum qca6174_pci_rev { QCA6174_PCI_REV_1_1 = 0x11, QCA6174_PCI_REV_1_3 = 0x13, @@ -60,6 +70,11 @@ enum qca6174_chip_id_rev { QCA6174_HW_3_2_CHIP_ID_REV = 10, }; +enum qca9377_chip_id_rev { + QCA9377_HW_1_0_CHIP_ID_REV = 0x0, + QCA9377_HW_1_1_CHIP_ID_REV = 0x1, +}; + #define QCA6174_HW_2_1_FW_DIR "ath10k/QCA6174/hw2.1" #define QCA6174_HW_2_1_FW_FILE "firmware.bin" #define QCA6174_HW_2_1_OTP_FILE "otp.bin" @@ -85,8 +100,6 @@ enum qca6174_chip_id_rev { #define QCA99X0_HW_2_0_PATCH_LOAD_ADDR 0x1234 /* QCA9377 1.0 definitions */ -#define QCA9377_HW_1_0_DEV_VERSION 0x05020001 -#define QCA9377_HW_1_0_CHIP_ID_REV 0x1 #define QCA9377_HW_1_0_FW_DIR ATH10K_FW_DIR "/QCA9377/hw1.0" #define QCA9377_HW_1_0_FW_FILE "firmware.bin" #define QCA9377_HW_1_0_OTP_FILE "otp.bin" diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index a7411fe90cc4..95a55405ebf0 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4225,7 +4225,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed) static u32 get_nss_from_chainmask(u16 chain_mask) { - if ((chain_mask & 0x15) == 0x15) + if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 3fca200b986c..930785a724e1 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -57,12 +57,6 @@ MODULE_PARM_DESC(reset_mode, "0: auto, 1: warm only (default: 0)"); #define ATH10K_PCI_TARGET_WAIT 3000 #define ATH10K_PCI_NUM_WARM_RESET_ATTEMPTS 3 -#define QCA988X_2_0_DEVICE_ID (0x003c) -#define QCA6164_2_1_DEVICE_ID (0x0041) -#define QCA6174_2_1_DEVICE_ID (0x003e) -#define QCA99X0_2_0_DEVICE_ID (0x0040) -#define QCA9377_1_0_DEVICE_ID (0x0042) - static const struct pci_device_id ath10k_pci_id_table[] = { { PCI_VDEVICE(ATHEROS, QCA988X_2_0_DEVICE_ID) }, /* PCI-E QCA988X V2 */ { PCI_VDEVICE(ATHEROS, QCA6164_2_1_DEVICE_ID) }, /* PCI-E QCA6164 V2.1 */ @@ -92,7 +86,9 @@ static const struct ath10k_pci_supp_chip ath10k_pci_supp_chips[] = { { QCA6174_2_1_DEVICE_ID, QCA6174_HW_3_2_CHIP_ID_REV }, { QCA99X0_2_0_DEVICE_ID, QCA99X0_HW_2_0_CHIP_ID_REV }, + { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_0_CHIP_ID_REV }, + { QCA9377_1_0_DEVICE_ID, QCA9377_HW_1_1_CHIP_ID_REV }, }; static void ath10k_pci_buffer_cleanup(struct ath10k *ar); @@ -111,8 +107,9 @@ static void ath10k_pci_htc_tx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state); static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state); static void 
ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state); +static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state); -static const struct ce_attr host_ce_config_wlan[] = { +static struct ce_attr host_ce_config_wlan[] = { /* CE0: host->target HTC control and raw streams */ { .flags = CE_ATTR_FLAGS, @@ -128,7 +125,7 @@ static const struct ce_attr host_ce_config_wlan[] = { .src_nentries = 0, .src_sz_max = 2048, .dest_nentries = 512, - .recv_cb = ath10k_pci_htc_rx_cb, + .recv_cb = ath10k_pci_htt_htc_rx_cb, }, /* CE2: target->host WMI */ @@ -217,7 +214,7 @@ static const struct ce_attr host_ce_config_wlan[] = { }; /* Target firmware's Copy Engine configuration. */ -static const struct ce_pipe_config target_ce_config_wlan[] = { +static struct ce_pipe_config target_ce_config_wlan[] = { /* CE0: host->target HTC control and raw streams */ { .pipenum = __cpu_to_le32(0), @@ -330,7 +327,7 @@ static const struct ce_pipe_config target_ce_config_wlan[] = { * This table is derived from the CE_PCI TABLE, above. * It is passed to the Target at startup for use by firmware. */ -static const struct service_to_pipe target_service_to_ce_map_wlan[] = { +static struct service_to_pipe target_service_to_ce_map_wlan[] = { { __cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO), __cpu_to_le32(PIPEDIR_OUT), /* out = UL = host -> target */ @@ -1208,6 +1205,16 @@ static void ath10k_pci_htc_rx_cb(struct ath10k_ce_pipe *ce_state) ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler); } +static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state) +{ + /* CE4 polling needs to be done whenever the CE pipe which transports + * HTT Rx (target->host) is processed. + */ + ath10k_ce_per_engine_service(ce_state->ar, 4); + + ath10k_pci_process_rx_cb(ce_state, ath10k_htc_rx_completion_handler); +} /* Called by lower (CE) layer when a send to HTT Target completes. */ static void ath10k_pci_htt_tx_cb(struct ath10k_ce_pipe *ce_state) { @@ -2027,6 +2034,29 @@ static int ath10k_pci_init_config(struct ath10k *ar) return 0; } +static void ath10k_pci_override_ce_config(struct ath10k *ar) +{ + struct ce_attr *attr; + struct ce_pipe_config *config; + + /* For QCA6174 we're overriding the Copy Engine 5 configuration, + * since it is currently used for another feature.
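
The override function introduced here is also why the three Copy Engine tables above lose their const qualifier in this same patch: the QCA6174 quirk has to rewrite individual table entries at probe time, before the configuration is handed to the target. A reduced sketch of that mutate-before-use pattern, with structure and field names simplified from the driver:

#include <stdio.h>

struct ce_attr_s {
        unsigned int src_sz_max;
        unsigned int dest_nentries;
};

/* Must stay mutable: a probe-time quirk patches entry 5 in place. */
static struct ce_attr_s host_ce_cfg[8] = {
        [5] = { .src_sz_max = 512, .dest_nentries = 512 },
};

static void override_ce5_for_quirk(void)
{
        /* Neutralize the host side of CE5 so firmware can repurpose it. */
        host_ce_cfg[5].src_sz_max = 0;
        host_ce_cfg[5].dest_nentries = 0;
}

int main(void)
{
        override_ce5_for_quirk();
        printf("CE5 dest_nentries = %u\n", host_ce_cfg[5].dest_nentries);
        return 0;
}
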
+ */ + + /* Override Host's Copy Engine 5 configuration */ + attr = &host_ce_config_wlan[5]; + attr->src_sz_max = 0; + attr->dest_nentries = 0; + + /* Override Target firmware's Copy Engine configuration */ + config = &target_ce_config_wlan[5]; + config->pipedir = __cpu_to_le32(PIPEDIR_OUT); + config->nbytes_max = __cpu_to_le32(2048); + + /* Map from service/endpoint to Copy Engine */ + target_service_to_ce_map_wlan[15].pipenum = __cpu_to_le32(1); +} + static int ath10k_pci_alloc_pipes(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); @@ -3020,6 +3050,9 @@ static int ath10k_pci_probe(struct pci_dev *pdev, goto err_core_destroy; } + if (QCA_REV_6174(ar)) + ath10k_pci_override_ce_config(ar); + ret = ath10k_pci_alloc_pipes(ar); if (ret) { ath10k_err(ar, "failed to allocate copy engine pipes: %d\n", diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index 1a73c7a1da77..bf88ec3a65fa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -69,7 +69,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL7260_UCODE_API_MAX 17 +#define IWL7260_UCODE_API_MAX 19 /* Oldest version we won't warn about */ #define IWL7260_UCODE_API_OK 13 diff --git a/drivers/net/wireless/iwlwifi/iwl-8000.c b/drivers/net/wireless/iwlwifi/iwl-8000.c index 0116e5a4c393..9bcc0bf937d8 100644 --- a/drivers/net/wireless/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/iwlwifi/iwl-8000.c @@ -69,7 +69,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL8000_UCODE_API_MAX 17 +#define IWL8000_UCODE_API_MAX 19 /* Oldest version we won't warn about */ #define IWL8000_UCODE_API_OK 13 diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c index 85ae902df7c0..29ae58ebf223 100644 --- a/drivers/net/wireless/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/iwlwifi/mvm/d3.c @@ -309,9 +309,9 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, * to transmit packets to the AP, i.e. the PTK. */ if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) { - key->hw_key_idx = 0; mvm->ptk_ivlen = key->iv_len; mvm->ptk_icvlen = key->icv_len; + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 0); } else { /* * firmware only supports TSC/RSC for a single key, @@ -319,12 +319,11 @@ static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw, * with new ones -- this relies on mac80211 doing * list_add_tail(). 
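
The d3.c rework here stops stashing the slot in key->hw_key_idx and instead passes an explicit offset to iwl_mvm_set_sta_key(): the D3 firmware hardcodes the pairwise key to slot 0, so the suspend path pins the PTK to offset 0 and the GTK to offset 1 while leaving hw_key_idx intact for the resume path. The idea in miniature, with types reduced; the real signature appears in the sta.h hunk further down:

#include <stdbool.h>
#include <stdio.h>

#define D3_PTK_SLOT 0        /* D3 firmware hardcodes the pairwise key here */
#define D3_GTK_SLOT 1

struct wlan_key {
        bool pairwise;
        unsigned int hw_key_idx;        /* untouched: needed again on resume */
};

static int program_key(const struct wlan_key *k, unsigned int slot)
{
        printf("programming %s at slot %u\n",
               k->pairwise ? "PTK" : "GTK", slot);
        return 0;
}

static int d3_program(const struct wlan_key *k)
{
        return program_key(k, k->pairwise ? D3_PTK_SLOT : D3_GTK_SLOT);
}

int main(void)
{
        struct wlan_key ptk = { .pairwise = true, .hw_key_idx = 5 };
        struct wlan_key gtk = { .pairwise = false, .hw_key_idx = 6 };

        d3_program(&ptk);        /* slot 0, ptk.hw_key_idx stays 5 */
        d3_program(&gtk);        /* slot 1 */
        return 0;
}
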
*/ - key->hw_key_idx = 1; mvm->gtk_ivlen = key->iv_len; mvm->gtk_icvlen = key->icv_len; + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, 1); } - ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, true); data->error = ret != 0; out_unlock: mutex_unlock(&mvm->mutex); @@ -772,9 +771,6 @@ static int iwl_mvm_switch_to_d3(struct iwl_mvm *mvm) */ set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status); - /* We reprogram keys and shouldn't allocate new key indices */ - memset(mvm->fw_key_table, 0, sizeof(mvm->fw_key_table)); - mvm->ptk_ivlen = 0; mvm->ptk_icvlen = 0; mvm->ptk_ivlen = 0; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 1fb684693040..e88afac51c5d 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -2941,6 +2941,7 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + u8 key_offset; if (iwlwifi_mod_params.sw_crypto) { IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n"); @@ -3006,10 +3007,14 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, break; } + /* in HW restart reuse the index, otherwise request a new one */ + if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) + key_offset = key->hw_key_idx; + else + key_offset = STA_KEY_IDX_INVALID; + IWL_DEBUG_MAC80211(mvm, "set hwcrypto key\n"); - ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, - test_bit(IWL_MVM_STATUS_IN_HW_RESTART, - &mvm->status)); + ret = iwl_mvm_set_sta_key(mvm, vif, sta, key, key_offset); if (ret) { IWL_WARN(mvm, "set key failed\n"); /* diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 300a249486e4..354acbde088e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1201,7 +1201,8 @@ static int iwl_mvm_set_fw_key_idx(struct iwl_mvm *mvm) return max_offs; } -static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, +static u8 iwl_mvm_get_key_sta_id(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, struct ieee80211_sta *sta) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); @@ -1218,8 +1219,21 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, * station ID, then use AP's station ID. */ if (vif->type == NL80211_IFTYPE_STATION && - mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) - return mvmvif->ap_sta_id; + mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) { + u8 sta_id = mvmvif->ap_sta_id; + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + /* + * It is possible that the 'sta' parameter is NULL, + * for example when a GTK is removed - the sta_id will then + * be the AP ID, and no station was passed by mac80211. 
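
On the mac80211.c side the old have_key_offset flag becomes a concrete offset: during a firmware restart the previously assigned index is reused so the firmware ends up with the same slots as before, and otherwise a sentinel value asks the allocator for a fresh one. Reduced to its essentials; 0xff below is a stand-in for STA_KEY_IDX_INVALID, whose real value lives in the iwlwifi headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STA_KEY_IDX_INVALID 0xff        /* sentinel: allocate a fresh slot */

struct wlan_key {
        uint8_t hw_key_idx;
};

static uint8_t pick_key_offset(const struct wlan_key *k, bool in_hw_restart)
{
        /* After a firmware restart the same slots must be reprogrammed. */
        return in_hw_restart ? k->hw_key_idx : STA_KEY_IDX_INVALID;
}

int main(void)
{
        struct wlan_key k = { .hw_key_idx = 3 };

        printf("restart: %u, normal: 0x%x\n",
               (unsigned int)pick_key_offset(&k, true),
               (unsigned int)pick_key_offset(&k, false));
        return 0;
}
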
+ */ + if (IS_ERR_OR_NULL(sta)) + return IWL_MVM_STATION_COUNT; + + return sta_id; + } return IWL_MVM_STATION_COUNT; } @@ -1227,7 +1241,8 @@ static u8 iwl_mvm_get_key_sta_id(struct ieee80211_vif *vif, static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvm_sta, struct ieee80211_key_conf *keyconf, bool mcast, - u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags) + u32 tkip_iv32, u16 *tkip_p1k, u32 cmd_flags, + u8 key_offset) { struct iwl_mvm_add_sta_key_cmd cmd = {}; __le16 key_flags; @@ -1269,7 +1284,7 @@ static int iwl_mvm_send_sta_key(struct iwl_mvm *mvm, if (mcast) key_flags |= cpu_to_le16(STA_KEY_MULTICAST); - cmd.key_offset = keyconf->hw_key_idx; + cmd.key_offset = key_offset; cmd.key_flags = key_flags; cmd.sta_id = sta_id; @@ -1360,6 +1375,7 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *keyconf, + u8 key_offset, bool mcast) { struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); @@ -1375,17 +1391,17 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, ieee80211_get_key_rx_seq(keyconf, 0, &seq); ieee80211_get_tkip_rx_p1k(keyconf, addr, seq.tkip.iv32, p1k); ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - seq.tkip.iv32, p1k, 0); + seq.tkip.iv32, p1k, 0, key_offset); break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - 0, NULL, 0); + 0, NULL, 0, key_offset); break; default: ret = iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - 0, NULL, 0); + 0, NULL, 0, key_offset); } return ret; @@ -1433,7 +1449,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *keyconf, - bool have_key_offset) + u8 key_offset) { bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); u8 sta_id; @@ -1443,7 +1459,7 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); /* Get the station id from the mvm local station table */ - sta_id = iwl_mvm_get_key_sta_id(vif, sta); + sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); if (sta_id == IWL_MVM_STATION_COUNT) { IWL_ERR(mvm, "Failed to find station id\n"); return -EINVAL; @@ -1470,18 +1486,25 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif)) return -EINVAL; - if (!have_key_offset) { - /* - * The D3 firmware hardcodes the PTK offset to 0, so we have to - * configure it there. As a result, this workaround exists to - * let the caller set the key offset (hw_key_idx), see d3.c. - */ - keyconf->hw_key_idx = iwl_mvm_set_fw_key_idx(mvm); - if (keyconf->hw_key_idx == STA_KEY_IDX_INVALID) + /* If the key_offset is not pre-assigned, we need to find a + * new offset to use. In normal cases, the offset is not + * pre-assigned, but during HW_RESTART we want to reuse the + * same indices, so we pass them when this function is called. + * + * In D3 entry, we need to hardcoded the indices (because the + * firmware hardcodes the PTK offset to 0). In this case, we + * need to make sure we don't overwrite the hw_key_idx in the + * keyconf structure, because otherwise we cannot configure + * the original ones back when resuming. 
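
The allocation path this comment refers to hands out hardware key slots from a driver-owned bitmap, which is also why the d3.c hunk earlier stops wiping mvm->fw_key_table on suspend: clearing it would orphan slots that are still programmed. A self-contained sketch of such a slot allocator, with table size and names chosen for illustration rather than taken from the driver:

#include <stdio.h>

#define NUM_KEY_SLOTS 16
#define SLOT_INVALID  0xffu

static unsigned int fw_key_table;        /* bit i set => slot i in use */

static unsigned int alloc_key_slot(void)
{
        for (unsigned int i = 0; i < NUM_KEY_SLOTS; i++) {
                if (!(fw_key_table & (1u << i))) {
                        fw_key_table |= 1u << i;
                        return i;
                }
        }
        return SLOT_INVALID;        /* caller turns this into -ENOSPC */
}

static void free_key_slot(unsigned int i)
{
        fw_key_table &= ~(1u << i);
}

int main(void)
{
        unsigned int a = alloc_key_slot();
        unsigned int b = alloc_key_slot();

        printf("%u %u\n", a, b);                /* 0 1 */
        free_key_slot(0);
        printf("%u\n", alloc_key_slot());       /* 0 again */
        return 0;
}
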
+ */ + if (key_offset == STA_KEY_IDX_INVALID) { + key_offset = iwl_mvm_set_fw_key_idx(mvm); + if (key_offset == STA_KEY_IDX_INVALID) return -ENOSPC; + keyconf->hw_key_idx = key_offset; } - ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, mcast); + ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, key_offset, mcast); if (ret) { __clear_bit(keyconf->hw_key_idx, mvm->fw_key_table); goto end; @@ -1495,7 +1518,8 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, */ if (keyconf->cipher == WLAN_CIPHER_SUITE_WEP40 || keyconf->cipher == WLAN_CIPHER_SUITE_WEP104) { - ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, !mcast); + ret = __iwl_mvm_set_sta_key(mvm, vif, sta, keyconf, + key_offset, !mcast); if (ret) { __clear_bit(keyconf->hw_key_idx, mvm->fw_key_table); __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast); @@ -1521,7 +1545,7 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); /* Get the station id from the mvm local station table */ - sta_id = iwl_mvm_get_key_sta_id(vif, sta); + sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); @@ -1547,24 +1571,6 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, return 0; } - /* - * It is possible that the 'sta' parameter is NULL, and thus - * there is a need to retrieve the sta from the local station table, - * for example when a GTK is removed (where the sta_id will then be - * the AP ID, and no station was passed by mac80211.) - */ - if (!sta) { - sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); - if (!sta) { - IWL_ERR(mvm, "Invalid station id\n"); - return -EINVAL; - } - } - - if (WARN_ON_ONCE(iwl_mvm_sta_from_mac80211(sta)->vif != vif)) - return -EINVAL; - ret = __iwl_mvm_remove_sta_key(mvm, sta_id, keyconf, mcast); if (ret) return ret; @@ -1584,7 +1590,7 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, u16 *phase1key) { struct iwl_mvm_sta *mvm_sta; - u8 sta_id = iwl_mvm_get_key_sta_id(vif, sta); + u8 sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT)) @@ -1602,7 +1608,7 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, - iv32, phase1key, CMD_ASYNC); + iv32, phase1key, CMD_ASYNC, keyconf->hw_key_idx); rcu_read_unlock(); } diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.h b/drivers/net/wireless/iwlwifi/mvm/sta.h index eedb215eba3f..0631cc0a6d3c 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/iwlwifi/mvm/sta.h @@ -365,8 +365,8 @@ int iwl_mvm_rm_sta_id(struct iwl_mvm *mvm, int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, - struct ieee80211_key_conf *key, - bool have_key_offset); + struct ieee80211_key_conf *keyconf, + u8 key_offset); int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta, diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 644b58bc5226..639761fb2bfb 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -423,14 +423,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0130, 
iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)}, @@ -438,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)}, {IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, #endif /* CONFIG_IWLMVM */ {0} diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 6e9418ed90c2..bbb789f8990b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -2272,7 +2272,7 @@ void rtl8821ae_enable_interrupt(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); - if (!rtlpci->int_clear) + if (rtlpci->int_clear) rtl8821ae_clear_interrupt(hw);/*clear it here first*/ rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 8ee141a55bc5..142bdff4ed60 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -448,7 +448,7 @@ MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the 
watchdog (default 0)\n"); -MODULE_PARM_DESC(int_clear, "Set to 1 to disable interrupt clear before set (default 0)\n"); +MODULE_PARM_DESC(int_clear, "Set to 0 to disable interrupt clear before set (default 1)\n"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 219dc206fa5f..a5fe23952586 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_BLK_DEV_NVME) += nvme.o -nvme-y += pci.o scsi.o lightnvm.o +lightnvm-$(CONFIG_NVM) := lightnvm.o +nvme-y += pci.o scsi.o $(lightnvm-y) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 9202d1a468d0..06c336410235 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -22,8 +22,6 @@ #include "nvme.h" -#ifdef CONFIG_NVM - #include <linux/nvme.h> #include <linux/bitops.h> #include <linux/lightnvm.h> @@ -357,10 +355,11 @@ out: return ret; } -static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa, +static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, int nr_blocks, nvm_bb_update_fn *update_bbtbl, void *priv) { + struct request_queue *q = nvmdev->q; struct nvme_ns *ns = q->queuedata; struct nvme_dev *dev = ns->dev; struct nvme_nvm_command c = {}; @@ -404,6 +403,7 @@ static int nvme_nvm_get_bb_tbl(struct request_queue *q, struct ppa_addr ppa, goto out; } + ppa = dev_to_generic_addr(nvmdev, ppa); ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv); if (ret) { ret = -EINTR; @@ -571,31 +571,27 @@ void nvme_nvm_unregister(struct request_queue *q, char *disk_name) nvm_unregister(disk_name); } +/* move to shared place when used in multiple places. */ +#define PCI_VENDOR_ID_CNEX 0x1d1d +#define PCI_DEVICE_ID_CNEX_WL 0x2807 +#define PCI_DEVICE_ID_CNEX_QEMU 0x1f1f + int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) { struct nvme_dev *dev = ns->dev; struct pci_dev *pdev = to_pci_dev(dev->dev); /* QEMU NVMe simulator - PCI ID + Vendor specific bit */ - if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == 0x5845 && + if (pdev->vendor == PCI_VENDOR_ID_CNEX && + pdev->device == PCI_DEVICE_ID_CNEX_QEMU && id->vs[0] == 0x1) return 1; /* CNEX Labs - PCI ID + Vendor specific bit */ - if (pdev->vendor == 0x1d1d && pdev->device == 0x2807 && + if (pdev->vendor == PCI_VENDOR_ID_CNEX && + pdev->device == PCI_DEVICE_ID_CNEX_WL && id->vs[0] == 0x1) return 1; return 0; } -#else -int nvme_nvm_register(struct request_queue *q, char *disk_name) -{ - return 0; -} -void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {}; -int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) -{ - return 0; -} -#endif /* CONFIG_NVM */ diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fdb4e5bad9ac..044253dca30a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -136,8 +136,22 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); int nvme_sg_get_version_num(int __user *ip); +#ifdef CONFIG_NVM int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id); int nvme_nvm_register(struct request_queue *q, char *disk_name); void nvme_nvm_unregister(struct request_queue *q, char *disk_name); +#else +static inline int nvme_nvm_register(struct request_queue *q, char *disk_name) +{ + return 0; +} + +static inline void nvme_nvm_unregister(struct request_queue *q, char *disk_name) {}; + +static inline 
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id) +{ + return 0; +} +#endif /* CONFIG_NVM */ #endif /* _NVME_H */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f3b53af789ef..9e294ff4e652 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2708,6 +2708,18 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); dev->db_stride = 1 << NVME_CAP_STRIDE(cap); dev->dbs = ((void __iomem *)dev->bar) + 4096; + + /* + * Temporary fix for the Apple controller found in the MacBook8,1 and + * some MacBook7,1 to avoid controller resets and data loss. + */ + if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) { + dev->q_depth = 2; + dev_warn(dev->dev, "detected Apple NVMe controller, set " + "queue depth=%u to work around controller resets\n", + dev->q_depth); + } + if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) dev->cmb = nvme_map_cmb(dev); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 4446fcb5effd..d7ffd66814bb 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1146,9 +1146,21 @@ static int pci_pm_runtime_suspend(struct device *dev) pci_dev->state_saved = false; pci_dev->no_d3cold = false; error = pm->runtime_suspend(dev); - suspend_report_result(pm->runtime_suspend, error); - if (error) + if (error) { + /* + * -EBUSY and -EAGAIN is used to request the runtime PM core + * to schedule a new suspend, so log the event only with debug + * log level. + */ + if (error == -EBUSY || error == -EAGAIN) + dev_dbg(dev, "can't suspend now (%pf returned %d)\n", + pm->runtime_suspend, error); + else + dev_err(dev, "can't suspend (%pf returned %d)\n", + pm->runtime_suspend, error); + return error; + } if (!pci_dev->d3cold_allowed) pci_dev->no_d3cold = true; diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index b422e4ed73f4..312c78b27a32 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -5,8 +5,6 @@ config PINCTRL bool -if PINCTRL - menu "Pin controllers" depends on PINCTRL @@ -274,5 +272,3 @@ config PINCTRL_TB10X select GPIOLIB endmenu - -endif diff --git a/drivers/pinctrl/freescale/pinctrl-imx1-core.c b/drivers/pinctrl/freescale/pinctrl-imx1-core.c index 88a7fac11bd4..acaf84cadca3 100644 --- a/drivers/pinctrl/freescale/pinctrl-imx1-core.c +++ b/drivers/pinctrl/freescale/pinctrl-imx1-core.c @@ -538,8 +538,10 @@ static int imx1_pinctrl_parse_functions(struct device_node *np, func->groups[i] = child->name; grp = &info->groups[grp_index++]; ret = imx1_pinctrl_parse_groups(child, grp, info, i++); - if (ret == -ENOMEM) + if (ret == -ENOMEM) { + of_node_put(child); return ret; + } } return 0; @@ -582,8 +584,10 @@ static int imx1_pinctrl_parse_dt(struct platform_device *pdev, for_each_child_of_node(np, child) { ret = imx1_pinctrl_parse_functions(child, info, ifunc++); - if (ret == -ENOMEM) + if (ret == -ENOMEM) { + of_node_put(child); return -ENOMEM; + } } return 0; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index f307f1d27d64..5c717275a7fa 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -747,7 +747,7 @@ static int mtk_gpio_get_direction(struct gpio_chip *chip, unsigned offset) reg_addr = mtk_get_port(pctl, offset) + pctl->devdata->dir_offset; bit = BIT(offset & 0xf); regmap_read(pctl->regmap1, reg_addr, &read_val); - return !!(read_val & bit); + return !(read_val & bit); } static int 
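
The nvme changes in this region trade in-file #ifdef CONFIG_NVM guards for the kernel's usual pattern: the Makefile builds lightnvm.o only when the option is set, and the header supplies no-op static inline stubs otherwise, so call sites never need conditional compilation. A minimal illustration of the header side; CONFIG_MYFEATURE and the function names are invented for the example:

#include <stdio.h>

/* Uncomment to mimic building with CONFIG_MYFEATURE=y. */
/* #define CONFIG_MYFEATURE 1 */

#ifdef CONFIG_MYFEATURE
int myfeature_register(const char *name);        /* real code in myfeature.c */
void myfeature_unregister(const char *name);
#else
/* Header stubs: call sites compile, link and run unchanged when the
 * feature is configured out; no #ifdef is needed at any caller. */
static inline int myfeature_register(const char *name)
{
        (void)name;
        return 0;
}
static inline void myfeature_unregister(const char *name)
{
        (void)name;
}
#endif /* CONFIG_MYFEATURE */

int main(void)
{
        printf("register: %d\n", myfeature_register("demo"));
        myfeature_unregister("demo");
        return 0;
}

The Makefile side pairs with this: an object list of the form myfeature-$(CONFIG_MYFEATURE) := myfeature.o expands to nothing when the option is unset, which is exactly what the new nvme Makefile line does for lightnvm.o.
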
mtk_gpio_get(struct gpio_chip *chip, unsigned offset) @@ -757,12 +757,8 @@ static int mtk_gpio_get(struct gpio_chip *chip, unsigned offset) unsigned int read_val = 0; struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev); - if (mtk_gpio_get_direction(chip, offset)) - reg_addr = mtk_get_port(pctl, offset) + - pctl->devdata->dout_offset; - else - reg_addr = mtk_get_port(pctl, offset) + - pctl->devdata->din_offset; + reg_addr = mtk_get_port(pctl, offset) + + pctl->devdata->din_offset; bit = BIT(offset & 0xf); regmap_read(pctl->regmap1, reg_addr, &read_val); @@ -997,6 +993,7 @@ static struct gpio_chip mtk_gpio_chip = { .owner = THIS_MODULE, .request = gpiochip_generic_request, .free = gpiochip_generic_free, + .get_direction = mtk_gpio_get_direction, .direction_input = mtk_gpio_direction_input, .direction_output = mtk_gpio_direction_output, .get = mtk_gpio_get, diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index d809c9eaa323..19a3c3bc2f1f 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -672,7 +672,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) return -ENOMEM; pctrl->dev = &pdev->dev; - pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev); + pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev); pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL); if (!pctrl->regmap) { diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 8982027de8e8..b868ef1766a0 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -763,7 +763,7 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev) return -ENOMEM; pctrl->dev = &pdev->dev; - pctrl->npins = (unsigned)of_device_get_match_data(&pdev->dev); + pctrl->npins = (unsigned long)of_device_get_match_data(&pdev->dev); pctrl->regmap = dev_get_regmap(pdev->dev.parent, NULL); if (!pctrl->regmap) { diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index e7deb51de7dc..9842bb106796 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -31,11 +31,11 @@ PORT_GP_12(5, fn, sfx) #undef _GP_DATA -#define _GP_DATA(bank, pin, name, sfx) \ +#define _GP_DATA(bank, pin, name, sfx, cfg) \ PINMUX_DATA(name##_DATA, name##_FN, name##_IN, name##_OUT) -#define _GP_INOUTSEL(bank, pin, name, sfx) name##_IN, name##_OUT -#define _GP_INDT(bank, pin, name, sfx) name##_DATA +#define _GP_INOUTSEL(bank, pin, name, sfx, cfg) name##_IN, name##_OUT +#define _GP_INDT(bank, pin, name, sfx, cfg) name##_DATA #define GP_INOUTSEL(bank) PORT_GP_32_REV(bank, _GP_INOUTSEL, unused) #define GP_INDT(bank) PORT_GP_32_REV(bank, _GP_INDT, unused) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 8b3130f22b42..9e03d158f411 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1478,6 +1478,8 @@ module_init(remoteproc_init); static void __exit remoteproc_exit(void) { + ida_destroy(&rproc_dev_index); + rproc_exit_debugfs(); } module_exit(remoteproc_exit); diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c index 9d30809bb407..916af5096f57 100644 --- a/drivers/remoteproc/remoteproc_debugfs.c +++ b/drivers/remoteproc/remoteproc_debugfs.c @@ -156,7 +156,7 @@ rproc_recovery_write(struct file *filp, const char __user *user_buf, char buf[10]; int ret; - if (count > sizeof(buf)) + 
if (count < 1 || count > sizeof(buf)) return count; ret = copy_from_user(buf, user_buf, count); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 5f692ae40749..64eed87d34a8 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -364,6 +364,7 @@ config SCSI_HPSA tristate "HP Smart Array SCSI driver" depends on PCI && SCSI select CHECK_SIGNATURE + select SCSI_SAS_ATTRS help This driver supports HP Smart Array Controllers (circa 2009). It is a SCSI alternative to the cciss driver, which is a block @@ -499,6 +500,7 @@ config SCSI_ADVANSYS tristate "AdvanSys SCSI support" depends on SCSI depends on ISA || EISA || PCI + depends on ISA_DMA_API || !ISA help This is a driver for all SCSI host adapters manufactured by AdvanSys. It is documented in the kernel source in diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index 519f9a4b3dad..febbd83e2ecd 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -7803,7 +7803,7 @@ adv_build_req(struct asc_board *boardp, struct scsi_cmnd *scp, return ASC_BUSY; } scsiqp->sense_addr = cpu_to_le32(sense_addr); - scsiqp->sense_len = cpu_to_le32(SCSI_SENSE_BUFFERSIZE); + scsiqp->sense_len = SCSI_SENSE_BUFFERSIZE; /* Build ADV_SCSI_REQ_Q */ diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 323982fd00c3..82ac1cd818ac 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -333,6 +333,17 @@ static void scsi_host_dev_release(struct device *dev) kfree(queuedata); } + if (shost->shost_state == SHOST_CREATED) { + /* + * Free the shost_dev device name here if scsi_host_alloc() + * and scsi_host_put() have been called but neither + * scsi_host_add() nor scsi_host_remove() has been called. + * This avoids leaking the memory allocated for the + * shost_dev name. + */ + kfree(dev_name(&shost->shost_dev)); + } + scsi_destroy_command_freelist(shost); if (shost_use_blk_mq(shost)) { if (shost->tag_set.tags) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 6a8f95808ee0..a3860367b568 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -8671,7 +8671,7 @@ static void hpsa_disable_rld_caching(struct ctlr_info *h) if ((rc != 0) || (c->err_info->CommandStatus != 0)) goto errout; - if (*options && HPSA_DIAG_OPTS_DISABLE_RLD_CACHING) + if (*options & HPSA_DIAG_OPTS_DISABLE_RLD_CACHING) goto out; errout: diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig index 29061467cc17..b736dbc80485 100644 --- a/drivers/scsi/mpt3sas/Kconfig +++ b/drivers/scsi/mpt3sas/Kconfig @@ -71,3 +71,12 @@ config SCSI_MPT3SAS_MAX_SGE MAX_PHYS_SEGMENTS in most kernels. However, in SuSE kernels this can be 256. However, it may be decreased down to 16. Decreasing this parameter will reduce memory requirements on a per controller instance basis. + +config SCSI_MPT2SAS + tristate "Legacy MPT2SAS config option" + default n + select SCSI_MPT3SAS + depends on PCI && SCSI + ---help--- + Dummy config option for backwards compatibility: configure the MPT3SAS + driver instead. diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index d95206b7e116..9ab77b06434d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3905,8 +3905,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) * We do not expose raid functionality to upper layer for warpdrive.
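
The hpsa one-liner above is worth pausing on: with the logical &&, any nonzero *options made the condition true, so the flag bit was never actually examined; the bitwise & tests it. Demonstrated standalone, with a stand-in value rather than the real HPSA constant:

#include <stdio.h>

#define DISABLE_RLD_CACHING 0x4        /* stand-in flag value */

int main(void)
{
        unsigned int options = 0x1;        /* other bit set, flag bit clear */

        /* Logical AND: both operands are nonzero, so this is always true
         * whenever options != 0, regardless of the flag bit. */
        if (options && DISABLE_RLD_CACHING)
                printf("&&: flag appears set\n");

        /* Bitwise AND: actually isolates and tests the flag bit. */
        if (options & DISABLE_RLD_CACHING)
                printf("&: flag is set\n");
        else
                printf("&: flag is clear\n");
        return 0;
}
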
*/ if (!ioc->is_warpdrive && !scsih_is_raid(&scmd->device->sdev_gendev) - && (sas_device_priv_data->flags & MPT_DEVICE_TLR_ON) && - scmd->cmd_len != 32) + && sas_is_tlr_enabled(scmd->device) && scmd->cmd_len != 32) mpi_control |= MPI2_SCSIIO_CONTROL_TLR_ON; smid = mpt3sas_base_get_smid_scsiio(ioc, ioc->scsi_io_cb_idx, scmd); diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c index 90fdf0e859e3..675e7fab0796 100644 --- a/drivers/scsi/mvsas/mv_init.c +++ b/drivers/scsi/mvsas/mv_init.c @@ -758,7 +758,7 @@ mvs_store_interrupt_coalescing(struct device *cdev, struct device_attribute *attr, const char *buffer, size_t size) { - int val = 0; + unsigned int val = 0; struct mvs_info *mvi = NULL; struct Scsi_Host *shost = class_to_shost(cdev); struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost); @@ -766,7 +766,7 @@ mvs_store_interrupt_coalescing(struct device *cdev, if (buffer == NULL) return size; - if (sscanf(buffer, "%d", &val) != 1) + if (sscanf(buffer, "%u", &val) != 1) return -EINVAL; if (val >= 0x10000) { diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index eb0cc5475c45..b6b4cfdd7620 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -433,7 +433,7 @@ qla82xx_pci_get_crb_addr_2M(struct qla_hw_data *ha, ulong off_in, if (off_in < QLA82XX_PCI_CRBSPACE) return -1; - *off_out = (void __iomem *)(off_in - QLA82XX_PCI_CRBSPACE); + off_in -= QLA82XX_PCI_CRBSPACE; /* Try direct map */ m = &crb_128M_2M_map[CRB_BLK(off_in)].sub_block[CRB_SUBBLK(off_in)]; @@ -443,6 +443,7 @@ qla82xx_pci_get_crb_addr_2M(struct qla_hw_data *ha, ulong off_in, return 0; } /* Not in direct map, use crb window */ + *off_out = (void __iomem *)off_in; return 1; } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index dfcc45bb03b1..d09d60293c27 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -465,8 +465,9 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { 0} }, {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* MAINT OUT */ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, - {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* VERIFY */ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + {0, 0x2f, 0, F_D_OUT_MAYBE | FF_DIRECT_IO, NULL, NULL, /* VERIFY(10) */ + {10, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7, + 0, 0, 0, 0, 0, 0} }, {1, 0x7f, 0x9, F_SA_HIGH | F_D_IN | FF_DIRECT_IO, resp_read_dt0, vl_iarr, {32, 0xc7, 0, 0, 0, 0, 0x1f, 0x18, 0x0, 0x9, 0xfe, 0, 0xff, 0xff, 0xff, 0xff} },/* VARIABLE LENGTH, READ(32) */ @@ -477,8 +478,8 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = { {10, 0x13, 0xff, 0xff, 0, 0, 0, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} }, /* 20 */ - {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ALLOW REMOVAL */ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, + {0, 0x1e, 0, 0, NULL, NULL, /* ALLOW REMOVAL */ + {6, 0, 0, 0, 0x3, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {0, 0x1, 0, 0, resp_start_stop, NULL, /* REWIND ?? */ {6, 0x1, 0, 0, 0, 0xc7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, {0, 0, 0, F_INV_OP | FF_RESPOND, NULL, NULL, /* ATA_PT */ diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 83245391e956..054923e3393c 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -701,9 +701,12 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result, * strings. 
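
The mvsas conversion to an unsigned scan above is a range-check fix, not a cosmetic one: with %d into a signed int, a written value such as -1 stays below the 0x10000 limit and slips through, whereas %u into an unsigned int wraps it to a huge value that the same comparison rejects. A standalone demonstration:

#include <stdio.h>

int main(void)
{
        const char *buf = "-1";        /* hostile or accidental sysfs write */
        int sval = 0;
        unsigned int uval = 0;

        sscanf(buf, "%d", &sval);
        sscanf(buf, "%u", &uval);        /* wraps: -1 becomes UINT_MAX */

        /* Signed: -1 < 0x10000, so the range check is silently bypassed. */
        printf("%%d: val=%d, rejected=%d\n", sval, sval >= 0x10000);
        /* Unsigned: 4294967295 >= 0x10000, so the same check rejects it. */
        printf("%%u: val=%u, rejected=%d\n", uval, uval >= 0x10000);
        return 0;
}
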
*/ if (sdev->inquiry_len < 36) { - sdev_printk(KERN_INFO, sdev, - "scsi scan: INQUIRY result too short (%d)," - " using 36\n", sdev->inquiry_len); + if (!sdev->host->short_inquiry) { + shost_printk(KERN_INFO, sdev->host, + "scsi scan: INQUIRY result too short (%d)," + " using 36\n", sdev->inquiry_len); + sdev->host->short_inquiry = 1; + } sdev->inquiry_len = 36; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8d2312239ae0..21930c9ac9cd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1102,6 +1102,14 @@ void __scsi_remove_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; + /* + * This cleanup path is not reentrant and while it is impossible + * to get a new reference with scsi_device_get() someone can still + * hold a previously acquired one. + */ + if (sdev->sdev_state == SDEV_DEL) + return; + if (sdev->is_visible) { if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) return; @@ -1110,7 +1118,9 @@ void __scsi_remove_device(struct scsi_device *sdev) device_unregister(&sdev->sdev_dev); transport_remove_device(dev); scsi_dh_remove_device(sdev); - } + device_del(dev); + } else + put_device(&sdev->sdev_dev); /* * Stop accepting new requests and wait until all queuecommand() and @@ -1121,16 +1131,6 @@ void __scsi_remove_device(struct scsi_device *sdev) blk_cleanup_queue(sdev->request_queue); cancel_work_sync(&sdev->requeue_work); - /* - * Remove the device after blk_cleanup_queue() has been called such - * a possible bdi_register() call with the same name occurs after - * blk_cleanup_queue() has called bdi_destroy(). - */ - if (sdev->is_visible) - device_del(dev); - else - put_device(&sdev->sdev_dev); - if (sdev->host->hostt->slave_destroy) sdev->host->hostt->slave_destroy(sdev); transport_destroy_device(dev); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 54519804c46a..3d22fc3e3c1a 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -638,11 +638,24 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) unsigned int max_blocks = 0; q->limits.discard_zeroes_data = 0; - q->limits.discard_alignment = sdkp->unmap_alignment * - logical_block_size; - q->limits.discard_granularity = - max(sdkp->physical_block_size, - sdkp->unmap_granularity * logical_block_size); + + /* + * When LBPRZ is reported, discard alignment and granularity + * must be fixed to the logical block size. Otherwise the block + * layer will drop misaligned portions of the request which can + * lead to data corruption. If LBPRZ is not set, we honor the + * device preference. 
+ */ + if (sdkp->lbprz) { + q->limits.discard_alignment = 0; + q->limits.discard_granularity = 1; + } else { + q->limits.discard_alignment = sdkp->unmap_alignment * + logical_block_size; + q->limits.discard_granularity = + max(sdkp->physical_block_size, + sdkp->unmap_granularity * logical_block_size); + } sdkp->provisioning_mode = mode; @@ -2321,11 +2334,8 @@ got_data: } } - if (sdkp->capacity > 0xffffffff) { + if (sdkp->capacity > 0xffffffff) sdp->use_16_for_rw = 1; - sdkp->max_xfer_blocks = SD_MAX_XFER_BLOCKS; - } else - sdkp->max_xfer_blocks = SD_DEF_XFER_BLOCKS; /* Rescale capacity to 512-byte units */ if (sector_size == 4096) @@ -2642,7 +2652,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) { unsigned int sector_sz = sdkp->device->sector_size; const int vpd_len = 64; - u32 max_xfer_length; unsigned char *buffer = kmalloc(vpd_len, GFP_KERNEL); if (!buffer || @@ -2650,14 +2659,11 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) scsi_get_vpd_page(sdkp->device, 0xb0, buffer, vpd_len)) goto out; - max_xfer_length = get_unaligned_be32(&buffer[8]); - if (max_xfer_length) - sdkp->max_xfer_blocks = max_xfer_length; - blk_queue_io_min(sdkp->disk->queue, get_unaligned_be16(&buffer[6]) * sector_sz); - blk_queue_io_opt(sdkp->disk->queue, - get_unaligned_be32(&buffer[12]) * sector_sz); + + sdkp->max_xfer_blocks = get_unaligned_be32(&buffer[8]); + sdkp->opt_xfer_blocks = get_unaligned_be32(&buffer[12]); if (buffer[3] == 0x3c) { unsigned int lba_count, desc_count; @@ -2806,6 +2812,11 @@ static int sd_try_extended_inquiry(struct scsi_device *sdp) return 0; } +static inline u32 logical_to_sectors(struct scsi_device *sdev, u32 blocks) +{ + return blocks << (ilog2(sdev->sector_size) - 9); +} + /** * sd_revalidate_disk - called the first time a new disk is seen, * performs disk spin up, read_capacity, etc. @@ -2815,8 +2826,9 @@ static int sd_revalidate_disk(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; + struct request_queue *q = sdkp->disk->queue; unsigned char *buffer; - unsigned int max_xfer; + unsigned int dev_max, rw_max; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2864,11 +2876,26 @@ static int sd_revalidate_disk(struct gendisk *disk) */ sd_set_flush_flag(sdkp); - max_xfer = sdkp->max_xfer_blocks; - max_xfer <<= ilog2(sdp->sector_size) - 9; + /* Initial block count limit based on CDB TRANSFER LENGTH field size. */ + dev_max = sdp->use_16_for_rw ? SD_MAX_XFER_BLOCKS : SD_DEF_XFER_BLOCKS; + + /* Some devices report a maximum block count for READ/WRITE requests. */ + dev_max = min_not_zero(dev_max, sdkp->max_xfer_blocks); + q->limits.max_dev_sectors = logical_to_sectors(sdp, dev_max); + + /* + * Use the device's preferred I/O size for reads and writes + * unless the reported value is unreasonably large (or garbage). 
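
The new logical_to_sectors() helper converts logical blocks into the 512-byte sectors the block layer counts in, by shifting with ilog2(sector_size) - 9; for 4096-byte sectors that is a shift of 3, so one logical block is eight sectors. A standalone version with a portable stand-in for the kernel's ilog2():

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for the kernel's ilog2() on exact powers of two. */
static unsigned int ilog2_u32(uint32_t v)
{
        unsigned int r = 0;

        while (v >>= 1)
                r++;
        return r;
}

/* Logical blocks -> 512-byte sectors, mirroring the new sd.c helper. */
static uint32_t logical_to_sectors(uint32_t sector_size, uint32_t blocks)
{
        return blocks << (ilog2_u32(sector_size) - 9);
}

int main(void)
{
        printf("%u\n", logical_to_sectors(4096, 100));        /* 800 */
        printf("%u\n", logical_to_sectors(512, 100));         /* 100 */
        return 0;
}
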
+ */ + if (sdkp->opt_xfer_blocks && sdkp->opt_xfer_blocks <= dev_max && + sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS) + rw_max = q->limits.io_opt = + logical_to_sectors(sdp, sdkp->opt_xfer_blocks); + else + rw_max = BLK_DEF_MAX_SECTORS; - sdkp->disk->queue->limits.max_sectors = - min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer); + /* Combine with controller limits */ + q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q)); set_capacity(disk, sdkp->capacity); sd_config_write_same(sdkp); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 63ba5ca7f9a1..5f2a84aff29f 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -67,6 +67,7 @@ struct scsi_disk { atomic_t openers; sector_t capacity; /* size in 512-byte sectors */ u32 max_xfer_blocks; + u32 opt_xfer_blocks; u32 max_ws_blocks; u32 max_unmap_blocks; u32 unmap_granularity; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index e0a1e52a04e7..2e522951b619 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4083,6 +4083,7 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew) } cdev->owner = THIS_MODULE; cdev->ops = &st_fops; + STm->cdevs[rew] = cdev; error = cdev_add(cdev, cdev_devno, 1); if (error) { @@ -4091,7 +4092,6 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew) pr_err("st%d: Device not attached.\n", dev_num); goto out_free; } - STm->cdevs[rew] = cdev; i = mode << (4 - ST_NBR_MODE_BITS); snprintf(name, 10, "%s%s%s", rew ? "n" : "", @@ -4110,8 +4110,9 @@ static int create_one_cdev(struct scsi_tape *tape, int mode, int rew) return 0; out_free: cdev_del(STm->cdevs[rew]); - STm->cdevs[rew] = NULL; out: + STm->cdevs[rew] = NULL; + STm->devs[rew] = NULL; return error; } diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index 06858e04ec59..bf9a610e5b89 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -562,8 +562,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) goto out_clk_disable; } - dev_info(dev, "at 0x%08x (irq %d, FIFOs size %d)\n", - r->start, irq, bs->fifo_size); + dev_info(dev, "at %pr (irq %d, FIFOs size %d)\n", + r, irq, bs->fifo_size); return 0; diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 563954a61424..7840067062a8 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -410,7 +410,7 @@ static int mtk_spi_setup(struct spi_device *spi) if (!spi->controller_data) spi->controller_data = (void *)&mtk_default_chip_info; - if (mdata->dev_comp->need_pad_sel) + if (mdata->dev_comp->need_pad_sel && gpio_is_valid(spi->cs_gpio)) gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); return 0; @@ -632,13 +632,23 @@ static int mtk_spi_probe(struct platform_device *pdev) goto err_put_master; } - for (i = 0; i < master->num_chipselect; i++) { - ret = devm_gpio_request(&pdev->dev, master->cs_gpios[i], - dev_name(&pdev->dev)); - if (ret) { - dev_err(&pdev->dev, - "can't get CS GPIO %i\n", i); - goto err_put_master; + if (!master->cs_gpios && master->num_chipselect > 1) { + dev_err(&pdev->dev, + "cs_gpios not specified and num_chipselect > 1\n"); + ret = -EINVAL; + goto err_put_master; + } + + if (master->cs_gpios) { + for (i = 0; i < master->num_chipselect; i++) { + ret = devm_gpio_request(&pdev->dev, + master->cs_gpios[i], + dev_name(&pdev->dev)); + if (ret) { + dev_err(&pdev->dev, + "can't get CS GPIO %i\n", i); + goto err_put_master; + } } } } diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 94af80676684..5e5fd77e2711 100644 --- 
a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1171,19 +1171,31 @@ err_no_rxchan: static int pl022_dma_autoprobe(struct pl022 *pl022) { struct device *dev = &pl022->adev->dev; + struct dma_chan *chan; + int err; /* automatically configure DMA channels from platform, normally using DT */ - pl022->dma_rx_channel = dma_request_slave_channel(dev, "rx"); - if (!pl022->dma_rx_channel) + chan = dma_request_slave_channel_reason(dev, "rx"); + if (IS_ERR(chan)) { + err = PTR_ERR(chan); goto err_no_rxchan; + } + + pl022->dma_rx_channel = chan; - pl022->dma_tx_channel = dma_request_slave_channel(dev, "tx"); - if (!pl022->dma_tx_channel) + chan = dma_request_slave_channel_reason(dev, "tx"); + if (IS_ERR(chan)) { + err = PTR_ERR(chan); goto err_no_txchan; + } + + pl022->dma_tx_channel = chan; pl022->dummypage = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!pl022->dummypage) + if (!pl022->dummypage) { + err = -ENOMEM; goto err_no_dummypage; + } return 0; @@ -1194,7 +1206,7 @@ err_no_txchan: dma_release_channel(pl022->dma_rx_channel); pl022->dma_rx_channel = NULL; err_no_rxchan: - return -ENODEV; + return err; } static void terminate_dma(struct pl022 *pl022) @@ -2236,6 +2248,10 @@ static int pl022_probe(struct amba_device *adev, const struct amba_id *id) /* Get DMA channels, try autoconfiguration first */ status = pl022_dma_autoprobe(pl022); + if (status == -EPROBE_DEFER) { + dev_dbg(dev, "deferring probe to get DMA channel\n"); + goto err_no_irq; + } /* If that failed, use channels from platform_info */ if (status == 0) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index e2415be209d5..2b0a8ec3affb 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -376,6 +376,7 @@ static void spi_drv_shutdown(struct device *dev) /** * __spi_register_driver - register a SPI driver + * @owner: owner module of the driver to register * @sdrv: the driver to register * Context: can sleep * @@ -2130,6 +2131,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) * Set transfer tx_nbits and rx_nbits as single transfer default * (SPI_NBITS_SINGLE) if it is not set for this transfer. 
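
The pl022 switch to dma_request_slave_channel_reason() matters for probe ordering: the NULL-returning variant cannot distinguish "no channel wired up" from "DMA controller not probed yet", while the ERR_PTR-based one lets -EPROBE_DEFER propagate so the core retries the probe later, which is exactly what the new early return in pl022_probe() acts on. A reduced sketch of the error-pointer plumbing, using userspace stand-ins for the kernel's err.h helpers:

#include <stdio.h>

#define EPROBE_DEFER 517        /* kernel-internal "retry probe later" errno */

/* Userspace stand-ins for the kernel's err.h pointer-error helpers. */
static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-4095;
}

static void *request_channel(int controller_ready)
{
        if (!controller_ready)
                return ERR_PTR(-EPROBE_DEFER);        /* not an error: retry */
        return (void *)0x1;        /* fake channel handle */
}

int main(void)
{
        void *chan = request_channel(0);

        if (IS_ERR(chan)) {
                /* Propagate the real reason instead of a blanket -ENODEV. */
                printf("probe returns %ld\n", PTR_ERR(chan));
                return 1;
        }
        return 0;
}
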
*/ + message->frame_length = 0; list_for_each_entry(xfer, &message->transfers, transfer_list) { message->frame_length += xfer->len; if (!xfer->bits_per_word) diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h index f5d741f25ffd..485ab2670918 100644 --- a/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h +++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_ioctl.h @@ -110,7 +110,6 @@ struct libcfs_ioctl_handler { #define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, long) #define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, long) #define IOC_LIBCFS_MEMHOG _IOWR('e', 36, long) -#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, long) /* lnet ioctls */ #define IOC_LIBCFS_GET_NI _IOWR('e', 50, long) #define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, long) diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c index 07a68594c279..e7c2b26156b9 100644 --- a/drivers/staging/lustre/lustre/libcfs/module.c +++ b/drivers/staging/lustre/lustre/libcfs/module.c @@ -274,23 +274,6 @@ static int libcfs_ioctl_int(struct cfs_psdev_file *pfile, unsigned long cmd, } break; - case IOC_LIBCFS_PING_TEST: { - extern void (kping_client)(struct libcfs_ioctl_data *); - void (*ping)(struct libcfs_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n", - data->ioc_count, libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(data->ioc_nid)); - ping = symbol_get(kping_client); - if (!ping) - CERROR("symbol_get failed\n"); - else { - ping(data); - symbol_put(kping_client); - } - return 0; - } - default: { struct libcfs_ioctl_handler *hand; diff --git a/fs/direct-io.c b/fs/direct-io.c index cb5337d8c273..1c75a3a07f8f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1169,6 +1169,15 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } } + /* Once we sampled i_size check for reads beyond EOF */ + dio->i_size = i_size_read(inode); + if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { + if (dio->flags & DIO_LOCKING) + mutex_unlock(&inode->i_mutex); + kmem_cache_free(dio_cache, dio); + goto out; + } + /* * For file extending writes updating i_size before data writeouts * complete can expose uninitialized blocks in dumb filesystems. @@ -1222,7 +1231,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, sdio.next_block_for_io = -1; dio->iocb = iocb; - dio->i_size = i_size_read(inode); spin_lock_init(&dio->bio_lock); dio->refcount = 1; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 87e9d796cf7d..3a37bd3f9637 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk) if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { con->sock->sk->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); } if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) @@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con) msg_flags); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && - test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { /* Notify TCP that we're limited by the * application window size. 
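
The message->frame_length = 0 line above closes a quiet reuse bug: __spi_validate() accumulates xfer->len with +=, so a spi_message submitted a second time carried a doubled frame_length. Zeroing at the top makes validation idempotent, shown here in miniature:

#include <stdio.h>

struct xfer { unsigned int len; };
struct message { unsigned int frame_length; };

static void validate(struct message *m, const struct xfer *x, int n)
{
        m->frame_length = 0;        /* the fix: reset before accumulating */
        for (int i = 0; i < n; i++)
                m->frame_length += x[i].len;
}

int main(void)
{
        struct xfer x[2] = { { 16 }, { 32 } };
        struct message m = { 0 };

        validate(&m, x, 2);
        validate(&m, x, 2);        /* resubmission: still 48, not 96 */
        printf("frame_length = %u\n", m.frame_length);
        return 0;
}
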
diff --git a/fs/namei.c b/fs/namei.c index d84d7c7515fc..0c3974cd3ecd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1996,7 +1996,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; - nd->total_link_count = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 871fcb67be97..0a8983492d91 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -195,8 +195,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, struct iattr *attr, - const char *link) + struct kstat *stat, const char *link) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -240,8 +239,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, mutex_lock(&newdentry->d_inode->i_mutex); err = ovl_set_attr(newdentry, stat); - if (!err && attr) - err = notify_change(newdentry, attr, NULL); mutex_unlock(&newdentry->d_inode->i_mutex); if (err) goto out_cleanup; @@ -286,8 +283,7 @@ out_cleanup: * that point the file will have already been copied up anyway. */ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr) + struct path *lowerpath, struct kstat *stat) { struct dentry *workdir = ovl_workdir(dentry); int err; @@ -345,26 +341,19 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } upperdentry = ovl_dentry_upper(dentry); if (upperdentry) { - unlock_rename(workdir, upperdir); + /* Raced with another copy-up? Nothing to do, then... */ err = 0; - /* Raced with another copy-up? 
Do the setattr here */ - if (attr) { - mutex_lock(&upperdentry->d_inode->i_mutex); - err = notify_change(upperdentry, attr, NULL); - mutex_unlock(&upperdentry->d_inode->i_mutex); - } - goto out_put_cred; + goto out_unlock; } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, attr, link); + stat, link); if (!err) { /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, &pstat); } out_unlock: unlock_rename(workdir, upperdir); -out_put_cred: revert_creds(old_cred); put_cred(override_cred); @@ -406,7 +395,7 @@ int ovl_copy_up(struct dentry *dentry) ovl_path_lower(next, &lowerpath); err = vfs_getattr(&lowerpath, &stat); if (!err) - err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); + err = ovl_copy_up_one(parent, next, &lowerpath, &stat); dput(parent); dput(next); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ec0c2a050043..4060ffde8722 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -12,8 +12,7 @@ #include <linux/xattr.h> #include "overlayfs.h" -static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, - bool no_data) +static int ovl_copy_up_truncate(struct dentry *dentry) { int err; struct dentry *parent; @@ -30,10 +29,8 @@ static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, if (err) goto out_dput_parent; - if (no_data) - stat.size = 0; - - err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); + stat.size = 0; + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); out_dput_parent: dput(parent); @@ -49,13 +46,13 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; - upperdentry = ovl_dentry_upper(dentry); - if (upperdentry) { + err = ovl_copy_up(dentry); + if (!err) { + upperdentry = ovl_dentry_upper(dentry); + mutex_lock(&upperdentry->d_inode->i_mutex); err = notify_change(upperdentry, attr, NULL); mutex_unlock(&upperdentry->d_inode->i_mutex); - } else { - err = ovl_copy_up_last(dentry, attr, false); } ovl_drop_write(dentry); out: @@ -353,7 +350,7 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) return ERR_PTR(err); if (file_flags & O_TRUNC) - err = ovl_copy_up_last(dentry, NULL, true); + err = ovl_copy_up_truncate(dentry); else err = ovl_copy_up(dentry); ovl_drop_write(dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ea5a40b06e3a..e17154aeaae4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -194,7 +194,6 @@ void ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr); + struct path *lowerpath, struct kstat *stat); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index bd5d2c536ddc..d7162cf1c3e1 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -309,6 +309,11 @@ struct drm_file { unsigned universal_planes:1; /* true if client understands atomic properties */ unsigned atomic:1; + /* + * This client is allowed to gain master privileges for @master. + * Protected by struct drm_device::master_mutex. 
+ */ + unsigned allowed_master:1; struct pid *pid; kuid_t uid; @@ -344,6 +349,8 @@ struct drm_file { struct list_head event_list; int event_space; + struct mutex event_read_lock; + struct drm_prime_file_private prime; }; @@ -580,6 +587,13 @@ struct drm_driver { int (*gem_open_object) (struct drm_gem_object *, struct drm_file *); void (*gem_close_object) (struct drm_gem_object *, struct drm_file *); + /** + * Hook for allocating the GEM object struct, for use by core + * helpers. + */ + struct drm_gem_object *(*gem_create_object)(struct drm_device *dev, + size_t size); + /* prime: */ /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */ int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv, @@ -910,6 +924,7 @@ extern int drm_open(struct inode *inode, struct file *filp); extern ssize_t drm_read(struct file *filp, char __user *buffer, size_t count, loff_t *offset); extern int drm_release(struct inode *inode, struct file *filp); +extern int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv); /* Mapping support (drm_vm.h) */ extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); @@ -947,6 +962,10 @@ extern void drm_send_vblank_event(struct drm_device *dev, unsigned int pipe, struct drm_pending_vblank_event *e); extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc, struct drm_pending_vblank_event *e); +extern void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe, + struct drm_pending_vblank_event *e); +extern void drm_crtc_arm_vblank_event(struct drm_crtc *crtc, + struct drm_pending_vblank_event *e); extern bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe); extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc); extern int drm_vblank_get(struct drm_device *dev, unsigned int pipe); @@ -1049,7 +1068,7 @@ void drm_dev_ref(struct drm_device *dev); void drm_dev_unref(struct drm_device *dev); int drm_dev_register(struct drm_device *dev, unsigned long flags); void drm_dev_unregister(struct drm_device *dev); -int drm_dev_set_unique(struct drm_device *dev, const char *fmt, ...); +int drm_dev_set_unique(struct drm_device *dev, const char *name); struct drm_minor *drm_minor_acquire(unsigned int minor_id); void drm_minor_release(struct drm_minor *minor); diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h index 8cba54a2a0a0..a286cce98720 100644 --- a/include/drm/drm_atomic_helper.h +++ b/include/drm/drm_atomic_helper.h @@ -62,6 +62,8 @@ void drm_atomic_helper_commit_planes(struct drm_device *dev, void drm_atomic_helper_cleanup_planes(struct drm_device *dev, struct drm_atomic_state *old_state); void drm_atomic_helper_commit_planes_on_crtc(struct drm_crtc_state *old_crtc_state); +void drm_atomic_helper_disable_planes_on_crtc(struct drm_crtc *crtc, + bool atomic); void drm_atomic_helper_swap_state(struct drm_device *dev, struct drm_atomic_state *state); @@ -81,6 +83,12 @@ int drm_atomic_helper_set_config(struct drm_mode_set *set); int __drm_atomic_helper_set_config(struct drm_mode_set *set, struct drm_atomic_state *state); +int drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx); +struct drm_atomic_state *drm_atomic_helper_suspend(struct drm_device *dev); +int drm_atomic_helper_resume(struct drm_device *dev, + struct drm_atomic_state *state); + int drm_atomic_helper_crtc_set_property(struct drm_crtc *crtc, struct drm_property *property, uint64_t val); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 
4765df331002..3b040b355472 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -162,23 +162,60 @@ struct drm_tile_group { u8 group_data[8]; }; +/** + * struct drm_framebuffer_funcs - framebuffer hooks + */ struct drm_framebuffer_funcs { - /* note: use drm_framebuffer_remove() */ + /** + * @destroy: + * + * Clean up framebuffer resources, specifically also unreference the + * backing storage. The core guarantees to call this function for every + * framebuffer successfully created by ->fb_create() in + * &drm_mode_config_funcs. Drivers must also call + * drm_framebuffer_cleanup() to release DRM core resources for this + * framebuffer. + */ void (*destroy)(struct drm_framebuffer *framebuffer); + + /** + * @create_handle: + * + * Create a buffer handle in the driver-specific buffer manager (either + * GEM or TTM) valid for the passed-in struct &drm_file. This is used by + * the core to implement the GETFB IOCTL, which returns (for + * sufficiently privileged users) also a native buffer handle. This can + * be used for seamless transitions between modesetting clients by + * copying the current screen contents to a private buffer and blending + * between that and the new contents. + * + * GEM based drivers should call drm_gem_handle_create() to create the + * handle. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*create_handle)(struct drm_framebuffer *fb, struct drm_file *file_priv, unsigned int *handle); - /* - * Optional callback for the dirty fb ioctl. + /** + * @dirty: + * + * Optional callback for the dirty fb IOCTL. + * + * Userspace can notify the driver via this callback that an area of the + * framebuffer has changed and should be flushed to the display + * hardware. This can also be used internally, e.g. by the fbdev + * emulation, though that's not the case currently. * - * Userspace can notify the driver via this callback - * that a area of the framebuffer has changed and should - * be flushed to the display hardware. + * See documentation in drm_mode.h for the struct drm_mode_fb_dirty_cmd + * for more information as all the semantics and arguments have a one to + * one mapping onto this function. * - * See documentation in drm_mode.h for the struct - * drm_mode_fb_dirty_cmd for more information as all - * the semantics and arguments have a one to one mapping - * on this function. + * RETURNS: + * + * 0 on success or a negative error code on failure. */ int (*dirty)(struct drm_framebuffer *framebuffer, struct drm_file *file_priv, unsigned flags, @@ -254,6 +291,11 @@ struct drm_plane; struct drm_bridge; struct drm_atomic_state; +struct drm_crtc_helper_funcs; +struct drm_encoder_helper_funcs; +struct drm_connector_helper_funcs; +struct drm_plane_helper_funcs; + /** * struct drm_crtc_state - mutable CRTC state * @crtc: backpointer to the CRTC @@ -315,23 +357,6 @@ struct drm_crtc_state { /** * struct drm_crtc_funcs - control CRTCs for a given device - * @save: save CRTC state - * @restore: restore CRTC state - * @reset: reset CRTC after state has been invalidated (e.g.
resume) - * @cursor_set: setup the cursor - * @cursor_set2: setup the cursor with hotspot, superseeds @cursor_set if set - * @cursor_move: move the cursor - * @gamma_set: specify color ramp for CRTC - * @destroy: deinit and free object - * @set_property: called when a property is changed - * @set_config: apply a new CRTC configuration - * @page_flip: initiate a page flip - * @atomic_duplicate_state: duplicate the atomic state for this CRTC - * @atomic_destroy_state: destroy an atomic state for this CRTC - * @atomic_set_property: set a property on an atomic state for this CRTC - * (do not call directly, use drm_atomic_crtc_set_property()) - * @atomic_get_property: get a property on an atomic state for this CRTC - * (do not call directly, use drm_atomic_crtc_get_property()) * * The drm_crtc_funcs structure is the central CRTC management structure * in the DRM. Each CRTC controls one or more connectors (note that the name * bus accessors. */ struct drm_crtc_funcs { - /* Save CRTC state */ - void (*save)(struct drm_crtc *crtc); /* suspend? */ - /* Restore CRTC state */ - void (*restore)(struct drm_crtc *crtc); /* resume? */ - /* Reset CRTC state */ + /** + * @reset: + * + * Reset CRTC hardware and software state to off. This function isn't + * called by the core directly, only through drm_mode_config_reset(). + * It's not a helper hook only for historical reasons. + * + * Atomic drivers can use drm_atomic_helper_crtc_reset() to reset + * atomic state using this hook. + */ void (*reset)(struct drm_crtc *crtc); - /* cursor controls */ + /** + * @cursor_set: + * + * Update the cursor image. The cursor position is relative to the CRTC + * and can be partially or fully outside of the visible area. + * + * Note that contrary to all other KMS functions the legacy cursor entry + * points don't take a framebuffer object, but instead take directly a + * raw buffer object id from the driver's buffer manager (which is + * either GEM or TTM for current drivers). + * + * This entry point is deprecated, drivers should instead implement + * universal plane support and register a proper cursor plane using + * drm_crtc_init_with_planes(). + * + * This callback is optional. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*cursor_set)(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, uint32_t width, uint32_t height); + + /** + * @cursor_set2: + * + * Update the cursor image, including hotspot information. The hotspot + * must not affect the cursor position in CRTC coordinates, but is only + * meant as a hint for virtualized display hardware to coordinate the + * guest's and host's cursor position. The cursor hotspot is relative to + * the cursor image. Otherwise this works exactly like @cursor_set. + * + * This entry point is deprecated, drivers should instead implement + * universal plane support and register a proper cursor plane using + * drm_crtc_init_with_planes(). + * + * This callback is optional. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*cursor_set2)(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, uint32_t width, uint32_t height, int32_t hot_x, int32_t hot_y); + + /** + * @cursor_move: + * + * Update the cursor position. The cursor does not need to be visible + * when this hook is called.
+ * + * This entry point is deprecated, drivers should instead implement + * universal plane support and register a proper cursor plane using + * drm_crtc_init_with_planes(). + * + * This callback is optional. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*cursor_move)(struct drm_crtc *crtc, int x, int y); - /* Set gamma on the CRTC */ + /** + * @gamma_set: + * + * Set gamma on the CRTC. + * + * This callback is optional. + * + * NOTE: + * + * Drivers that support gamma tables and also fbdev emulation through + * the provided helper library need to take care to fill out the gamma + * hooks for both. Currently there's a bit of unfortunate duplication + * going on, which should eventually be unified to just one set of + * hooks. + */ void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start, uint32_t size); - /* Object destroy routine */ + + /** + * @destroy: + * + * Clean up CRTC resources. This is only called at driver unload time + * through drm_mode_config_cleanup() since a CRTC cannot be hotplugged + * in DRM. + */ void (*destroy)(struct drm_crtc *crtc); + /** + * @set_config: + * + * This is the main legacy entry point to change the modeset state on a + * CRTC. All the details of the desired configuration are passed in a + * struct &drm_mode_set - see there for details. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_set_config() to implement this hook. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*set_config)(struct drm_mode_set *set); - /* - * Flip to the given framebuffer. This implements the page - * flip ioctl described in drm_mode.h, specifically, the - * implementation must return immediately and block all - * rendering to the current fb until the flip has completed. - * If userspace set the event flag in the ioctl, the event - * argument will point to an event to send back when the flip - * completes, otherwise it will be NULL. + /** + * @page_flip: + * + * Legacy entry point to schedule a flip to the given framebuffer. + * + * Page flipping is a synchronization mechanism that replaces the frame + * buffer being scanned out by the CRTC with a new frame buffer during + * vertical blanking, avoiding tearing (except when requested otherwise + * through the DRM_MODE_PAGE_FLIP_ASYNC flag). When an application + * requests a page flip the DRM core verifies that the new frame buffer + * is large enough to be scanned out by the CRTC in the currently + * configured mode and then calls the CRTC ->page_flip() operation with a + * pointer to the new frame buffer. + * + * The driver must wait for any pending rendering to the new framebuffer + * to complete before executing the flip. It should also wait for any + * pending rendering from other drivers if the underlying buffer is a + * shared dma-buf. + * + * An application can request to be notified when the page flip has + * completed. The drm core will supply a struct &drm_event in the event + * parameter in this case. This can be handled by the + * drm_crtc_send_vblank_event() function, which the driver should call on + * the provided event upon completion of the flip. Note that if + * the driver supports vblank signalling and timestamping the vblank + * counters and timestamps must agree with the ones returned from page + * flip events.
With the current vblank helper infrastructure this can + * be achieved by holding a vblank reference while the page flip is + * pending, acquired through drm_crtc_vblank_get() and released with + * drm_crtc_vblank_put(). Drivers are free to implement their own vblank + * counter and timestamp tracking though, e.g. if they have accurate + * timestamp registers in hardware. + * + * FIXME: + * + * Up to that point drivers need to manage events themselves and can use + * event->base.list freely for that. Specifically they need to ensure + * that they don't send out page flip (or vblank) events for which the + * corresponding drm file has been closed already. The drm core + * unfortunately does not (yet) take care of that. Therefore drivers + * currently must clean up and release pending events in their + * ->preclose driver function. + * + * This callback is optional. + * + * NOTE: + * + * Very early versions of the KMS ABI mandated that the driver must + * block (but not reject) any rendering to the old framebuffer until the + * flip operation has completed and the old framebuffer is no longer + * visible. This requirement has been lifted, and userspace is instead + * expected to request delivery of an event and wait with recycling old + * buffers until such has been received. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. Note that if a + * ->page_flip() operation is already pending the callback should return + * -EBUSY. Pageflips on a disabled CRTC (either by setting a NULL mode + * or just runtime disabled through DPMS or the new atomic + * "ACTIVE" state) should result in an -EINVAL error code. */ int (*page_flip)(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, uint32_t flags); + /** + * @set_property: + * + * This is the legacy entry point to update a property attached to the + * CRTC. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_crtc_set_property() to implement this hook. + * + * This callback is optional if the driver does not support any legacy + * driver-private properties. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*set_property)(struct drm_crtc *crtc, struct drm_property *property, uint64_t val); - /* atomic update handling */ + /** + * @atomic_duplicate_state: + * + * Duplicate the current atomic state for this CRTC and return it. + * The core and helpers guarantee that any atomic state duplicated with + * this hook and still owned by the caller (i.e. not transferred to the + * driver by calling ->atomic_commit() from struct + * &drm_mode_config_funcs) will be cleaned up by calling the + * @atomic_destroy_state hook in this structure. + * + * Atomic drivers which don't subclass struct &drm_crtc_state should use + * drm_atomic_helper_crtc_duplicate_state(). Drivers that subclass the + * state structure to extend it with driver-private state should use + * __drm_atomic_helper_crtc_duplicate_state() to make sure shared state is + * duplicated in a consistent fashion across drivers. + * + * It is an error to call this hook before crtc->state has been + * initialized correctly. + * + * NOTE: + * + * If the duplicate state references refcounted resources this hook must + * acquire a reference for each of them. The driver must release these + * references again in @atomic_destroy_state. + * + * RETURNS: + * + * Duplicated atomic state or NULL when the allocation failed.
+ */ struct drm_crtc_state *(*atomic_duplicate_state)(struct drm_crtc *crtc); + + /** + * @atomic_destroy_state: + * + * Destroy a state duplicated with @atomic_duplicate_state and release + * or unreference all resources it references. + */ void (*atomic_destroy_state)(struct drm_crtc *crtc, struct drm_crtc_state *state); + + /** + * @atomic_set_property: + * + * Decode a driver-private property value and store the decoded value + * into the passed-in state structure. Since the atomic core decodes all + * standardized properties (even for extensions beyond the core set of + * properties which might not be implemented by all drivers) this + * requires drivers to subclass the state structure. + * + * Such driver-private properties should really only be implemented for + * truly hardware/vendor specific state. Instead it is preferred to + * standardize the atomic extension and decode the properties used to expose + * such an extension in the core. + * + * Do not call this function directly, use + * drm_atomic_crtc_set_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties. + * + * NOTE: + * + * This function is called in the state assembly phase of atomic + * modesets, which can be aborted for any reason (including on + * userspace's request to just check whether a configuration would be + * possible). Drivers MUST NOT touch any persistent state (hardware or + * software) or data structures except the passed in @state parameter. + * + * Also since userspace controls in which order properties are set this + * function must not do any input validation (since the state update is + * incomplete and hence likely inconsistent). Instead any such input + * validation must be done in the various atomic_check callbacks. + * + * RETURNS: + * + * 0 if the property has been found, -EINVAL if the property isn't + * implemented by the driver (which should never happen, the core only + * asks for properties attached to this CRTC). No other validation is + * allowed by the driver. The core already checks that the property + * value is within the range (integer, valid enum value, ...) the driver + * set when registering the property. + */ int (*atomic_set_property)(struct drm_crtc *crtc, struct drm_crtc_state *state, struct drm_property *property, uint64_t val); + /** + * @atomic_get_property: + * + * Reads out the decoded driver-private property. This is used to + * implement the GETCRTC IOCTL. + * + * Do not call this function directly, use + * drm_atomic_crtc_get_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties. + * + * RETURNS: + * + * 0 on success, -EINVAL if the property isn't implemented by the + * driver (which should never happen, the core only asks for + * properties attached to this CRTC). + */ int (*atomic_get_property)(struct drm_crtc *crtc, const struct drm_crtc_state *state, struct drm_property *property, @@ -420,7 +707,7 @@ struct drm_crtc_funcs { * @properties: property tracking for this CRTC * @state: current atomic state for this CRTC * @acquire_ctx: per-CRTC implicit acquire context used by atomic drivers for - * legacy ioctls + * legacy IOCTLs * * Each CRTC may have one or more connectors associated with it. This structure * allows the CRTC to be controlled.
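As an illustration of the @atomic_duplicate_state/@atomic_destroy_state contract documented above, here is a minimal sketch of a driver subclassing struct drm_crtc_state. The foo_* names are hypothetical and not part of this patch; the two __drm_atomic_helper_crtc_*_state() helpers are the real core functions the text refers to:

/* Driver code; assumes <drm/drm_atomic_helper.h> is included. */
struct foo_crtc_state {
	struct drm_crtc_state base;	/* must be embedded for upcasting */
	u32 watermark;			/* hypothetical driver-private state */
};

#define to_foo_crtc_state(s) container_of(s, struct foo_crtc_state, base)

static struct drm_crtc_state *foo_crtc_duplicate_state(struct drm_crtc *crtc)
{
	struct foo_crtc_state *state;

	/* Calling this before crtc->state is initialized is an error. */
	if (WARN_ON(!crtc->state))
		return NULL;

	state = kmalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return NULL;

	/* Copies the shared state and acquires the needed references. */
	__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
	state->watermark = to_foo_crtc_state(crtc->state)->watermark;

	return &state->base;
}

static void foo_crtc_destroy_state(struct drm_crtc *crtc,
				   struct drm_crtc_state *state)
{
	/* Releases the references acquired in @atomic_duplicate_state. */
	__drm_atomic_helper_crtc_destroy_state(crtc, state);
	kfree(to_foo_crtc_state(state));
}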
@@ -430,6 +717,8 @@ struct drm_crtc { struct device_node *port; struct list_head head; + char *name; + /* * crtc mutex * @@ -467,14 +756,14 @@ struct drm_crtc { uint16_t *gamma_store; /* if you are using the helper */ - const void *helper_private; + const struct drm_crtc_helper_funcs *helper_private; struct drm_object_properties properties; struct drm_crtc_state *state; /* - * For legacy crtc ioctls so that atomic drivers can get at the locking + * For legacy crtc IOCTLs so that atomic drivers can get at the locking * acquire context. */ struct drm_modeset_acquire_ctx *acquire_ctx; @@ -499,54 +788,239 @@ struct drm_connector_state { /** * struct drm_connector_funcs - control connectors on a given device - * @dpms: set power state - * @save: save connector state - * @restore: restore connector state - * @reset: reset connector after state has been invalidated (e.g. resume) - * @detect: is this connector active? - * @fill_modes: fill mode list for this connector - * @set_property: property for this connector may need an update - * @destroy: make object go away - * @force: notify the driver that the connector is forced on - * @atomic_duplicate_state: duplicate the atomic state for this connector - * @atomic_destroy_state: destroy an atomic state for this connector - * @atomic_set_property: set a property on an atomic state for this connector - * (do not call directly, use drm_atomic_connector_set_property()) - * @atomic_get_property: get a property on an atomic state for this connector - * (do not call directly, use drm_atomic_connector_get_property()) * * Each CRTC may have one or more connectors attached to it. The functions * below allow the core DRM code to control connectors, enumerate available modes, * etc. */ struct drm_connector_funcs { + /** + * @dpms: + * + * Legacy entry point to set the per-connector DPMS state. Legacy DPMS + * is exposed as a standard property on the connector, but diverted to + * this callback in the drm core. Note that atomic drivers don't + * implement the 4 level DPMS support on the connector any more, but + * instead only have an on/off "ACTIVE" property on the CRTC object. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_connector_dpms() to implement this hook. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*dpms)(struct drm_connector *connector, int mode); - void (*save)(struct drm_connector *connector); - void (*restore)(struct drm_connector *connector); + + /** + * @reset: + * + * Reset connector hardware and software state to off. This function isn't + * called by the core directly, only through drm_mode_config_reset(). + * It's not a helper hook only for historical reasons. + * + * Atomic drivers can use drm_atomic_helper_connector_reset() to reset + * atomic state using this hook. + */ void (*reset)(struct drm_connector *connector); - /* Check to see if anything is attached to the connector. - * @force is set to false whilst polling, true when checking the - * connector due to user request. @force can be used by the driver - * to avoid expensive, destructive operations during automated - * probing. + /** + * @detect: + * + * Check to see if anything is attached to the connector. The parameter + * force is set to false whilst polling, true when checking the + * connector due to a user request. force can be used by the driver to + * avoid expensive, destructive operations during automated probing. + * + * FIXME: + * + * Note that this hook is only called by the probe helper. 
It's not in + * the helper library vtable purely for historical reasons. The only DRM + * core entry point to probe connector state is @fill_modes. + * + * RETURNS: + * + * drm_connector_status indicating the connector's status. */ enum drm_connector_status (*detect)(struct drm_connector *connector, bool force); + + /** + * @force: + * + * This function is called to update internal encoder state when the + * connector is forced to a certain state by userspace, either through + * the sysfs interfaces or on the kernel cmdline. In that case the + * @detect callback isn't called. + * + * FIXME: + * + * Note that this hook is only called by the probe helper. It's not in + * the helper library vtable purely for historical reasons. The only DRM + * core entry point to probe connector state is @fill_modes. + */ + void (*force)(struct drm_connector *connector); + + /** + * @fill_modes: + * + * Entry point for output detection and basic mode validation. The + * driver should reprobe the output if needed (e.g. when hotplug + * handling is unreliable), add all detected modes to connector->modes + * and filter out any that the device can't support in any configuration. It + * also needs to filter out any modes wider or higher than the + * parameters max_width and max_height indicate. + * + * The drivers must also prune any modes no longer valid from + * connector->modes. Furthermore it must update connector->status and + * connector->edid. If no EDID has been received for this output + * connector->edid must be NULL. + * + * Drivers using the probe helpers should use + * drm_helper_probe_single_connector_modes() or + * drm_helper_probe_single_connector_modes_nomerge() to implement this + * function. + * + * RETURNS: + * + * The number of modes detected and filled into connector->modes. + */ int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); + + /** + * @set_property: + * + * This is the legacy entry point to update a property attached to the + * connector. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_connector_set_property() to implement this hook. + * + * This callback is optional if the driver does not support any legacy + * driver-private properties. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); + + /** + * @destroy: + * + * Clean up connector resources. This is called at driver unload time + * through drm_mode_config_cleanup(). It can also be called at runtime + * when a connector is being hot-unplugged for drivers that support + * connector hotplugging (e.g. DisplayPort MST). + */ void (*destroy)(struct drm_connector *connector); - void (*force)(struct drm_connector *connector); - /* atomic update handling */ + /** + * @atomic_duplicate_state: + * + * Duplicate the current atomic state for this connector and return it. + * The core and helpers guarantee that any atomic state duplicated with + * this hook and still owned by the caller (i.e. not transferred to the + * driver by calling ->atomic_commit() from struct + * &drm_mode_config_funcs) will be cleaned up by calling the + * @atomic_destroy_state hook in this structure. + * + * Atomic drivers which don't subclass struct &drm_connector_state should use + * drm_atomic_helper_connector_duplicate_state().
Drivers that subclass the + * state structure to extend it with driver-private state should use + * __drm_atomic_helper_connector_duplicate_state() to make sure shared state is + * duplicated in a consistent fashion across drivers. + * + * It is an error to call this hook before connector->state has been + * initialized correctly. + * + * NOTE: + * + * If the duplicate state references refcounted resources this hook must + * acquire a reference for each of them. The driver must release these + * references again in @atomic_destroy_state. + * + * RETURNS: + * + * Duplicated atomic state or NULL when the allocation failed. + */ struct drm_connector_state *(*atomic_duplicate_state)(struct drm_connector *connector); + + /** + * @atomic_destroy_state: + * + * Destroy a state duplicated with @atomic_duplicate_state and release + * or unreference all resources it references. + */ void (*atomic_destroy_state)(struct drm_connector *connector, struct drm_connector_state *state); + + /** + * @atomic_set_property: + * + * Decode a driver-private property value and store the decoded value + * into the passed-in state structure. Since the atomic core decodes all + * standardized properties (even for extensions beyond the core set of + * properties which might not be implemented by all drivers) this + * requires drivers to subclass the state structure. + * + * Such driver-private properties should really only be implemented for + * truly hardware/vendor specific state. Instead it is preferred to + * standardize the atomic extension and decode the properties used to expose + * such an extension in the core. + * + * Do not call this function directly, use + * drm_atomic_connector_set_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties. + * + * NOTE: + * + * This function is called in the state assembly phase of atomic + * modesets, which can be aborted for any reason (including on + * userspace's request to just check whether a configuration would be + * possible). Drivers MUST NOT touch any persistent state (hardware or + * software) or data structures except the passed in @state parameter. + * + * Also since userspace controls in which order properties are set this + * function must not do any input validation (since the state update is + * incomplete and hence likely inconsistent). Instead any such input + * validation must be done in the various atomic_check callbacks. + * + * RETURNS: + * + * 0 if the property has been found, -EINVAL if the property isn't + * implemented by the driver (which shouldn't ever happen, the core only + * asks for properties attached to this connector). No other validation + * is allowed by the driver. The core already checks that the property + * value is within the range (integer, valid enum value, ...) the driver + * set when registering the property. + */ int (*atomic_set_property)(struct drm_connector *connector, struct drm_connector_state *state, struct drm_property *property, uint64_t val); + + /** + * @atomic_get_property: + * + * Reads out the decoded driver-private property. This is used to + * implement the GETCONNECTOR IOCTL. + * + * Do not call this function directly, use + * drm_atomic_connector_get_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties.
+ * + * RETURNS: + * + * 0 on success, -EINVAL if the property isn't implemented by the + * driver (which shouldn't ever happen, the core only asks for + * properties attached to this connector). + */ int (*atomic_get_property)(struct drm_connector *connector, const struct drm_connector_state *state, struct drm_property *property, @@ -555,13 +1029,26 @@ struct drm_connector_funcs { /** * struct drm_encoder_funcs - encoder controls - * @reset: reset state (e.g. at init or resume time) - * @destroy: cleanup and free associated data * * Encoders sit between CRTCs and connectors. */ struct drm_encoder_funcs { + /** + * @reset: + * + * Reset encoder hardware and software state to off. This function isn't + * called by the core directly, only through drm_mode_config_reset(). + * It's not a helper hook only for historical reasons. + */ void (*reset)(struct drm_encoder *encoder); + + /** + * @destroy: + * + * Clean up encoder resources. This is only called at driver unload time + * through drm_mode_config_cleanup() since an encoder cannot be + * hotplugged in DRM. + */ void (*destroy)(struct drm_encoder *encoder); }; @@ -597,7 +1084,7 @@ struct drm_encoder { struct drm_crtc *crtc; struct drm_bridge *bridge; const struct drm_encoder_funcs *funcs; - const void *helper_private; + const struct drm_encoder_helper_funcs *helper_private; }; /* should we poll this connector for connects and disconnects */ @@ -702,7 +1189,7 @@ struct drm_connector { /* requested DPMS state */ int dpms; - const void *helper_private; + const struct drm_connector_helper_funcs *helper_private; /* forced on connector */ struct drm_cmdline_mode cmdline_mode; @@ -782,40 +1269,203 @@ struct drm_plane_state { /** * struct drm_plane_funcs - driver plane control functions - @update_plane: update the plane configuration - @disable_plane: shut down the plane - @destroy: clean up plane resources - @reset: reset plane after state has been invalidated (e.g. resume) - @set_property: called when a property is changed - @atomic_duplicate_state: duplicate the atomic state for this plane - @atomic_destroy_state: destroy an atomic state for this plane - @atomic_set_property: set a property on an atomic state for this plane - (do not call directly, use drm_atomic_plane_set_property()) - @atomic_get_property: get a property on an atomic state for this plane - (do not call directly, use drm_atomic_plane_get_property()) */ struct drm_plane_funcs { + /** + * @update_plane: + * + * This is the legacy entry point to enable and configure the plane for + * the given CRTC and framebuffer. It is never called to disable the + * plane, i.e. the passed-in crtc and fb parameters are never NULL. + * + * The source rectangle in frame buffer memory coordinates is given by + * the src_x, src_y, src_w and src_h parameters (as 16.16 fixed point + * values). Devices that don't support subpixel plane coordinates can + * ignore the fractional part. + * + * The destination rectangle in CRTC coordinates is given by the + * crtc_x, crtc_y, crtc_w and crtc_h parameters (as integer values). + * Devices scale the source rectangle to the destination rectangle. If + * scaling is not supported, and the source rectangle size doesn't match + * the destination rectangle size, the driver must return a + * -EINVAL error. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_update_plane() to implement this hook. + * + * RETURNS: + * + * 0 on success or a negative error code on failure.
+ */ int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h); + + /** + * @disable_plane: + * + * This is the legacy entry point to disable the plane. The DRM core + * calls this method in response to a DRM_IOCTL_MODE_SETPLANE IOCTL call + * with the frame buffer ID set to 0. Disabled planes must not be + * processed by the CRTC. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_disable_plane() to implement this hook. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*disable_plane)(struct drm_plane *plane); + + /** + * @destroy: + * + * Clean up plane resources. This is only called at driver unload time + * through drm_mode_config_cleanup() since a plane cannot be hotplugged + * in DRM. + */ void (*destroy)(struct drm_plane *plane); + + /** + * @reset: + * + * Reset plane hardware and software state to off. This function isn't + * called by the core directly, only through drm_mode_config_reset(). + * It's not a helper hook only for historical reasons. + * + * Atomic drivers can use drm_atomic_helper_plane_reset() to reset + * atomic state using this hook. + */ void (*reset)(struct drm_plane *plane); + /** + * @set_property: + * + * This is the legacy entry point to update a property attached to the + * plane. + * + * Drivers implementing atomic modeset should use + * drm_atomic_helper_plane_set_property() to implement this hook. + * + * This callback is optional if the driver does not support any legacy + * driver-private properties. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); - /* atomic update handling */ + /** + * @atomic_duplicate_state: + * + * Duplicate the current atomic state for this plane and return it. + * The core and helpers guarantee that any atomic state duplicated with + * this hook and still owned by the caller (i.e. not transferred to the + * driver by calling ->atomic_commit() from struct + * &drm_mode_config_funcs) will be cleaned up by calling the + * @atomic_destroy_state hook in this structure. + * + * Atomic drivers which don't subclass struct &drm_plane_state should use + * drm_atomic_helper_plane_duplicate_state(). Drivers that subclass the + * state structure to extend it with driver-private state should use + * __drm_atomic_helper_plane_duplicate_state() to make sure shared state is + * duplicated in a consistent fashion across drivers. + * + * It is an error to call this hook before plane->state has been + * initialized correctly. + * + * NOTE: + * + * If the duplicate state references refcounted resources this hook must + * acquire a reference for each of them. The driver must release these + * references again in @atomic_destroy_state. + * + * RETURNS: + * + * Duplicated atomic state or NULL when the allocation failed. + */ struct drm_plane_state *(*atomic_duplicate_state)(struct drm_plane *plane); + + /** + * @atomic_destroy_state: + * + * Destroy a state duplicated with @atomic_duplicate_state and release + * or unreference all resources it references. + */ void (*atomic_destroy_state)(struct drm_plane *plane, struct drm_plane_state *state); + + /** + * @atomic_set_property: + * + * Decode a driver-private property value and store the decoded value + * into the passed-in state structure.
Since the atomic core decodes all + * standardized properties (even for extensions beyond the core set of + * properties which might not be implemented by all drivers) this + * requires drivers to subclass the state structure. + * + * Such driver-private properties should really only be implemented for + * truly hardware/vendor specific state. Instead it is preferred to + * standardize the atomic extension and decode the properties used to expose + * such an extension in the core. + * + * Do not call this function directly, use + * drm_atomic_plane_set_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties. + * + * NOTE: + * + * This function is called in the state assembly phase of atomic + * modesets, which can be aborted for any reason (including on + * userspace's request to just check whether a configuration would be + * possible). Drivers MUST NOT touch any persistent state (hardware or + * software) or data structures except the passed in @state parameter. + * + * Also since userspace controls in which order properties are set this + * function must not do any input validation (since the state update is + * incomplete and hence likely inconsistent). Instead any such input + * validation must be done in the various atomic_check callbacks. + * + * RETURNS: + * + * 0 if the property has been found, -EINVAL if the property isn't + * implemented by the driver (which shouldn't ever happen, the core only + * asks for properties attached to this plane). No other validation is + * allowed by the driver. The core already checks that the property + * value is within the range (integer, valid enum value, ...) the driver + * set when registering the property. + */ int (*atomic_set_property)(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, uint64_t val); + + /** + * @atomic_get_property: + * + * Reads out the decoded driver-private property. This is used to + * implement the GETPLANE IOCTL. + * + * Do not call this function directly, use + * drm_atomic_plane_get_property() instead. + * + * This callback is optional if the driver does not support any + * driver-private atomic properties. + * + * RETURNS: + * + * 0 on success, -EINVAL if the property isn't implemented by the + * driver (which should never happen, the core only asks for + * properties attached to this plane).
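For reference, an atomic driver that does not need driver-private plane state can route all of the hooks documented above straight to the helper library; a minimal sketch (foo_plane_funcs is a hypothetical name, the assigned functions are the real helpers):

static const struct drm_plane_funcs foo_plane_funcs = {
	/* Legacy entry points, implemented in terms of the atomic paths. */
	.update_plane		= drm_atomic_helper_update_plane,
	.disable_plane		= drm_atomic_helper_disable_plane,
	.destroy		= drm_plane_cleanup,
	/* Default atomic state handling for non-subclassed state. */
	.reset			= drm_atomic_helper_plane_reset,
	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
};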
+ */ int (*atomic_get_property)(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, @@ -828,6 +1478,7 @@ enum drm_plane_type { DRM_PLANE_TYPE_CURSOR, }; + /** * struct drm_plane - central DRM plane control structure * @dev: DRM device this plane belongs to @@ -850,6 +1501,8 @@ struct drm_plane { struct drm_device *dev; struct list_head head; + char *name; + struct drm_modeset_lock mutex; struct drm_mode_object base; @@ -870,7 +1523,7 @@ struct drm_plane { enum drm_plane_type type; - const void *helper_private; + const struct drm_plane_helper_funcs *helper_private; struct drm_plane_state *state; }; @@ -878,24 +1531,114 @@ struct drm_plane { /** * struct drm_bridge_funcs - drm_bridge control functions * @attach: Called during drm_bridge_attach - * @mode_fixup: Try to fixup (or reject entirely) proposed mode for this bridge - * @disable: Called right before encoder prepare, disables the bridge - * @post_disable: Called right after encoder prepare, for lockstepped disable - * @mode_set: Set this mode to the bridge - * @pre_enable: Called right before encoder commit, for lockstepped commit - * @enable: Called right after encoder commit, enables the bridge */ struct drm_bridge_funcs { int (*attach)(struct drm_bridge *bridge); + + /** + * @mode_fixup: + * + * This callback is used to validate and adjust a mode. The parameter + * mode is the display mode that should be fed to the next element in + * the display chain, either the final &drm_connector or the next + * &drm_bridge. The parameter adjusted_mode is the input mode the bridge + * requires. It can be modified by this callback and does not need to + * match mode. + * + * This is the only hook that allows a bridge to reject a modeset. If + * this function passes, all other callbacks must succeed for this + * configuration. + * + * NOTE: + * + * This function is called in the check phase of atomic modesets, which + * can be aborted for any reason (including on userspace's request to + * just check whether a configuration would be possible). Drivers MUST + * NOT touch any persistent state (hardware or software) or data + * structures except the passed in adjusted_mode parameter. + * + * RETURNS: + * + * True if an acceptable configuration is possible, false if the modeset + * operation should be rejected. + */ bool (*mode_fixup)(struct drm_bridge *bridge, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); + /** + * @disable: + * + * This callback should disable the bridge. It is called right before + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called before that + * bridge's ->disable() function. If the preceding element is a + * &drm_encoder it's called right before the encoder's ->disable(), + * ->prepare() or ->dpms() hook from struct &drm_encoder_helper_funcs. + * + * The bridge can assume that the display pipe (i.e. clocks and timing + * signals) feeding it is still running when this callback is called. + */ void (*disable)(struct drm_bridge *bridge); + + /** + * @post_disable: + * + * This callback should disable the bridge. It is called right after + * the preceding element in the display pipe is disabled. If the + * preceding element is a bridge this means it's called after that + * bridge's ->post_disable() function. If the preceding element is a + * &drm_encoder it's called right after the encoder's ->disable(), + * ->prepare() or ->dpms() hook from struct &drm_encoder_helper_funcs.
+ * + * The bridge must assume that the display pipe (i.e. clocks and timing + * signals) feeding it is no longer running when this callback is + * called. + */ void (*post_disable)(struct drm_bridge *bridge); + + /** + * @mode_set: + * + * This callback should set the given mode on the bridge. It is called + * after the ->mode_set() callback for the preceding element in the + * display pipeline has been called already. The display pipe (i.e. + * clocks and timing signals) is off when this function is called. + */ void (*mode_set)(struct drm_bridge *bridge, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); + /** + * @pre_enable: + * + * This callback should enable the bridge. It is called right before + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called before that + * bridge's ->pre_enable() function. If the preceding element is a + * &drm_encoder it's called right before the encoder's ->enable(), + * ->commit() or ->dpms() hook from struct &drm_encoder_helper_funcs. + * + * The display pipe (i.e. clocks and timing signals) feeding this bridge + * will not yet be running when this callback is called. The bridge must + * not enable the display link feeding the next bridge in the chain (if + * there is one) when this callback is called. + */ void (*pre_enable)(struct drm_bridge *bridge); + + /** + * @enable: + * + * This callback should enable the bridge. It is called right after + * the preceding element in the display pipe is enabled. If the + * preceding element is a bridge this means it's called after that + * bridge's ->enable() function. If the preceding element is a + * &drm_encoder it's called right after the encoder's ->enable(), + * ->commit() or ->dpms() hook from struct &drm_encoder_helper_funcs. + * + * The bridge can assume that the display pipe (i.e. clocks and timing + * signals) feeding it is running when this callback is called. This + * callback must enable the display link feeding the next bridge in the + * chain if there is one. + */ void (*enable)(struct drm_bridge *bridge); }; @@ -926,7 +1669,7 @@ struct drm_bridge { * struct drm_atomic_state - the global state object for atomic updates * @dev: parent DRM device * @allow_modeset: allow full modeset - * @legacy_cursor_update: hint to enforce legacy cursor ioctl semantics + * @legacy_cursor_update: hint to enforce legacy cursor IOCTL semantics * @planes: pointer to array of plane pointers * @plane_states: pointer to array of plane states pointers * @crtcs: pointer to array of CRTC pointers @@ -981,31 +1724,265 @@ struct drm_mode_set { /** * struct drm_mode_config_funcs - basic driver provided mode setting functions - @fb_create: create a new framebuffer object - @output_poll_changed: function to handle output configuration changes - @atomic_check: check whether a given atomic state update is possible - @atomic_commit: commit an atomic state update previously verified with - atomic_check() - @atomic_state_alloc: allocate a new atomic state - @atomic_state_clear: clear the atomic state - @atomic_state_free: free the atomic state * * Some global (i.e. not per-CRTC, connector, etc) mode setting functions that * involve drivers. */ struct drm_mode_config_funcs { + /** + * @fb_create: + * + * Create a new framebuffer object. The core does basic checks on the + * requested metadata, but most of that is left to the driver. See + * struct &drm_mode_fb_cmd2 for details.
+ * + * If the parameters are deemed valid and the backing storage objects in + * the underlying memory manager all exist, then the driver allocates + * a new &drm_framebuffer structure, subclassed to contain + * driver-specific information (like the internal native buffer object + * references). It also needs to fill out all relevant metadata, which + * should be done by calling drm_helper_mode_fill_fb_struct(). + * + * The initialization is finalized by calling drm_framebuffer_init(), + * which registers the framebuffer and makes it accessible to other + * threads. + * + * RETURNS: + * + * A new framebuffer with an initial reference count of 1 or a negative + * error code encoded with ERR_PTR(). + */ struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd); + + /** + * @output_poll_changed: + * + * Callback used by helpers to inform the driver of output configuration + * changes. + * + * Drivers implementing fbdev emulation with the helpers can call + * drm_fb_helper_hotplug_event() from this hook to inform the fbdev + * helper of output changes. + * + * FIXME: + * + * Except that there's no vtable for device-level helper callbacks, + * there's no reason this is a core function. + */ void (*output_poll_changed)(struct drm_device *dev); + /** + * @atomic_check: + * + * This is the only hook to validate an atomic modeset update. This + * function must reject any modeset and state changes which the hardware + * or driver doesn't support. This includes but is of course not limited + * to: + * + * - Checking that the modes, framebuffers, scaling and placement + * requirements and so on are within the limits of the hardware. + * + * - Checking that any hidden shared resources are not oversubscribed. + * This can be shared PLLs, shared lanes, overall memory bandwidth, + * display fifo space (where shared between planes or maybe even + * CRTCs). + * + * - Checking that virtualized resources exported to userspace are not + * oversubscribed. For various reasons it can make sense to expose + * more planes, CRTCs or encoders than are physically there. One + * example is dual-pipe operations (which generally should be hidden + * from userspace when lockstepped in hardware, exposed otherwise), + * where a plane might need 1 hardware plane (if it's just on one + * pipe), 2 hardware planes (when it spans both pipes) or maybe even + * share a hardware plane with a 2nd plane (if there's a compatible + * plane requested on the area handled by the other pipe). + * + * - Check that any transitional state is possible and that if + * requested, the update can indeed be done in the vblank period + * without temporarily disabling some functions. + * + * - Check any other constraints the driver or hardware might have. + * + * - This callback also needs to correctly fill out the &drm_crtc_state + * in this update to make sure that drm_atomic_crtc_needs_modeset() + * reflects the nature of the possible update and returns true if and + * only if the update cannot be applied without tearing within one + * vblank on that CRTC. The core uses that information to reject + * updates which require a full modeset (i.e. blanking the screen, or + * at least pausing updates for a substantial amount of time) if + * userspace has disallowed that in its request. + * + * - The driver also does not need to repeat basic input validation + * like done for the corresponding legacy entry points. The core does + * that before calling this hook.
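For drivers built on the atomic helpers, the checks enumerated above are largely provided by drm_atomic_helper_check(); a minimal hook table might then look as follows. This is a sketch only: foo_fb_create stands in for the driver's own ->fb_create() implementation as described above, and foo_mode_config_funcs is a hypothetical name.

/* Hypothetical driver implementation of the ->fb_create() hook. */
static struct drm_framebuffer *foo_fb_create(struct drm_device *dev,
					     struct drm_file *file_priv,
					     const struct drm_mode_fb_cmd2 *mode_cmd);

static const struct drm_mode_config_funcs foo_mode_config_funcs = {
	.fb_create	= foo_fb_create,
	.atomic_check	= drm_atomic_helper_check,
	.atomic_commit	= drm_atomic_helper_commit,
};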
+ * + * See the documentation of @atomic_commit for an exhaustive list of + * error conditions which don't have to be checked at the + * ->atomic_check() stage. + * + * See the documentation for struct &drm_atomic_state for how exactly + * an atomic modeset update is described. + * + * Drivers using the atomic helpers can implement this hook using + * drm_atomic_helper_check(), or one of the exported sub-functions of + * it. + * + * RETURNS: + * + * 0 on success or one of the below negative error codes: + * + * - -EINVAL, if any of the above constraints are violated. + * + * - -EDEADLK, when returned from an attempt to acquire an additional + * &drm_modeset_lock through drm_modeset_lock(). + * + * - -ENOMEM, if allocating additional state sub-structures failed due + * to lack of memory. + * + * - -EINTR, -EAGAIN or -ERESTARTSYS, if the IOCTL should be restarted. + * This can either be due to a pending signal, or because the driver + * needs to completely bail out to recover from an exceptional + * situation like a GPU hang. From a userspace point of view all errors are + * treated equally. + */ int (*atomic_check)(struct drm_device *dev, - struct drm_atomic_state *a); + struct drm_atomic_state *state); + + /** + * @atomic_commit: + * + * This is the only hook to commit an atomic modeset update. The core + * guarantees that @atomic_check has been called successfully before + * calling this function, and that nothing has been changed in the + * interim. + * + * See the documentation for struct &drm_atomic_state for how exactly + * an atomic modeset update is described. + * + * Drivers using the atomic helpers can implement this hook using + * drm_atomic_helper_commit(), or one of the exported sub-functions of + * it. + * + * Asynchronous commits (as indicated with the async parameter) must + * do any preparatory work which might result in an unsuccessful commit + * in the context of this callback. The only exceptions are hardware + * errors resulting in -EIO. But even in that case the driver must + * ensure that the display pipe is at least running, to avoid + * compositors crashing when pageflips don't work. Anything else, + * specifically committing the update to the hardware, should be done + * without blocking the caller. For updates which do not require a + * modeset this must be guaranteed. + * + * The driver must wait for any pending rendering to the new + * framebuffers to complete before executing the flip. It should also + * wait for any pending rendering from other drivers if the underlying + * buffer is a shared dma-buf. Asynchronous commits must not wait for + * rendering in the context of this callback. + * + * An application can request to be notified when the atomic commit has + * completed. These events are per-CRTC and can be distinguished by the + * CRTC index supplied in &drm_event to userspace. + * + * The drm core will supply a struct &drm_event in the event + * member of each CRTC's &drm_crtc_state structure. This can be handled by the + * drm_crtc_send_vblank_event() function, which the driver should call on + * the provided event upon completion of the atomic commit. Note that if + * the driver supports vblank signalling and timestamping the vblank + * counters and timestamps must agree with the ones returned from page + * flip events. With the current vblank helper infrastructure this can + * be achieved by holding a vblank reference while the page flip is + * pending, acquired through drm_crtc_vblank_get() and released with + * drm_crtc_vblank_put().
Drivers are free to implement their own vblank + * counter and timestamp tracking though, e.g. if they have accurate + * timestamp registers in hardware. + * + * NOTE: + * + * Drivers are not allowed to shut down any display pipe successfully + * enabled through an atomic commit on their own. Doing so can result in + * compositors crashing if a page flip is suddenly rejected because the + * pipe is off. + * + * RETURNS: + * + * 0 on success or one of the below negative error codes: + * + * - -EBUSY, if an asynchronous update is requested and there is + * an earlier update pending. Drivers are allowed to support a queue + * of outstanding updates, but currently no driver supports that. + * Note that drivers must wait for preceding updates to complete if a + * synchronous update is requested, they are not allowed to fail the + * commit in that case. + * + * - -ENOMEM, if the driver failed to allocate memory. Specifically + * this can happen when trying to pin framebuffers, which must only + * be done when committing the state. + * + * - -ENOSPC, as a refinement of the more generic -ENOMEM to indicate + * that the driver has run out of vram, iommu space or similar GPU + * address space needed for the framebuffer. + * + * - -EIO, if the hardware completely died. + * + * - -EINTR, -EAGAIN or -ERESTARTSYS, if the IOCTL should be restarted. + * This can either be due to a pending signal, or because the driver + * needs to completely bail out to recover from an exceptional + * situation like a GPU hang. From a userspace point of view all errors are + * treated equally. + * + * This list is exhaustive. Specifically this hook is not allowed to + * return -EINVAL (any invalid requests should be caught in + * @atomic_check) or -EDEADLK (this function must not acquire + * additional modeset locks). + */ int (*atomic_commit)(struct drm_device *dev, - struct drm_atomic_state *a, + struct drm_atomic_state *state, bool async); + + /** + * @atomic_state_alloc: + * + * This optional hook can be used by drivers that want to subclass struct + * &drm_atomic_state to be able to track their own driver-private global + * state easily. If this hook is implemented, drivers must also + * implement @atomic_state_clear and @atomic_state_free. + * + * RETURNS: + * + * A new &drm_atomic_state on success or NULL on failure. + */ struct drm_atomic_state *(*atomic_state_alloc)(struct drm_device *dev); + + /** + * @atomic_state_clear: + * + * This hook must clear any driver private state duplicated into the + * passed-in &drm_atomic_state. This hook is called when the caller + * encountered a &drm_modeset_lock deadlock and needs to drop all + * already acquired locks as part of the deadlock avoidance dance + * implemented in drm_modeset_lock_backoff(). + * + * Any duplicated state must be invalidated since a concurrent atomic + * update might change it, and the drm atomic interfaces always apply + * updates as relative changes to the current state. + * + * Drivers that implement this must call drm_atomic_state_default_clear() + * to clear common state. + */ void (*atomic_state_clear)(struct drm_atomic_state *state); + + /** + * @atomic_state_free: + * + * This hook needs to free driver private resources and the &drm_atomic_state + * itself. Note that the core first calls drm_atomic_state_clear() to + * avoid code duplication between the clear and free hooks. + * + * Drivers that implement this must call drm_atomic_state_default_release() + * to release common resources.
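Putting @atomic_state_alloc, @atomic_state_clear and @atomic_state_free together, a driver subclassing struct drm_atomic_state could look roughly like the sketch below. The foo_* names are hypothetical; drm_atomic_state_init(), drm_atomic_state_default_clear() and drm_atomic_state_default_release() are the real core functions:

struct foo_atomic_state {
	struct drm_atomic_state base;	/* must be embedded for upcasting */
	u32 shared_bandwidth;		/* hypothetical global driver state */
};

#define to_foo_atomic_state(s) container_of(s, struct foo_atomic_state, base)

static struct drm_atomic_state *foo_atomic_state_alloc(struct drm_device *dev)
{
	struct foo_atomic_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return NULL;

	if (drm_atomic_state_init(dev, &state->base) < 0) {
		kfree(state);
		return NULL;
	}

	return &state->base;
}

static void foo_atomic_state_clear(struct drm_atomic_state *state)
{
	/* Invalidate the duplicated driver-private state as well. */
	to_foo_atomic_state(state)->shared_bandwidth = 0;
	drm_atomic_state_default_clear(state);
}

static void foo_atomic_state_free(struct drm_atomic_state *state)
{
	drm_atomic_state_default_release(state);
	kfree(to_foo_atomic_state(state));
}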
+ */ void (*atomic_state_free)(struct drm_atomic_state *state); }; @@ -1014,7 +1991,7 @@ struct drm_mode_config_funcs { * @mutex: mutex protecting KMS related lists and structures * @connection_mutex: ww mutex protecting connector state and routing * @acquire_ctx: global implicit acquire context used by atomic drivers for - * legacy ioctls + * legacy IOCTLs * @idr_mutex: mutex for KMS ID allocation and management * @crtc_idr: main KMS ID tracking object * @fb_lock: mutex to protect fb state and lists @@ -1187,11 +2164,13 @@ struct drm_prop_enum_list { char *name; }; -extern int drm_crtc_init_with_planes(struct drm_device *dev, - struct drm_crtc *crtc, - struct drm_plane *primary, - struct drm_plane *cursor, - const struct drm_crtc_funcs *funcs); +extern __printf(6, 7) +int drm_crtc_init_with_planes(struct drm_device *dev, + struct drm_crtc *crtc, + struct drm_plane *primary, + struct drm_plane *cursor, + const struct drm_crtc_funcs *funcs, + const char *name, ...); extern void drm_crtc_cleanup(struct drm_crtc *crtc); extern unsigned int drm_crtc_index(struct drm_crtc *crtc); @@ -1237,10 +2216,11 @@ void drm_bridge_mode_set(struct drm_bridge *bridge, void drm_bridge_pre_enable(struct drm_bridge *bridge); void drm_bridge_enable(struct drm_bridge *bridge); -extern int drm_encoder_init(struct drm_device *dev, - struct drm_encoder *encoder, - const struct drm_encoder_funcs *funcs, - int encoder_type); +extern __printf(5, 6) +int drm_encoder_init(struct drm_device *dev, + struct drm_encoder *encoder, + const struct drm_encoder_funcs *funcs, + int encoder_type, const char *name, ...); /** * drm_encoder_crtc_ok - can a given crtc drive a given encoder? @@ -1255,13 +2235,15 @@ static inline bool drm_encoder_crtc_ok(struct drm_encoder *encoder, return !!(encoder->possible_crtcs & drm_crtc_mask(crtc)); } -extern int drm_universal_plane_init(struct drm_device *dev, - struct drm_plane *plane, - unsigned long possible_crtcs, - const struct drm_plane_funcs *funcs, - const uint32_t *formats, - unsigned int format_count, - enum drm_plane_type type); +extern __printf(8, 9) +int drm_universal_plane_init(struct drm_device *dev, + struct drm_plane *plane, + unsigned long possible_crtcs, + const struct drm_plane_funcs *funcs, + const uint32_t *formats, + unsigned int format_count, + enum drm_plane_type type, + const char *name, ...); extern int drm_plane_init(struct drm_device *dev, struct drm_plane *plane, unsigned long possible_crtcs, diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index e22ab29d2d00..4b37afa2b73b 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -40,148 +40,7 @@ #include <linux/fb.h> #include <drm/drm_crtc.h> - -enum mode_set_atomic { - LEAVE_ATOMIC_MODE_SET, - ENTER_ATOMIC_MODE_SET, -}; - -/** - * struct drm_crtc_helper_funcs - helper operations for CRTCs - * @dpms: set power state - * @prepare: prepare the CRTC, called before @mode_set - * @commit: commit changes to CRTC, called after @mode_set - * @mode_fixup: try to fixup proposed mode for this CRTC - * @mode_set: set this mode - * @mode_set_nofb: set mode only (no scanout buffer attached) - * @mode_set_base: update the scanout buffer - * @mode_set_base_atomic: non-blocking mode set (used for kgdb support) - * @load_lut: load color palette - * @disable: disable CRTC when no longer in use - * @enable: enable CRTC - * @atomic_check: check for validity of an atomic state - * @atomic_begin: begin atomic update - * @atomic_flush: flush atomic update - * - * The helper operations are 
called by the mid-layer CRTC helper. - * - * Note that with atomic helpers @dpms, @prepare and @commit hooks are - * deprecated. Used @enable and @disable instead exclusively. - * - * With legacy crtc helpers there's a big semantic difference between @disable - * and the other hooks: @disable also needs to release any resources acquired in - * @mode_set (like shared PLLs). - */ -struct drm_crtc_helper_funcs { - /* - * Control power levels on the CRTC. If the mode passed in is - * unsupported, the provider must use the next lowest power level. - */ - void (*dpms)(struct drm_crtc *crtc, int mode); - void (*prepare)(struct drm_crtc *crtc); - void (*commit)(struct drm_crtc *crtc); - - /* Provider can fixup or change mode timings before modeset occurs */ - bool (*mode_fixup)(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); - /* Actually set the mode */ - int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode, int x, int y, - struct drm_framebuffer *old_fb); - /* Actually set the mode for atomic helpers, optional */ - void (*mode_set_nofb)(struct drm_crtc *crtc); - - /* Move the crtc on the current fb to the given position *optional* */ - int (*mode_set_base)(struct drm_crtc *crtc, int x, int y, - struct drm_framebuffer *old_fb); - int (*mode_set_base_atomic)(struct drm_crtc *crtc, - struct drm_framebuffer *fb, int x, int y, - enum mode_set_atomic); - - /* reload the current crtc LUT */ - void (*load_lut)(struct drm_crtc *crtc); - - void (*disable)(struct drm_crtc *crtc); - void (*enable)(struct drm_crtc *crtc); - - /* atomic helpers */ - int (*atomic_check)(struct drm_crtc *crtc, - struct drm_crtc_state *state); - void (*atomic_begin)(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state); - void (*atomic_flush)(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state); -}; - -/** - * struct drm_encoder_helper_funcs - helper operations for encoders - * @dpms: set power state - * @save: save connector state - * @restore: restore connector state - * @mode_fixup: try to fixup proposed mode for this connector - * @prepare: part of the disable sequence, called before the CRTC modeset - * @commit: called after the CRTC modeset - * @mode_set: set this mode, optional for atomic helpers - * @get_crtc: return CRTC that the encoder is currently attached to - * @detect: connection status detection - * @disable: disable encoder when not in use (overrides DPMS off) - * @enable: enable encoder - * @atomic_check: check for validity of an atomic update - * - * The helper operations are called by the mid-layer CRTC helper. - * - * Note that with atomic helpers @dpms, @prepare and @commit hooks are - * deprecated. Used @enable and @disable instead exclusively. - * - * With legacy crtc helpers there's a big semantic difference between @disable - * and the other hooks: @disable also needs to release any resources acquired in - * @mode_set (like shared PLLs). 
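As a usage sketch for the trailing printf-style name argument added to drm_crtc_init_with_planes(), drm_encoder_init() and drm_universal_plane_init() in the drm_crtc.h hunk above, a hypothetical driver might register a named CRTC like this (struct foo_crtc and foo_crtc_funcs are assumptions):

        struct foo_crtc {
                struct drm_crtc base;   /* hypothetical driver wrapper */
        };

        static int foo_crtc_create(struct drm_device *dev, struct foo_crtc *foo,
                                   struct drm_plane *primary, struct drm_plane *cursor,
                                   int pipe)
        {
                /* The new trailing arguments give the object a debug-friendly name. */
                return drm_crtc_init_with_planes(dev, &foo->base, primary, cursor,
                                                 &foo_crtc_funcs, "pipe-%c",
                                                 'A' + pipe);
        }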
- */ -struct drm_encoder_helper_funcs { - void (*dpms)(struct drm_encoder *encoder, int mode); - void (*save)(struct drm_encoder *encoder); - void (*restore)(struct drm_encoder *encoder); - - bool (*mode_fixup)(struct drm_encoder *encoder, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); - void (*prepare)(struct drm_encoder *encoder); - void (*commit)(struct drm_encoder *encoder); - void (*mode_set)(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); - struct drm_crtc *(*get_crtc)(struct drm_encoder *encoder); - /* detect for DAC style encoders */ - enum drm_connector_status (*detect)(struct drm_encoder *encoder, - struct drm_connector *connector); - void (*disable)(struct drm_encoder *encoder); - - void (*enable)(struct drm_encoder *encoder); - - /* atomic helpers */ - int (*atomic_check)(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state); -}; - -/** - * struct drm_connector_helper_funcs - helper operations for connectors - * @get_modes: get mode list for this connector - * @mode_valid: is this mode valid on the given connector? (optional) - * @best_encoder: return the preferred encoder for this connector - * @atomic_best_encoder: atomic version of @best_encoder - * - * The helper operations are called by the mid-layer CRTC helper. - */ -struct drm_connector_helper_funcs { - int (*get_modes)(struct drm_connector *connector); - enum drm_mode_status (*mode_valid)(struct drm_connector *connector, - struct drm_display_mode *mode); - struct drm_encoder *(*best_encoder)(struct drm_connector *connector); - struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector, - struct drm_connector_state *connector_state); -}; +#include <drm/drm_modeset_helper_vtables.h> extern void drm_helper_disable_unused_functions(struct drm_device *dev); extern int drm_crtc_helper_set_config(struct drm_mode_set *set); @@ -199,24 +58,6 @@ extern void drm_helper_move_panel_connectors_to_head(struct drm_device *); extern void drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb, const struct drm_mode_fb_cmd2 *mode_cmd); -static inline void drm_crtc_helper_add(struct drm_crtc *crtc, - const struct drm_crtc_helper_funcs *funcs) -{ - crtc->helper_private = funcs; -} - -static inline void drm_encoder_helper_add(struct drm_encoder *encoder, - const struct drm_encoder_helper_funcs *funcs) -{ - encoder->helper_private = funcs; -} - -static inline void drm_connector_helper_add(struct drm_connector *connector, - const struct drm_connector_helper_funcs *funcs) -{ - connector->helper_private = funcs; -} - extern void drm_helper_resume_force_mode(struct drm_device *dev); int drm_helper_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, @@ -229,10 +70,6 @@ int drm_helper_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, extern int drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY); -extern int drm_helper_probe_single_connector_modes_nomerge(struct drm_connector - *connector, - uint32_t maxX, - uint32_t maxY); extern void drm_kms_helper_poll_init(struct drm_device *dev); extern void drm_kms_helper_poll_fini(struct drm_device *dev); extern bool drm_helper_hpd_irq_event(struct drm_device *dev); diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index 8b9cc3671858..82cdf611393d 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -95,7 +95,7 
@@ struct drm_encoder_slave_funcs { struct drm_encoder_slave { struct drm_encoder base; - struct drm_encoder_slave_funcs *slave_funcs; + const struct drm_encoder_slave_funcs *slave_funcs; void *slave_priv; void *bus_priv; }; diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 87b090c4b730..d8a40dff0d1d 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -34,6 +34,11 @@ struct drm_fb_helper; #include <linux/kgdb.h> +enum mode_set_atomic { + LEAVE_ATOMIC_MODE_SET, + ENTER_ATOMIC_MODE_SET, +}; + struct drm_fb_offset { int x, y; }; @@ -74,25 +79,76 @@ struct drm_fb_helper_surface_size { /** * struct drm_fb_helper_funcs - driver callbacks for the fbdev emulation library - * @gamma_set: Set the given gamma lut register on the given crtc. - * @gamma_get: Read the given gamma lut register on the given crtc, used to - * save the current lut when force-restoring the fbdev for e.g. - * kdbg. - * @fb_probe: Driver callback to allocate and initialize the fbdev info - * structure. Furthermore it also needs to allocate the drm - * framebuffer used to back the fbdev. - * @initial_config: Setup an initial fbdev display configuration * * Driver callbacks used by the fbdev emulation helper library. */ struct drm_fb_helper_funcs { + /** + * @gamma_set: + * + * Set the given gamma LUT register on the given CRTC. + * + * This callback is optional. + * + * FIXME: + * + * This callback is functionally redundant with the core gamma table + * support and simply exists because the fbdev hasn't yet been + * refactored to use the core gamma table interfaces. + */ void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green, u16 blue, int regno); + /** + * @gamma_get: + * + * Read the given gamma LUT register on the given CRTC, used to save the + * current LUT when force-restoring the fbdev for e.g. kgdb. + * + * This callback is optional. + * + * FIXME: + * + * This callback is functionally redundant with the core gamma table + * support and simply exists because the fbdev hasn't yet been + * refactored to use the core gamma table interfaces. + */ void (*gamma_get)(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue, int regno); + /** + * @fb_probe: + * + * Driver callback to allocate and initialize the fbdev info structure. + * Furthermore it also needs to allocate the DRM framebuffer used to + * back the fbdev. + * + * This callback is mandatory. + * + * RETURNS: + * + * The driver should return 0 on success and a negative error code on + * failure. + */ int (*fb_probe)(struct drm_fb_helper *helper, struct drm_fb_helper_surface_size *sizes); + + /** + * @initial_config: + * + * Driver callback to set up an initial fbdev display configuration. + * Drivers can use this callback to tell the fbdev emulation what the + * preferred initial configuration is. This is useful to implement + * smooth booting where the fbdev (and subsequently all userspace) never + * changes the mode, but always inherits the existing configuration. + * + * This callback is optional. + * + * RETURNS: + * + * The driver should return true if a suitable initial configuration has + * been filled out and false when the fbdev helper should fall back to + * the default probing logic.
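To make the @gamma_set/@gamma_get pairing concrete, here is a minimal sketch that extends the hypothetical foo_crtc wrapper from the earlier sketch with a LUT cache. The register layout (FOO_LUT), the mmio mapping and to_foo_crtc() are all assumptions:

        struct foo_crtc {
                struct drm_crtc base;
                void __iomem *mmio;
                u32 lut[256];
        };

        static inline struct foo_crtc *to_foo_crtc(struct drm_crtc *crtc)
        {
                return container_of(crtc, struct foo_crtc, base);
        }

        static void foo_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
                                  u16 blue, int regno)
        {
                struct foo_crtc *foo = to_foo_crtc(crtc);

                /* Cache the value so gamma_get() can return it later. */
                foo->lut[regno] = (red >> 8) << 16 | (green >> 8) << 8 | (blue >> 8);
                writel(foo->lut[regno], foo->mmio + FOO_LUT(regno));
        }

        static void foo_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
                                  u16 *blue, int regno)
        {
                struct foo_crtc *foo = to_foo_crtc(crtc);

                *red = ((foo->lut[regno] >> 16) & 0xff) << 8;
                *green = ((foo->lut[regno] >> 8) & 0xff) << 8;
                *blue = (foo->lut[regno] & 0xff) << 8;
        }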
+ */ bool (*initial_config)(struct drm_fb_helper *fb_helper, struct drm_fb_helper_crtc **crtcs, struct drm_display_mode **modes, @@ -105,18 +161,22 @@ struct drm_fb_helper_connector { }; /** - * struct drm_fb_helper - helper to emulate fbdev on top of kms - * @fb: Scanout framebuffer object - * @dev: DRM device + * struct drm_fb_helper - main structure to emulate fbdev on top of KMS + * @fb: Scanout framebuffer object + * @dev: DRM device * @crtc_count: number of possible CRTCs * @crtc_info: per-CRTC helper state (mode, x/y offset, etc) * @connector_count: number of connected connectors * @connector_info_alloc_count: size of connector_info + * @connector_info: array of per-connector information * @funcs: driver callbacks for fb helper * @fbdev: emulated fbdev device info struct * @pseudo_palette: fake palette of 16 colors - * @kernel_fb_list: list_head in kernel_fb_helper_list - * @delayed_hotplug: was there a hotplug while kms master active? + * + * This is the main structure used by the fbdev helpers. Drivers supporting + * fbdev emulation should embed this into their overall driver structure. + * Drivers must also fill out a struct &drm_fb_helper_funcs with a few + * operations. */ struct drm_fb_helper { struct drm_framebuffer *fb; @@ -129,10 +189,21 @@ struct drm_fb_helper { const struct drm_fb_helper_funcs *funcs; struct fb_info *fbdev; u32 pseudo_palette[17]; + + /** + * @kernel_fb_list: + * + * Entry on the global kernel_fb_helper_list, used for kgdb entry/exit. + */ struct list_head kernel_fb_list; - /* we got a hotplug but fbdev wasn't running the console - delay until next set_par */ + /** + * @delayed_hotplug: + * + * A hotplug was received while fbdev wasn't in control of the DRM + * device, i.e. another KMS master was active. The output configuration + * needs to be reprobed when fbdev is in control again. + */ bool delayed_hotplug; /** diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index f1d8d0dbb4f1..1b3b1f8c8cdf 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -163,9 +163,36 @@ static inline struct mipi_dsi_device *to_mipi_dsi_device(struct device *dev) return container_of(dev, struct mipi_dsi_device, dev); } +/** + * mipi_dsi_pixel_format_to_bpp - obtain the number of bits per pixel for any + * given pixel format defined by the MIPI DSI + * specification + * @fmt: MIPI DSI pixel format + * + * Returns: The number of bits per pixel of the given pixel format. + */ +static inline int mipi_dsi_pixel_format_to_bpp(enum mipi_dsi_pixel_format fmt) +{ + switch (fmt) { + case MIPI_DSI_FMT_RGB888: + case MIPI_DSI_FMT_RGB666: + return 24; + + case MIPI_DSI_FMT_RGB666_PACKED: + return 18; + + case MIPI_DSI_FMT_RGB565: + return 16; + } + + return -EINVAL; +} + struct mipi_dsi_device *of_find_mipi_dsi_device_by_node(struct device_node *np); int mipi_dsi_attach(struct mipi_dsi_device *dsi); int mipi_dsi_detach(struct mipi_dsi_device *dsi); +int mipi_dsi_shutdown_peripheral(struct mipi_dsi_device *dsi); +int mipi_dsi_turn_on_peripheral(struct mipi_dsi_device *dsi); int mipi_dsi_set_maximum_return_packet_size(struct mipi_dsi_device *dsi, u16 value); diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h index 08a8cac9e555..625966a906f2 100644 --- a/include/drm/drm_modes.h +++ b/include/drm/drm_modes.h @@ -35,46 +35,91 @@ * structures).
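The new mipi_dsi_pixel_format_to_bpp() helper above is typically used for link bandwidth math. A sketch under assumed names (foo_dsi_lane_rate_khz is hypothetical; dsi->format and dsi->lanes are real struct mipi_dsi_device fields):

        static int foo_dsi_lane_rate_khz(struct mipi_dsi_device *dsi,
                                         unsigned long pixel_clock_khz)
        {
                int bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);

                if (bpp < 0)
                        return bpp;     /* -EINVAL for an unknown pixel format */

                /* Total bit rate for the mode, split across the configured lanes. */
                return DIV_ROUND_UP(pixel_clock_khz * bpp, dsi->lanes);
        }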
*/ +/** + * enum drm_mode_status - hardware support status of a mode + * @MODE_OK: Mode OK + * @MODE_HSYNC: hsync out of range + * @MODE_VSYNC: vsync out of range + * @MODE_H_ILLEGAL: mode has illegal horizontal timings + * @MODE_V_ILLEGAL: mode has illegal vertical timings + * @MODE_BAD_WIDTH: requires an unsupported linepitch + * @MODE_NOMODE: no mode with a matching name + * @MODE_NO_INTERLACE: interlaced mode not supported + * @MODE_NO_DBLESCAN: doublescan mode not supported + * @MODE_NO_VSCAN: multiscan mode not supported + * @MODE_MEM: insufficient video memory + * @MODE_VIRTUAL_X: mode width too large for specified virtual size + * @MODE_VIRTUAL_Y: mode height too large for specified virtual size + * @MODE_MEM_VIRT: insufficient video memory given virtual size + * @MODE_NOCLOCK: no fixed clock available + * @MODE_CLOCK_HIGH: clock required is too high + * @MODE_CLOCK_LOW: clock required is too low + * @MODE_CLOCK_RANGE: clock/mode isn't in a ClockRange + * @MODE_BAD_HVALUE: horizontal timing was out of range + * @MODE_BAD_VVALUE: vertical timing was out of range + * @MODE_BAD_VSCAN: VScan value out of range + * @MODE_HSYNC_NARROW: horizontal sync too narrow + * @MODE_HSYNC_WIDE: horizontal sync too wide + * @MODE_HBLANK_NARROW: horizontal blanking too narrow + * @MODE_HBLANK_WIDE: horizontal blanking too wide + * @MODE_VSYNC_NARROW: vertical sync too narrow + * @MODE_VSYNC_WIDE: vertical sync too wide + * @MODE_VBLANK_NARROW: vertical blanking too narrow + * @MODE_VBLANK_WIDE: vertical blanking too wide + * @MODE_PANEL: exceeds panel dimensions + * @MODE_INTERLACE_WIDTH: width too large for interlaced mode + * @MODE_ONE_WIDTH: only one width is supported + * @MODE_ONE_HEIGHT: only one height is supported + * @MODE_ONE_SIZE: only one resolution is supported + * @MODE_NO_REDUCED: monitor doesn't accept reduced blanking + * @MODE_NO_STEREO: stereo modes not supported + * @MODE_STALE: mode has become stale + * @MODE_BAD: unspecified reason + * @MODE_ERROR: error condition + * + * This enum is used to filter out modes not supported by the driver/hardware + * combination.
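As an illustration of how these status codes are typically used, a hypothetical connector ->mode_valid() hook might filter probed modes like this (the 165 MHz single-link limit is just an example constraint):

        static enum drm_mode_status
        foo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode)
        {
                if (mode->clock > 165000)       /* clock is in kHz */
                        return MODE_CLOCK_HIGH;

                if (mode->flags & DRM_MODE_FLAG_INTERLACE)
                        return MODE_NO_INTERLACE;

                return MODE_OK;
        }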
+ */ enum drm_mode_status { - MODE_OK = 0, /* Mode OK */ - MODE_HSYNC, /* hsync out of range */ - MODE_VSYNC, /* vsync out of range */ - MODE_H_ILLEGAL, /* mode has illegal horizontal timings */ - MODE_V_ILLEGAL, /* mode has illegal horizontal timings */ - MODE_BAD_WIDTH, /* requires an unsupported linepitch */ - MODE_NOMODE, /* no mode with a matching name */ - MODE_NO_INTERLACE, /* interlaced mode not supported */ - MODE_NO_DBLESCAN, /* doublescan mode not supported */ - MODE_NO_VSCAN, /* multiscan mode not supported */ - MODE_MEM, /* insufficient video memory */ - MODE_VIRTUAL_X, /* mode width too large for specified virtual size */ - MODE_VIRTUAL_Y, /* mode height too large for specified virtual size */ - MODE_MEM_VIRT, /* insufficient video memory given virtual size */ - MODE_NOCLOCK, /* no fixed clock available */ - MODE_CLOCK_HIGH, /* clock required is too high */ - MODE_CLOCK_LOW, /* clock required is too low */ - MODE_CLOCK_RANGE, /* clock/mode isn't in a ClockRange */ - MODE_BAD_HVALUE, /* horizontal timing was out of range */ - MODE_BAD_VVALUE, /* vertical timing was out of range */ - MODE_BAD_VSCAN, /* VScan value out of range */ - MODE_HSYNC_NARROW, /* horizontal sync too narrow */ - MODE_HSYNC_WIDE, /* horizontal sync too wide */ - MODE_HBLANK_NARROW, /* horizontal blanking too narrow */ - MODE_HBLANK_WIDE, /* horizontal blanking too wide */ - MODE_VSYNC_NARROW, /* vertical sync too narrow */ - MODE_VSYNC_WIDE, /* vertical sync too wide */ - MODE_VBLANK_NARROW, /* vertical blanking too narrow */ - MODE_VBLANK_WIDE, /* vertical blanking too wide */ - MODE_PANEL, /* exceeds panel dimensions */ - MODE_INTERLACE_WIDTH, /* width too large for interlaced mode */ - MODE_ONE_WIDTH, /* only one width is supported */ - MODE_ONE_HEIGHT, /* only one height is supported */ - MODE_ONE_SIZE, /* only one resolution is supported */ - MODE_NO_REDUCED, /* monitor doesn't accept reduced blanking */ - MODE_NO_STEREO, /* stereo modes not supported */ - MODE_UNVERIFIED = -3, /* mode needs to reverified */ - MODE_BAD = -2, /* unspecified reason */ - MODE_ERROR = -1 /* error condition */ + MODE_OK = 0, + MODE_HSYNC, + MODE_VSYNC, + MODE_H_ILLEGAL, + MODE_V_ILLEGAL, + MODE_BAD_WIDTH, + MODE_NOMODE, + MODE_NO_INTERLACE, + MODE_NO_DBLESCAN, + MODE_NO_VSCAN, + MODE_MEM, + MODE_VIRTUAL_X, + MODE_VIRTUAL_Y, + MODE_MEM_VIRT, + MODE_NOCLOCK, + MODE_CLOCK_HIGH, + MODE_CLOCK_LOW, + MODE_CLOCK_RANGE, + MODE_BAD_HVALUE, + MODE_BAD_VVALUE, + MODE_BAD_VSCAN, + MODE_HSYNC_NARROW, + MODE_HSYNC_WIDE, + MODE_HBLANK_NARROW, + MODE_HBLANK_WIDE, + MODE_VSYNC_NARROW, + MODE_VSYNC_WIDE, + MODE_VBLANK_NARROW, + MODE_VBLANK_WIDE, + MODE_PANEL, + MODE_INTERLACE_WIDTH, + MODE_ONE_WIDTH, + MODE_ONE_HEIGHT, + MODE_ONE_SIZE, + MODE_NO_REDUCED, + MODE_NO_STEREO, + MODE_STALE = -3, + MODE_BAD = -2, + MODE_ERROR = -1 }; #define DRM_MODE_TYPE_CLOCK_CRTC_C (DRM_MODE_TYPE_CLOCK_C | \ @@ -96,17 +141,125 @@ enum drm_mode_status { #define DRM_MODE_FLAG_3D_MAX DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF +/** + * struct drm_display_mode - DRM kernel-internal display mode structure + * @hdisplay: horizontal display size + * @hsync_start: horizontal sync start + * @hsync_end: horizontal sync end + * @htotal: horizontal total size + * @hskew: horizontal skew?! + * @vdisplay: vertical display size + * @vsync_start: vertical sync start + * @vsync_end: vertical sync end + * @vtotal: vertical total size + * @vscan: vertical scan?! 
+ * @crtc_hdisplay: hardware mode horizontal display size + * @crtc_hblank_start: hardware mode horizontal blank start + * @crtc_hblank_end: hardware mode horizontal blank end + * @crtc_hsync_start: hardware mode horizontal sync start + * @crtc_hsync_end: hardware mode horizontal sync end + * @crtc_htotal: hardware mode horizontal total size + * @crtc_hskew: hardware mode horizontal skew?! + * @crtc_vdisplay: hardware mode vertical display size + * @crtc_vblank_start: hardware mode vertical blank start + * @crtc_vblank_end: hardware mode vertical blank end + * @crtc_vsync_start: hardware mode vertical sync start + * @crtc_vsync_end: hardware mode vertical sync end + * @crtc_vtotal: hardware mode vertical total size + * + * The horizontal and vertical timings are defined per the following diagram. + * + * + * Active Front Sync Back + * Region Porch Porch + * <-----------------------><----------------><-------------><--------------> + * //////////////////////| + * ////////////////////// | + * ////////////////////// |.................. ................ + * _______________ + * <----- [hv]display -----> + * <------------- [hv]sync_start ------------> + * <--------------------- [hv]sync_end ---------------------> + * <-------------------------------- [hv]total ----------------------------->* + * + * This structure contains two copies of timings. First are the plain timings, + * which specify the logical mode, as it would be for a progressive 1:1 scanout + * at the refresh rate userspace can observe through vblank timestamps. Then + * there's the hardware timings, which are corrected for interlacing, + * double-clocking and similar things. They are provided as a convenience, and + * can be appropriately computed using drm_mode_set_crtcinfo(). + */ struct drm_display_mode { - /* Header */ + /** + * @head: + * + * struct list_head for mode lists. + */ struct list_head head; + + /** + * @base: + * + * A display mode is a normal modeset object, possibly including public + * userspace id. + * + * FIXME: + * + * This can probably be removed since the entire concept of userspace + * managing modes explicitly has never landed in upstream kernel mode + * setting support. + */ struct drm_mode_object base; + /** + * @name: + * + * Human-readable name of the mode, filled out with drm_mode_set_name(). + */ char name[DRM_DISPLAY_MODE_LEN]; + /** + * @status: + * + * Status of the mode, used to filter out modes not supported by the + * hardware. See enum &drm_mode_status. + */ enum drm_mode_status status; + + /** + * @type: + * + * A bitmask of flags, mostly about the source of a mode. Possible flags + * are: + * + * - DRM_MODE_TYPE_BUILTIN: Meant for hard-coded modes, effectively + * unused. + * - DRM_MODE_TYPE_PREFERRED: Preferred mode, usually the native + * resolution of an LCD panel. There should only be one preferred + * mode per connector at any given time. + * - DRM_MODE_TYPE_DRIVER: Mode created by the driver, which is all of + * them really. Drivers must set this bit for all modes they create + * and expose to userspace. + * + * Plus a big list of flags which shouldn't be used at all, but are + * still around since these flags are also used in the userspace ABI: + * + * - DRM_MODE_TYPE_DEFAULT: Again a leftover, use + * DRM_MODE_TYPE_PREFERRED instead. + * - DRM_MODE_TYPE_CLOCK_C and DRM_MODE_TYPE_CRTC_C: Define leftovers + * which are stuck around for hysterical raisins only. No one has an + * idea what they were meant for. Don't use. 
+ * - DRM_MODE_TYPE_USERDEF: Mode defined by userspace, again a vestige + * from older KMS designs where userspace had to first add a custom + * mode to the kernel's mode list before it could use it. Don't use. + */ unsigned int type; - /* Proposed mode values */ + /** + * @clock: + * + * Pixel clock in kHz. + */ int clock; /* in kHz */ int hdisplay; int hsync_start; @@ -118,14 +271,74 @@ struct drm_display_mode { int vsync_end; int vtotal; int vscan; + /** + * @flags: + * + * Sync and timing flags: + * + * - DRM_MODE_FLAG_PHSYNC: horizontal sync is active high. + * - DRM_MODE_FLAG_NHSYNC: horizontal sync is active low. + * - DRM_MODE_FLAG_PVSYNC: vertical sync is active high. + * - DRM_MODE_FLAG_NVSYNC: vertical sync is active low. + * - DRM_MODE_FLAG_INTERLACE: mode is interlaced. + * - DRM_MODE_FLAG_DBLSCAN: mode uses doublescan. + * - DRM_MODE_FLAG_CSYNC: mode uses composite sync. + * - DRM_MODE_FLAG_PCSYNC: composite sync is active high. + * - DRM_MODE_FLAG_NCSYNC: composite sync is active low. + * - DRM_MODE_FLAG_HSKEW: hskew provided (not used?). + * - DRM_MODE_FLAG_BCAST: not used? + * - DRM_MODE_FLAG_PIXMUX: not used? + * - DRM_MODE_FLAG_DBLCLK: double-clocked mode. + * - DRM_MODE_FLAG_CLKDIV2: half-clocked mode. + * + * Additionally there are flags to specify how 3D modes are packed: + * + * - DRM_MODE_FLAG_3D_NONE: normal, non-3D mode. + * - DRM_MODE_FLAG_3D_FRAME_PACKING: 2 full frames for left and right. + * - DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE: interleaved like fields. + * - DRM_MODE_FLAG_3D_LINE_ALTERNATIVE: interleaved lines. + * - DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL: side-by-side full frames. + * - DRM_MODE_FLAG_3D_L_DEPTH: ? + * - DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH: ? + * - DRM_MODE_FLAG_3D_TOP_AND_BOTTOM: frame split into top and bottom + * parts. + * - DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF: frame split into left and + * right parts. + */ unsigned int flags; - /* Addressable image size (may be 0 for projectors, etc.) */ + /** + * @width_mm: + * + * Addressable size of the output in mm; projectors should set this to + * 0. + */ int width_mm; + + /** + * @height_mm: + * + * Addressable size of the output in mm; projectors should set this to + * 0. + */ int height_mm; - /* Actual mode we give to hw */ - int crtc_clock; /* in KHz */ + /** + * @crtc_clock: + * + * Actual pixel or dot clock in the hardware. This differs from the + * logical @clock when e.g. using interlacing, double-clocking, stereo + * modes or other fancy stuff that changes the timings and signals + * actually sent over the wire. + * + * This is again in kHz. + * + * Note that with digital outputs like HDMI or DP there's usually a + * massive confusion between the dot clock and the signal clock at the + * bit encoding level, especially when an 8b/10b encoding is used and + * the difference is exactly a factor of 10. + */ + int crtc_clock; int crtc_hdisplay; int crtc_hblank_start; int crtc_hblank_end; @@ -140,12 +353,48 @@ struct drm_display_mode { int crtc_vsync_end; int crtc_vtotal; - /* Driver private mode info */ + /** + * @private: + * + * Pointer for driver private data. This can only be used for mode + * objects passed to drivers in modeset operations. It shouldn't be used + * by atomic drivers since they can store any additional data by + * subclassing state structures. + */ int *private; + + /** + * @private_flags: + * + * Similar to @private, but just an integer.
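To illustrate the kHz units of @clock and the timing fields, this is essentially the computation the core drm_mode_vrefresh() helper performs for a simple progressive mode. A sketch that deliberately ignores the interlace, doublescan and vscan corrections:

        static int foo_mode_vrefresh(const struct drm_display_mode *mode)
        {
                /* clock is in kHz, so scale by 1000 to end up with Hz. */
                return DIV_ROUND_CLOSEST(mode->clock * 1000,
                                         mode->htotal * mode->vtotal);
        }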
+ */ int private_flags; - int vrefresh; /* in Hz */ - int hsync; /* in kHz */ + /** + * @vrefresh: + * + * Vertical refresh rate, for debug output in human readable form. Not + * used in a functional way. + * + * This value is in Hz. + */ + int vrefresh; + + /** + * @hsync: + * + * Horizontal refresh rate, for debug output in human readable form. Not + * used in a functional way. + * + * This value is in kHz. + */ + int hsync; + + /** + * @picture_aspect_ratio: + * + * Field for setting the HDMI picture aspect ratio of a mode. + */ enum hdmi_picture_aspect picture_aspect_ratio; }; @@ -222,6 +471,8 @@ struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, const struct drm_display_mode *mode); bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2); +bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2); bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2); @@ -232,7 +483,7 @@ enum drm_mode_status drm_mode_validate_size(const struct drm_display_mode *mode, void drm_mode_prune_invalid(struct drm_device *dev, struct list_head *mode_list, bool verbose); void drm_mode_sort(struct list_head *mode_list); -void drm_mode_connector_list_update(struct drm_connector *connector, bool merge_type_bits); +void drm_mode_connector_list_update(struct drm_connector *connector); /* parsing cmdline modes */ bool diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h new file mode 100644 index 000000000000..29e0dc50031d --- /dev/null +++ b/include/drm/drm_modeset_helper_vtables.h @@ -0,0 +1,890 @@ +/* + * Copyright © 2006 Keith Packard + * Copyright © 2007-2008 Dave Airlie + * Copyright © 2007-2008 Intel Corporation + * Jesse Barnes <jesse.barnes@intel.com> + * Copyright © 2011-2013 Intel Corporation + * Copyright © 2015 Intel Corporation + * Daniel Vetter <daniel.vetter@ffwll.ch> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __DRM_MODESET_HELPER_VTABLES_H__ +#define __DRM_MODESET_HELPER_VTABLES_H__ + +#include <drm/drm_crtc.h> + +/** + * DOC: overview + * + * The DRM mode setting helper functions are common code for drivers to use if + * they wish. Drivers are not forced to use this code in their + * implementations but it would be useful if the code they do use at least + * provides a consistent interface and operation to userspace. 
Therefore it is + * highly recommended to use the provided helpers as much as possible. + * + * Because there is only one pointer per modeset object to hold a vfunc table + * for helper libraries, they are by necessity shared among the different + * helpers. + * + * To make this clear all the helper vtables are pulled together in this single location. + */ + +enum mode_set_atomic; + +/** + * struct drm_crtc_helper_funcs - helper operations for CRTCs + * + * These hooks are used by the legacy CRTC helpers, the transitional plane + * helpers and the new atomic modesetting helpers. + */ +struct drm_crtc_helper_funcs { + /** + * @dpms: + * + * Callback to control power levels on the CRTC. If the mode passed in + * is unsupported, the provider must use the next lowest power level. + * This is used by the legacy CRTC helpers to implement DPMS + * functionality in drm_helper_connector_dpms(). + * + * This callback is also used to disable a CRTC by calling it with + * DRM_MODE_DPMS_OFF if the @disable hook isn't used. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for enabling and disabling a CRTC to + * facilitate transitions to atomic, but it is deprecated. Instead + * @enable and @disable should be used. + */ + void (*dpms)(struct drm_crtc *crtc, int mode); + + /** + * @prepare: + * + * This callback should prepare the CRTC for a subsequent modeset, which + * in practice means the driver should disable the CRTC if it is + * running. Most drivers ended up implementing this by calling their + * @dpms hook with DRM_MODE_DPMS_OFF. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for disabling a CRTC to facilitate + * transitions to atomic, but it is deprecated. Instead @disable should + * be used. + */ + void (*prepare)(struct drm_crtc *crtc); + + /** + * @commit: + * + * This callback should commit the new mode on the CRTC after a modeset, + * which in practice means the driver should enable the CRTC. Most + * drivers ended up implementing this by calling their @dpms hook with + * DRM_MODE_DPMS_ON. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for enabling a CRTC to facilitate + * transitions to atomic, but it is deprecated. Instead @enable should + * be used. + */ + void (*commit)(struct drm_crtc *crtc); + + /** + * @mode_fixup: + * + * This callback is used to validate a mode. The parameter mode is the + * display mode that userspace requested, while adjusted_mode is the + * mode the encoders need to be fed with. Note that these are the + * inverse semantics of the &drm_encoder and &drm_bridge + * ->mode_fixup() functions. If the CRTC cannot support the requested + * conversion from mode to adjusted_mode it should reject the modeset. + * + * This function is used by both legacy CRTC helpers and atomic helpers. + * With atomic helpers it is optional. + * + * NOTE: + * + * This function is called in the check phase of atomic modesets, which + * can be aborted for any reason (including on userspace's request to + * just check whether a configuration would be possible). Atomic drivers + * MUST NOT touch any persistent state (hardware or software) or data + * structures except the passed in adjusted_mode parameter. + * + * This is in contrast to the legacy CRTC helpers where this was + * allowed. + * + * Atomic drivers which need to inspect and adjust more state should + * instead use the @atomic_check callback.
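A sketch of a CRTC ->mode_fixup() respecting the rule above that only the adjusted_mode parameter may be touched. The hardware limits (no doublescan, dotclock below 150 MHz) are invented for the example:

        static bool foo_crtc_mode_fixup(struct drm_crtc *crtc,
                                        const struct drm_display_mode *mode,
                                        struct drm_display_mode *adjusted_mode)
        {
                if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
                        return false;

                if (mode->clock > 150000)
                        return false;

                return true;
        }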
+ * + * RETURNS: + * + * True if an acceptable configuration is possible, false if the modeset + * operation should be rejected. + */ + bool (*mode_fixup)(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + /** + * @mode_set: + * + * This callback is used by the legacy CRTC helpers to set a new mode, + * position and framebuffer. Since it ties the primary plane to every + * mode change it is incompatible with universal plane support, and + * since it can't update other planes it's incompatible with atomic + * modeset support. + * + * This callback is only used by the legacy CRTC helpers and is + * deprecated. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ + int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, int x, int y, + struct drm_framebuffer *old_fb); + + /** + * @mode_set_nofb: + * + * This callback is used to update the display mode of a CRTC without + * changing anything in the primary plane configuration. This fits the + * requirements of atomic and hence is used by the atomic helpers. It is + * also used by the transitional plane helpers to implement a + * @mode_set hook in drm_helper_crtc_mode_set(). + * + * Note that the display pipe is completely off when this function is + * called. Atomic drivers which need hardware to be running before they + * program the new display mode (e.g. because they implement runtime PM) + * should not use this hook. This is because the helper library calls + * this hook only once per mode change and not every time the display + * pipeline is suspended using either DPMS or the new "ACTIVE" property, + * which means register values set in this callback might get reset when + * the CRTC is suspended, but not restored. Such drivers should instead + * move all their CRTC setup into the @enable callback. + * + * This callback is optional. + */ + void (*mode_set_nofb)(struct drm_crtc *crtc); + + /** + * @mode_set_base: + * + * This callback is used by the legacy CRTC helpers to set a new + * framebuffer and scanout position. It is optional and used as an + * optimized fast-path instead of a full mode set operation with all the + * resulting flickering. Since it can't update other planes it's + * incompatible with atomic modeset support. + * + * This callback is only used by the legacy CRTC helpers and is + * deprecated. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ + int (*mode_set_base)(struct drm_crtc *crtc, int x, int y, + struct drm_framebuffer *old_fb); + + /** + * @mode_set_base_atomic: + * + * This callback is used by the fbdev helpers to set a new framebuffer + * and scanout without sleeping, i.e. from an atomic calling context. It + * is only used to implement kgdb support. + * + * This callback is optional and only needed for kgdb support in the fbdev + * helpers. + * + * RETURNS: + * + * 0 on success or a negative error code on failure. + */ + int (*mode_set_base_atomic)(struct drm_crtc *crtc, + struct drm_framebuffer *fb, int x, int y, + enum mode_set_atomic); + + /** + * @load_lut: + * + * Load a LUT prepared with the @gamma_set functions from + * &drm_fb_helper_funcs. + * + * This callback is optional and is only used by the fbdev emulation + * helpers. + * + * FIXME: + * + * This callback is functionally redundant with the core gamma table + * support and simply exists because the fbdev hasn't yet been + * refactored to use the core gamma table interfaces.
+ */ + void (*load_lut)(struct drm_crtc *crtc); + + /** + * @disable: + * + * This callback should be used to disable the CRTC. With the atomic + * drivers it is called after all encoders connected to this CRTC have + * been shut off already using their own ->disable hook. If that + * sequence is too simple drivers can just add their own hooks and call + * them from this CRTC callback by looping over all encoders + * connected to it using for_each_encoder_on_crtc(). + * + * This hook is used both by legacy CRTC helpers and atomic helpers. + * Atomic drivers don't need to implement it if there's no need to + * disable anything at the CRTC level. To ensure that runtime PM + * handling (using either DPMS or the new "ACTIVE" property) works + * @disable must be the inverse of @enable for atomic drivers. + * + * NOTE: + * + * With legacy CRTC helpers there's a big semantic difference between + * @disable and other hooks (like @prepare or @dpms) used to shut down a + * CRTC: @disable is only called when also logically disabling the + * display pipeline and needs to release any resources acquired in + * @mode_set (like shared PLLs, or again release pinned framebuffers). + * + * Therefore @disable must be the inverse of @mode_set plus @commit for + * drivers still using legacy CRTC helpers, which is different from the + * rules under atomic. + */ + void (*disable)(struct drm_crtc *crtc); + + /** + * @enable: + * + * This callback should be used to enable the CRTC. With the atomic + * drivers it is called before all encoders connected to this CRTC are + * enabled through the encoder's own ->enable hook. If that sequence is + * too simple drivers can just add their own hooks and call them from + * this CRTC callback by looping over all encoders connected to it using + * for_each_encoder_on_crtc(). + * + * This hook is used only by atomic helpers, for symmetry with @disable. + * Atomic drivers don't need to implement it if there's no need to + * enable anything at the CRTC level. To ensure that runtime PM handling + * (using either DPMS or the new "ACTIVE" property) works + * @enable must be the inverse of @disable for atomic drivers. + */ + void (*enable)(struct drm_crtc *crtc); + + /** + * @atomic_check: + * + * Drivers should check plane-update related CRTC constraints in this + * hook. They can also check mode related limitations but need to be + * aware of the calling order, since this hook is used by + * drm_atomic_helper_check_planes() whereas the preparations needed to + * check output routing and the display mode are done in + * drm_atomic_helper_check_modeset(). Therefore drivers that want to + * check output routing and display mode constraints in this callback + * must ensure that drm_atomic_helper_check_modeset() has been called + * beforehand. This is the calling order used by the default helper + * implementation in drm_atomic_helper_check(). + * + * When using drm_atomic_helper_check_planes() CRTCs' ->atomic_check() + * hooks are called after the ones for planes, which allows drivers to + * assign shared resources requested by planes in the CRTC callback. + * For more complicated dependencies the driver can call the provided + * check helpers multiple times until the computed state has a final + * configuration and everything has been checked. + * + * This function is also allowed to inspect any other object's state and + * can add more state objects to the atomic commit if needed.
Care must + * be taken though to ensure that state check and compute functions for + * these added states are all called, and derived state in other objects + * all updated. Again the recommendation is to just call check helpers + * until a maximal configuration is reached. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + * + * NOTE: + * + * This function is called in the check phase of an atomic update. The + * driver is not allowed to change anything outside of the free-standing + * state objects passed-in or assembled in the overall &drm_atomic_state + * update tracking structure. + * + * RETURNS: + * + * 0 on success, -EINVAL if the state or the transition can't be + * supported, -ENOMEM on memory allocation failure and -EDEADLK if an + * attempt to obtain another state object ran into a &drm_modeset_lock + * deadlock. + */ + int (*atomic_check)(struct drm_crtc *crtc, + struct drm_crtc_state *state); + + /** + * @atomic_begin: + * + * Drivers should prepare for an atomic update of multiple planes on + * a CRTC in this hook. Depending upon hardware this might be vblank + * evasion, blocking updates by setting bits or doing preparatory work + * for e.g. manually updated displays. + * + * This hook is called before any plane commit functions are called. + * + * Note that the power state of the display pipe when this function is + * called depends upon the exact helpers and calling sequence the driver + * has picked. See drm_atomic_helper_commit_planes() for a discussion of + * the tradeoffs and variants of plane commit helpers. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + */ + void (*atomic_begin)(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state); + /** + * @atomic_flush: + * + * Drivers should finalize an atomic update of multiple planes on + * a CRTC in this hook. Depending upon hardware this might include + * checking that vblank evasion was successful, unblocking updates by + * setting bits or setting the GO bit to flush out all updates. + * + * Simple hardware or hardware with special requirements can commit and + * flush out all updates for all planes from this hook and forgo all the + * other commit hooks for plane updates. + * + * This hook is called after any plane commit functions are called. + * + * Note that the power state of the display pipe when this function is + * called depends upon the exact helpers and calling sequence the driver + * has picked. See drm_atomic_helper_commit_planes() for a discussion of + * the tradeoffs and variants of plane commit helpers. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + */ + void (*atomic_flush)(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state); +}; + +/** + * drm_crtc_helper_add - sets the helper vtable for a CRTC + * @crtc: DRM CRTC + * @funcs: helper vtable to set for @crtc + */ +static inline void drm_crtc_helper_add(struct drm_crtc *crtc, + const struct drm_crtc_helper_funcs *funcs) +{ + crtc->helper_private = funcs; +} + +/** + * struct drm_encoder_helper_funcs - helper operations for encoders + * + * These hooks are used by the legacy CRTC helpers, the transitional plane + * helpers and the new atomic modesetting helpers. + */ +struct drm_encoder_helper_funcs { + /** + * @dpms: + * + * Callback to control power levels on the encoder.
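A sketch of the @atomic_begin/@atomic_flush pairing documented above, for hypothetical hardware with a "GO" latch bit. The foo_crtc wrapper, its mmio mapping, to_foo_crtc() and the FOO_UPDATE_GO register are all assumptions carried over from the earlier sketches:

        static void foo_crtc_atomic_begin(struct drm_crtc *crtc,
                                          struct drm_crtc_state *old_crtc_state)
        {
                struct foo_crtc *foo = to_foo_crtc(crtc);

                /* Hold back update latching while the planes are reprogrammed. */
                writel(0, foo->mmio + FOO_UPDATE_GO);
        }

        static void foo_crtc_atomic_flush(struct drm_crtc *crtc,
                                          struct drm_crtc_state *old_crtc_state)
        {
                struct foo_crtc *foo = to_foo_crtc(crtc);

                /* Release all queued plane updates on the same vblank. */
                writel(1, foo->mmio + FOO_UPDATE_GO);
        }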
If the mode passed in + * is unsupported, the provider must use the next lowest power level. + * This is used by the legacy encoder helpers to implement DPMS + * functionality in drm_helper_connector_dpms(). + * + * This callback is also used to disable an encoder by calling it with + * DRM_MODE_DPMS_OFF if the @disable hook isn't used. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for enabling and disabling an encoder to + * facilitate transitions to atomic, but it is deprecated. Instead + * @enable and @disable should be used. + */ + void (*dpms)(struct drm_encoder *encoder, int mode); + + /** + * @mode_fixup: + * + * This callback is used to validate and adjust a mode. The parameter + * mode is the display mode that should be fed to the next element in + * the display chain, either the final &drm_connector or a &drm_bridge. + * The parameter adjusted_mode is the input mode the encoder requires. It + * can be modified by this callback and does not need to match mode. + * + * This function is used by both legacy CRTC helpers and atomic helpers. + * With atomic helpers it is optional. + * + * NOTE: + * + * This function is called in the check phase of atomic modesets, which + * can be aborted for any reason (including on userspace's request to + * just check whether a configuration would be possible). Atomic drivers + * MUST NOT touch any persistent state (hardware or software) or data + * structures except the passed in adjusted_mode parameter. + * + * This is in contrast to the legacy CRTC helpers where this was + * allowed. + * + * Atomic drivers which need to inspect and adjust more state should + * instead use the @atomic_check callback. + * + * RETURNS: + * + * True if an acceptable configuration is possible, false if the modeset + * operation should be rejected. + */ + bool (*mode_fixup)(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + /** + * @prepare: + * + * This callback should prepare the encoder for a subsequent modeset, + * which in practice means the driver should disable the encoder if it + * is running. Most drivers ended up implementing this by calling their + * @dpms hook with DRM_MODE_DPMS_OFF. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for disabling an encoder to facilitate + * transitions to atomic, but it is deprecated. Instead @disable should + * be used. + */ + void (*prepare)(struct drm_encoder *encoder); + + /** + * @commit: + * + * This callback should commit the new mode on the encoder after a modeset, + * which in practice means the driver should enable the encoder. Most + * drivers ended up implementing this by calling their @dpms hook with + * DRM_MODE_DPMS_ON. + * + * This callback is used by the legacy CRTC helpers. Atomic helpers + * also support using this hook for enabling an encoder to facilitate + * transitions to atomic, but it is deprecated. Instead @enable should + * be used. + */ + void (*commit)(struct drm_encoder *encoder); + + /** + * @mode_set: + * + * This callback is used to update the display mode of an encoder. + * + * Note that the display pipe is completely off when this function is + * called. 
Drivers which need hardware to be running before they program + * the new display mode (because they implement runtime PM) should not + * use this hook, because the helper library calls it only once and not + * every time the display pipeline is suspended using either DPMS or the + * new "ACTIVE" property. Such drivers should instead move all their + * encoder setup into the ->enable() callback. + * + * This callback is used both by the legacy CRTC helpers and the atomic + * modeset helpers. It is optional in the atomic helpers. + */ + void (*mode_set)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + /** + * @get_crtc: + * + * This callback is used by the legacy CRTC helpers to work around + * deficiencies in their own bookkeeping. + * + * Do not use; use the atomic helpers instead, which get the bookkeeping + * right. + * + * FIXME: + * + * Currently only nouveau is using this, and as soon as nouveau is + * atomic we can ditch this hook. + */ + struct drm_crtc *(*get_crtc)(struct drm_encoder *encoder); + + /** + * @detect: + * + * This callback can be used by drivers who want to do detection on the + * encoder object instead of in connector functions. + * + * It is not used by any helper and therefore has purely driver-specific + * semantics. New drivers shouldn't use this and instead just implement + * their own private callbacks. + * + * FIXME: + * + * This should just be converted into a pile of driver vfuncs. + * Currently radeon, amdgpu and nouveau are using it. + */ + enum drm_connector_status (*detect)(struct drm_encoder *encoder, + struct drm_connector *connector); + + /** + * @disable: + * + * This callback should be used to disable the encoder. With the atomic + * drivers it is called before this encoder's CRTC has been shut off + * using the CRTC's own ->disable hook. If that sequence is too simple + * drivers can just add their own driver private encoder hooks and call + * them from the CRTC's callback by looping over all encoders connected + * to it using for_each_encoder_on_crtc(). + * + * This hook is used both by legacy CRTC helpers and atomic helpers. + * Atomic drivers don't need to implement it if there's no need to + * disable anything at the encoder level. To ensure that runtime PM + * handling (using either DPMS or the new "ACTIVE" property) works + * @disable must be the inverse of @enable for atomic drivers. + * + * NOTE: + * + * With legacy CRTC helpers there's a big semantic difference between + * @disable and other hooks (like @prepare or @dpms) used to shut down an + * encoder: @disable is only called when also logically disabling the + * display pipeline and needs to release any resources acquired in + * @mode_set (like shared PLLs, or again release pinned framebuffers). + * + * Therefore @disable must be the inverse of @mode_set plus @commit for + * drivers still using legacy CRTC helpers, which is different from the + * rules under atomic. + */ + void (*disable)(struct drm_encoder *encoder); + + /** + * @enable: + * + * This callback should be used to enable the encoder. With the atomic + * drivers it is called after this encoder's CRTC has been enabled using + * the CRTC's own ->enable hook. If that sequence is too simple drivers + * can just add their own driver private encoder hooks and call them + * from the CRTC's callback by looping over all encoders connected to it + * using for_each_encoder_on_crtc(). + * + * This hook is used only by atomic helpers, for symmetry with @disable.
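The looping pattern suggested above, for drivers whose enable/disable sequencing is too simple for separate encoder hooks, might look like this sketch (foo_encoder_power_off() and foo_crtc_power_off() are hypothetical driver helpers; for_each_encoder_on_crtc() is the macro named in the documentation above):

        static void foo_crtc_disable(struct drm_crtc *crtc)
        {
                struct drm_encoder *encoder;

                /* Shut down every encoder feeding this CRTC, then the CRTC itself. */
                for_each_encoder_on_crtc(crtc->dev, crtc, encoder)
                        foo_encoder_power_off(encoder);

                foo_crtc_power_off(crtc);
        }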
+ * Atomic drivers don't need to implement it if there's no need to + * enable anything at the encoder level. To ensure that runtime PM handling + * (using either DPMS or the new "ACTIVE" property) works + * @enable must be the inverse of @disable for atomic drivers. + */ + void (*enable)(struct drm_encoder *encoder); + + /** + * @atomic_check: + * + * This callback is used to validate encoder state for atomic drivers. + * Since the encoder is the object connecting the CRTC and connector it + * gets passed both states, to be able to validate interactions and + * update the CRTC to match what the encoder needs for the requested + * connector. + * + * This function is used by the atomic helpers, but it is optional. + * + * NOTE: + * + * This function is called in the check phase of an atomic update. The + * driver is not allowed to change anything outside of the free-standing + * state objects passed-in or assembled in the overall &drm_atomic_state + * update tracking structure. + * + * RETURNS: + * + * 0 on success, -EINVAL if the state or the transition can't be + * supported, -ENOMEM on memory allocation failure and -EDEADLK if an + * attempt to obtain another state object ran into a &drm_modeset_lock + * deadlock. + */ + int (*atomic_check)(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state); +}; + +/** + * drm_encoder_helper_add - sets the helper vtable for an encoder + * @encoder: DRM encoder + * @funcs: helper vtable to set for @encoder + */ +static inline void drm_encoder_helper_add(struct drm_encoder *encoder, + const struct drm_encoder_helper_funcs *funcs) +{ + encoder->helper_private = funcs; +} + +/** + * struct drm_connector_helper_funcs - helper operations for connectors + * + * These functions are used by the atomic and legacy modeset helpers and by the + * probe helpers. + */ +struct drm_connector_helper_funcs { + /** + * @get_modes: + * + * This function should fill in all modes currently valid for the sink + * into the connector->probed_modes list. It should also update the + * EDID property by calling drm_mode_connector_update_edid_property(). + * + * The usual way to implement this is to cache the EDID retrieved in the + * probe callback somewhere in the driver-private connector structure. + * In this function drivers then parse the modes in the EDID and add + * them by calling drm_add_edid_modes(). But connectors that drive a + * fixed panel can also manually add specific modes using + * drm_mode_probed_add(). Finally drivers that support audio probably + * want to update the ELD data, too, using drm_edid_to_eld(). + * + * This function is only called after the ->detect() hook has indicated + * that a sink is connected and when the EDID isn't overridden through + * sysfs or the kernel commandline. + * + * This callback is used by the probe helpers in e.g. + * drm_helper_probe_single_connector_modes(). + * + * RETURNS: + * + * The number of modes added by calling drm_mode_probed_add(). + */ + int (*get_modes)(struct drm_connector *connector); + + /** + * @mode_valid: + * + * Callback to validate a mode for a connector, irrespective of the + * specific display configuration. + * + * This callback is used by the probe helpers to filter the mode list + * (which is usually derived from the EDID data block from the sink). + * See e.g. drm_helper_probe_single_connector_modes(). + * + * NOTE: + * + * This only filters the mode list supplied to userspace in the + * GETCONNECTOR IOCTL.
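A typical ->get_modes() along the lines described above, assuming the driver cached the EDID in its connector structure during ->detect(). The foo_connector wrapper and its fields are hypothetical; the drm_* calls are the ones named in the documentation:

        struct foo_connector {
                struct drm_connector base;
                struct drm_encoder encoder;
                struct edid *edid;      /* cached during ->detect() */
        };

        static inline struct foo_connector *to_foo_connector(struct drm_connector *c)
        {
                return container_of(c, struct foo_connector, base);
        }

        static int foo_connector_get_modes(struct drm_connector *connector)
        {
                struct foo_connector *foo = to_foo_connector(connector);

                /* Publish the cached EDID, then turn it into probed modes. */
                drm_mode_connector_update_edid_property(connector, foo->edid);

                return drm_add_edid_modes(connector, foo->edid);
        }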
Userspace is free to create modes of its own and + * ask the kernel to use them. In this case the atomic helpers or legacy + * CRTC helpers will not call this function. Drivers therefore must + * still fully validate any mode passed in with a modeset request. + * + * RETURNS: + * + * Either MODE_OK or one of the failure reasons in enum + * &drm_mode_status. + */ + enum drm_mode_status (*mode_valid)(struct drm_connector *connector, + struct drm_display_mode *mode); + /** + * @best_encoder: + * + * This function should select the best encoder for the given connector. + * + * This function is used by both the atomic helpers (in the + * drm_atomic_helper_check_modeset() function) and by the legacy CRTC + * helpers. + * + * NOTE: + * + * In atomic drivers this function is called in the check phase of an + * atomic update. The driver is not allowed to change or inspect + * anything outside of arguments passed-in. Atomic drivers which need to + * inspect dynamic configuration state should instead use + * @atomic_best_encoder. + * + * RETURNS: + * + * Encoder that should be used for the given connector and connector + * state, or NULL if no suitable encoder exists. Note that the helpers + * will ensure that encoders aren't used twice; drivers should not check + * for this. + */ + struct drm_encoder *(*best_encoder)(struct drm_connector *connector); + + /** + * @atomic_best_encoder: + * + * This is the atomic version of @best_encoder for atomic drivers which + * need to select the best encoder depending upon the desired + * configuration and can't select it statically. + * + * This function is used by drm_atomic_helper_check_modeset() and either + * this or @best_encoder is required. + * + * NOTE: + * + * This function is called in the check phase of an atomic update. The + * driver is not allowed to change anything outside of the free-standing + * state objects passed-in or assembled in the overall &drm_atomic_state + * update tracking structure. + * + * RETURNS: + * + * Encoder that should be used for the given connector and connector + * state, or NULL if no suitable encoder exists. Note that the helpers + * will ensure that encoders aren't used twice; drivers should not check + * for this. + */ + struct drm_encoder *(*atomic_best_encoder)(struct drm_connector *connector, + struct drm_connector_state *connector_state); +}; + +/** + * drm_connector_helper_add - sets the helper vtable for a connector + * @connector: DRM connector + * @funcs: helper vtable to set for @connector + */ +static inline void drm_connector_helper_add(struct drm_connector *connector, + const struct drm_connector_helper_funcs *funcs) +{ + connector->helper_private = funcs; +} + +/** + * struct drm_plane_helper_funcs - helper operations for planes + * + * These functions are used by the atomic helpers and by the transitional plane + * helpers. + */ +struct drm_plane_helper_funcs { + /** + * @prepare_fb: + * + * This hook is used to prepare a framebuffer for scanout by e.g. pinning + * its backing storage or relocating it into a contiguous block of + * VRAM. Other possible preparatory work includes flushing caches. + * + * This function must not block for outstanding rendering, since it is + * called in the context of the atomic IOCTL even for async commits to + * be able to return any errors to userspace. Instead the recommended + * way is to fill out the fence member of the passed-in + * &drm_plane_state.
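Reusing the hypothetical foo_connector from the earlier sketch, @best_encoder can be trivial for hardware with a fixed 1:1 connector/encoder mapping:

        static struct drm_encoder *
        foo_best_encoder(struct drm_connector *connector)
        {
                /* Fixed 1:1 mapping: always return the one embedded encoder. */
                return &to_foo_connector(connector)->encoder;
        }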
If the driver doesn't support native fences then + * equivalent functionality should be implemented through private + * members in the plane structure. + * + * The helpers will call @cleanup_fb with matching arguments for every + * successful call to this hook. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + * + * RETURNS: + * + * 0 on success or one of the negative error codes allowed by + * the atomic_commit hook in &drm_mode_config_funcs. When using helpers + * this callback is the only one which can fail an atomic commit; + * everything else must complete successfully. + */ + int (*prepare_fb)(struct drm_plane *plane, + const struct drm_plane_state *new_state); + /** + * @cleanup_fb: + * + * This hook is called to clean up any resources allocated for the given + * framebuffer and plane configuration in @prepare_fb. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + */ + void (*cleanup_fb)(struct drm_plane *plane, + const struct drm_plane_state *old_state); + + /** + * @atomic_check: + * + * Drivers should check plane-specific constraints in this hook. + * + * When using drm_atomic_helper_check_planes() the planes' ->atomic_check() + * hooks are called before the ones for CRTCs, which allows drivers to + * request shared resources that the CRTC controls here. For more + * complicated dependencies the driver can call the provided check helpers + * multiple times until the computed state has a final configuration and + * everything has been checked. + * + * This function is also allowed to inspect any other object's state and + * can add more state objects to the atomic commit if needed. Care must + * be taken though to ensure that the state check and compute functions for + * these added states are all called, and that derived state in other objects + * is all updated. Again the recommendation is to just call check helpers + * until a maximal configuration is reached. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + * + * NOTE: + * + * This function is called in the check phase of an atomic update. The + * driver is not allowed to change anything outside of the free-standing + * state objects passed-in or assembled in the overall &drm_atomic_state + * update tracking structure. + * + * RETURNS: + * + * 0 on success, -EINVAL if the state or the transition can't be + * supported, -ENOMEM on memory allocation failure and -EDEADLK if an + * attempt to obtain another state object ran into a &drm_modeset_lock + * deadlock. + */ + int (*atomic_check)(struct drm_plane *plane, + struct drm_plane_state *state); + + /** + * @atomic_update: + * + * Drivers should use this function to update the plane state. This + * hook is called in-between the ->atomic_begin() and + * ->atomic_flush() of &drm_crtc_helper_funcs. + * + * Note that the power state of the display pipe when this function is + * called depends upon the exact helpers and calling sequence the driver + * has picked. See drm_atomic_helper_commit_planes() for a discussion of the + * tradeoffs and variants of plane commit helpers. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional.
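+ * (Editorial aside, not part of this patch: an @atomic_update hook
+ * typically reads the new state from plane->state and programs the
+ * hardware; foo_write_plane_regs() is a hypothetical register helper:
+ *
+ *	static void foo_atomic_update(struct drm_plane *plane,
+ *				      struct drm_plane_state *old_state)
+ *	{
+ *		struct drm_plane_state *state = plane->state;
+ *
+ *		foo_write_plane_regs(plane, state->fb,
+ *				     state->crtc_x, state->crtc_y);
+ *	}
+ *
+ * Drivers then wire up the hooks with drm_plane_helper_add(), declared
+ * below, mirroring drm_encoder_helper_add() and
+ * drm_connector_helper_add():
+ *
+ *	static const struct drm_plane_helper_funcs foo_plane_helper_funcs = {
+ *		.prepare_fb = foo_prepare_fb,
+ *		.cleanup_fb = foo_cleanup_fb,
+ *		.atomic_check = foo_atomic_check,
+ *		.atomic_update = foo_atomic_update,
+ *	};
+ *
+ *	drm_plane_helper_add(plane, &foo_plane_helper_funcs);
+ * )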
+ */ + void (*atomic_update)(struct drm_plane *plane, + struct drm_plane_state *old_state); + /** + * @atomic_disable: + * + * Drivers should use this function to unconditionally disable a plane. + * This hook is called in-between the ->atomic_begin() and + * ->atomic_flush() of &drm_crtc_helper_funcs. It is an alternative to + * @atomic_update, which will be called for disabling planes, too, if + * the @atomic_disable hook isn't implemented. + * + * This hook is also useful to disable planes in preparation for a modeset, + * by calling drm_atomic_helper_disable_planes_on_crtc() from the + * ->disable() hook in &drm_crtc_helper_funcs. + * + * Note that the power state of the display pipe when this function is + * called depends upon the exact helpers and calling sequence the driver + * has picked. See drm_atomic_helper_commit_planes() for a discussion of the + * tradeoffs and variants of plane commit helpers. + * + * This callback is used by the atomic modeset helpers and by the + * transitional plane helpers, but it is optional. + */ + void (*atomic_disable)(struct drm_plane *plane, + struct drm_plane_state *old_state); +}; + +/** + * drm_plane_helper_add - sets the helper vtable for a plane + * @plane: DRM plane + * @funcs: helper vtable to set for @plane + */ +static inline void drm_plane_helper_add(struct drm_plane *plane, + const struct drm_plane_helper_funcs *funcs) +{ + plane->helper_private = funcs; +} + +#endif diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index 94938d89347c..c5576fbcb909 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -138,7 +138,7 @@ void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); struct drm_modeset_acquire_ctx * drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc); -int drm_modeset_lock_all_crtcs(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx); +int drm_modeset_lock_all_ctx(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx); #endif /* DRM_MODESET_LOCK_H_ */ diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index 5a7f9d4efb1d..4421f3f4ca8d 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -26,6 +26,7 @@ #include <drm/drm_rect.h> #include <drm/drm_crtc.h> +#include <drm/drm_modeset_helper_vtables.h> /* * Drivers that don't allow primary plane scaling may pass this macro in place @@ -36,46 +37,9 @@ */ #define DRM_PLANE_HELPER_NO_SCALING (1<<16) -/** - * DOC: plane helpers - * - * Helper functions to assist with creation and handling of CRTC primary - * planes. - */ - int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, const struct drm_crtc_funcs *funcs); -/** - * drm_plane_helper_funcs - helper operations for CRTCs - * @prepare_fb: prepare a framebuffer for use by the plane - * @cleanup_fb: cleanup a framebuffer when it's no longer used by the plane - * @atomic_check: check that a given atomic state is valid and can be applied - * @atomic_update: apply an atomic state to the plane (mandatory) - * @atomic_disable: disable the plane - * - * The helper operations are called by the mid-layer CRTC helper.
- */ -struct drm_plane_helper_funcs { - int (*prepare_fb)(struct drm_plane *plane, - const struct drm_plane_state *new_state); - void (*cleanup_fb)(struct drm_plane *plane, - const struct drm_plane_state *old_state); - - int (*atomic_check)(struct drm_plane *plane, - struct drm_plane_state *state); - void (*atomic_update)(struct drm_plane *plane, - struct drm_plane_state *old_state); - void (*atomic_disable)(struct drm_plane *plane, - struct drm_plane_state *old_state); -}; - -static inline void drm_plane_helper_add(struct drm_plane *plane, - const struct drm_plane_helper_funcs *funcs) -{ - plane->helper_private = funcs; -} - int drm_plane_helper_check_update(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index f1a113e35f98..f97020904717 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -279,12 +279,19 @@ #define INTEL_SKL_GT3_IDS(info) \ INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ - INTEL_VGA_DEVICE(0x192A, info) /* SRV GT3 */ \ + INTEL_VGA_DEVICE(0x192A, info) /* SRV GT3 */ -#define INTEL_SKL_IDS(info) \ +#define INTEL_SKL_GT4_IDS(info) \ + INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ + INTEL_VGA_DEVICE(0x193B, info), /* Halo GT4 */ \ + INTEL_VGA_DEVICE(0x193D, info), /* WKS GT4 */ \ + INTEL_VGA_DEVICE(0x193A, info) /* SRV GT4 */ + +#define INTEL_SKL_IDS(info) \ INTEL_SKL_GT1_IDS(info), \ INTEL_SKL_GT2_IDS(info), \ - INTEL_SKL_GT3_IDS(info) + INTEL_SKL_GT3_IDS(info), \ + INTEL_SKL_GT4_IDS(info) #define INTEL_BXT_IDS(info) \ INTEL_VGA_DEVICE(0x0A84, info), \ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 054833939995..1991aea2ec4c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -870,8 +870,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, } static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, - const char *name, const char *cells_name, - size_t index, struct acpi_reference_args *args) + const char *name, size_t index, + struct acpi_reference_args *args) { return -ENXIO; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0d2b7927c1f..0169ba2e2e64 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -254,6 +254,7 @@ struct queue_limits { unsigned long virt_boundary_mask; unsigned int max_hw_sectors; + unsigned int max_dev_sectors; unsigned int chunk_sectors; unsigned int max_sectors; unsigned int max_segment_size; @@ -773,7 +774,6 @@ extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); -extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, @@ -960,7 +960,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); -extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern 
void blk_queue_max_segments(struct request_queue *, unsigned short); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index de464e6683b6..83d1926c61e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,7 @@ struct bpf_map { struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; + atomic_t usercnt; }; struct bpf_map_type_list { @@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(u32 ufd); +struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); +void bpf_map_inc(struct bpf_map *map, bool uref); +void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ef4c5b1a860f..177c7680c1a8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -77,6 +77,7 @@ struct cpufreq_policy { unsigned int suspend_freq; /* freq to set during suspend */ unsigned int policy; /* see above */ + unsigned int last_policy; /* policy before unplug */ struct cpufreq_governor *governor; /* see below */ void *governor_data; bool governor_enabled; /* governor start/stop flag */ diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h index cc92268af89a..6ac3cad9aef1 100644 --- a/include/linux/dns_resolver.h +++ b/include/linux/dns_resolver.h @@ -27,7 +27,7 @@ #ifdef __KERNEL__ extern int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry); + const char *options, char **_result, time64_t *_expiry); #endif /* KERNEL */ diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0ef2a97ccdb5..402753bccafa 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 3db5552b17d5..c6916aec43b6 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -179,7 +179,7 @@ typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *); typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, nvm_l2p_update_fn *, void *); -typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int, +typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int, nvm_bb_update_fn *, void *); typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); diff --git a/include/linux/net.h b/include/linux/net.h index 70ac5e28e6b7..0b4ac7da583a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -34,8 +34,12 @@ struct inode; struct file; struct net; -#define SOCK_ASYNC_NOSPACE 0 -#define SOCK_ASYNC_WAITDATA 1 +/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located + * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. + * Eventually all flags will be in sk->sk_wq_flags. 
+ */ +#define SOCKWQ_ASYNC_NOSPACE 0 +#define SOCKWQ_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 #define SOCK_PASSCRED 3 #define SOCK_PASSSEC 4 @@ -89,6 +93,7 @@ struct socket_wq { /* Note: wait MUST be first field of socket_wq */ wait_queue_head_t wait; struct fasync_struct *fasync_list; + unsigned long flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */ struct rcu_head rcu; } ____cacheline_aligned_in_smp; @@ -96,7 +101,7 @@ struct socket_wq { * struct socket - general BSD socket * @state: socket state (%SS_CONNECTED, etc) * @type: socket type (%SOCK_STREAM, etc) - * @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc) + * @flags: socket flags (%SOCK_NOSPACE, etc) * @ops: protocol specific socket operations * @file: File back pointer for gc * @sk: internal networking protocol agnostic socket representation @@ -202,7 +207,7 @@ enum { SOCK_WAKE_URG, }; -int sock_wake_async(struct socket *sk, int how, int band); +int sock_wake_async(struct socket_wq *sk_wq, int how, int band); int sock_register(const struct net_proto_family *fam); void sock_unregister(int family); int __sock_create(struct net *net, int family, int type, int proto, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 67bfac1abfc1..3b5d134e945a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,7 +1398,8 @@ enum netdev_priv_flags { * @dma: DMA channel * @mtu: Interface MTU value * @type: Interface hardware type - * @hard_header_len: Hardware header length + * @hard_header_len: Hardware header length, which means that this is the + * minimum size of a packet. * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b36d837c701e..2a91a0561a47 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,6 +62,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2bfb2ad2fab1..877f682989b8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); /* * Store a destination cache entry in a socket */ -static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct rt6_info *rt = (struct rt6_info *) dst; + np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst); sk_setup_caps(sk, dst); np->daddr_cache = daddr; #ifdef CONFIG_IPV6_SUBTREES np->saddr_cache = saddr; #endif - np->dst_cookie = rt6_get_cookie(rt); -} - -static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) -{ - spin_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr, saddr); - spin_unlock(&sk->sk_dst_lock); } static inline bool ipv6_unicast_destination(const struct sk_buff *skb) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e1a10b0ac0b0..9a5c9f013784 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct 
ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -252,6 +253,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, @@ -490,6 +509,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 82045fca388b..760bc4d5a2cf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2003,8 +2003,10 @@ enum ieee80211_hw_flags { * it shouldn't be set. * * @max_tx_aggregation_subframes: maximum number of subframes in an - * aggregate an HT driver will transmit, used by the peer as a - * hint to size its reorder buffer. + * aggregate an HT driver will transmit. Though ADDBA will advertise + * a constant value of 64 as some older APs can crash if the window + * size is smaller (an example is LinkSys WRT120N with FW v1.0.07 + * build 002 Jun 18 2012). * * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX * (if %IEEE80211_HW_QUEUE_CONTROL is set) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index bf3937431030..2d8edaad29cb 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -181,8 +181,7 @@ void ndisc_cleanup(void); int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb); + const struct in6_addr *daddr, const struct in6_addr *saddr); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 4c79ce8c1f92..b2a8e6338576 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 495c87e367b3..7bbb71081aeb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -775,10 +775,10 @@ struct sctp_transport { hb_sent:1, /* Is the Path MTU update pending on this tranport */ - pmtu_pending:1; + pmtu_pending:1, - /* Has this transport moved the ctsn since we last sacked */ - __u32 sack_generation; + /* Has this transport moved the ctsn since we last sacked */ + sack_generation:1; u32 dst_cookie; struct flowi fl; @@ -1482,19 +1482,19 @@ struct sctp_association { prsctp_capable:1, /* Can peer do PR-SCTP? */ auth_capable:1; /* Is peer doing SCTP-AUTH? 
*/ - /* Ack State : This flag indicates if the next received + /* sack_needed : This flag indicates if the next received * : packet is to be responded to with a - * : SACK. This is initializedto 0. When a packet - * : is received it is incremented. If this value + * : SACK. This is initialized to 0. When a packet + * : is received sack_cnt is incremented. If this value * : reaches 2 or more, a SACK is sent and the * : value is reset to 0. Note: This is used only * : when no DATA chunks are received out of * : order. When DATA chunks are out of order, * : SACK's are not delayed (see Section 6). */ - __u8 sack_needed; /* Do we need to sack the peer? */ + __u8 sack_needed:1, /* Do we need to sack the peer? */ + sack_generation:1; __u32 sack_cnt; - __u32 sack_generation; __u32 adaptation_ind; /* Adaptation Code point. */ diff --git a/include/net/sock.h b/include/net/sock.h index 7f89e4ba18d1..52d27ee924f4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -254,7 +254,6 @@ struct cg_proto; * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux * @sk_dst_cache: destination cache - * @sk_dst_lock: destination cache lock * @sk_policy: flow policy * @sk_receive_queue: incoming packets * @sk_wmem_alloc: transmit queue bytes committed @@ -384,14 +383,16 @@ struct sock { int sk_rcvbuf; struct sk_filter __rcu *sk_filter; - struct socket_wq __rcu *sk_wq; - + union { + struct socket_wq __rcu *sk_wq; + struct socket_wq *sk_wq_raw; + }; #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif struct dst_entry *sk_rx_dst; struct dst_entry __rcu *sk_dst_cache; - spinlock_t sk_dst_lock; + /* Note: 32bit hole on 64bit arches */ atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; @@ -2005,10 +2006,27 @@ static inline unsigned long sock_wspace(struct sock *sk) return amt; } -static inline void sk_wake_async(struct sock *sk, int how, int band) +/* Note: + * We use sk->sk_wq_raw, from contexts knowing this + * pointer is not NULL and cannot disappear/change. 
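+ * As an editorial illustration (not in this patch), a typical caller of
+ * the helpers defined just below flags a full write queue and wakes any
+ * async waiters; SOCK_WAKE_SPACE and POLL_OUT are the existing kernel
+ * constants:
+ *
+ *	sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ *	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);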
+ */ +static inline void sk_set_bit(int nr, struct sock *sk) { - if (sock_flag(sk, SOCK_FASYNC)) - sock_wake_async(sk->sk_socket, how, band); + set_bit(nr, &sk->sk_wq_raw->flags); +} + +static inline void sk_clear_bit(int nr, struct sock *sk) +{ + clear_bit(nr, &sk->sk_wq_raw->flags); +} + +static inline void sk_wake_async(const struct sock *sk, int how, int band) +{ + if (sock_flag(sk, SOCK_FASYNC)) { + rcu_read_lock(); + sock_wake_async(rcu_dereference(sk->sk_wq), how, band); + rcu_read_unlock(); + } } /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index ed527121031d..fcfa3d7f5e7e 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -668,6 +668,9 @@ struct Scsi_Host { unsigned use_blk_mq:1; unsigned use_cmd_list:1; + /* Host responded with short (<36 bytes) INQUIRY result */ + unsigned short_inquiry:1; + /* * Optional work queue to be utilized by the transport */ diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 7855cfe46b69..95a937eafb79 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -398,6 +398,7 @@ int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm, int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_route *route, int num); void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w); +void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm); /* dapm events */ void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild index 38d437096c35..9355dd8eff3b 100644 --- a/include/uapi/drm/Kbuild +++ b/include/uapi/drm/Kbuild @@ -3,6 +3,7 @@ header-y += drm.h header-y += drm_fourcc.h header-y += drm_mode.h header-y += drm_sarea.h +header-y += amdgpu_drm.h header-y += exynos_drm.h header-y += i810_drm.h header-y += i915_drm.h @@ -17,4 +18,5 @@ header-y += tegra_drm.h header-y += via_drm.h header-y += vmwgfx_drm.h header-y += msm_drm.h +header-y += vc4_drm.h header-y += virtgpu_drm.h diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index e52933a73580..453a76af123c 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -76,19 +76,19 @@ struct drm_amdgpu_gem_create_in { /** the requested memory size */ - uint64_t bo_size; + __u64 bo_size; /** physical start_addr alignment in bytes for some HW requirements */ - uint64_t alignment; + __u64 alignment; /** the requested memory domains */ - uint64_t domains; + __u64 domains; /** allocation flags */ - uint64_t domain_flags; + __u64 domain_flags; }; struct drm_amdgpu_gem_create_out { /** returned GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; union drm_amdgpu_gem_create { @@ -105,28 +105,28 @@ union drm_amdgpu_gem_create { struct drm_amdgpu_bo_list_in { /** Type of operation */ - uint32_t operation; + __u32 operation; /** Handle of list or 0 if we want to create one */ - uint32_t list_handle; + __u32 list_handle; /** Number of BOs in list */ - uint32_t bo_number; + __u32 bo_number; /** Size of each element describing BO */ - uint32_t bo_info_size; + __u32 bo_info_size; /** Pointer to array describing BOs */ - uint64_t bo_info_ptr; + __u64 bo_info_ptr; }; struct drm_amdgpu_bo_list_entry { /** Handle of BO */ - uint32_t bo_handle; + __u32 bo_handle; /** New (if specified) BO priority to be used during migration */ - uint32_t bo_priority; + __u32 bo_priority; }; struct 
drm_amdgpu_bo_list_out { /** Handle of resource list */ - uint32_t list_handle; - uint32_t _pad; + __u32 list_handle; + __u32 _pad; }; union drm_amdgpu_bo_list { @@ -150,26 +150,26 @@ union drm_amdgpu_bo_list { struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ - uint32_t op; + __u32 op; /** For future use, no flags defined so far */ - uint32_t flags; - uint32_t ctx_id; - uint32_t _pad; + __u32 flags; + __u32 ctx_id; + __u32 _pad; }; union drm_amdgpu_ctx_out { struct { - uint32_t ctx_id; - uint32_t _pad; + __u32 ctx_id; + __u32 _pad; } alloc; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** Number of resets caused by this context so far. */ - uint32_t hangs; + __u32 hangs; /** Reset status since the last call of the ioctl. */ - uint32_t reset_status; + __u32 reset_status; } state; }; @@ -189,12 +189,12 @@ union drm_amdgpu_ctx { #define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) struct drm_amdgpu_gem_userptr { - uint64_t addr; - uint64_t size; + __u64 addr; + __u64 size; /* AMDGPU_GEM_USERPTR_* */ - uint32_t flags; + __u32 flags; /* Resulting GEM handle */ - uint32_t handle; + __u32 handle; }; /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ @@ -226,28 +226,28 @@ struct drm_amdgpu_gem_userptr { /** The same structure is shared for input/output */ struct drm_amdgpu_gem_metadata { /** GEM Object handle */ - uint32_t handle; + __u32 handle; /** Do we want get or set metadata */ - uint32_t op; + __u32 op; struct { /** For future use, no flags defined so far */ - uint64_t flags; + __u64 flags; /** family specific tiling info */ - uint64_t tiling_info; - uint32_t data_size_bytes; - uint32_t data[64]; + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; } data; }; struct drm_amdgpu_gem_mmap_in { /** the GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; }; struct drm_amdgpu_gem_mmap_out { /** mmap offset from the vma offset manager */ - uint64_t addr_ptr; + __u64 addr_ptr; }; union drm_amdgpu_gem_mmap { @@ -257,18 +257,18 @@ union drm_amdgpu_gem_mmap { struct drm_amdgpu_gem_wait_idle_in { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** For future use, no flags defined so far */ - uint32_t flags; + __u32 flags; /** Absolute timeout to wait */ - uint64_t timeout; + __u64 timeout; }; struct drm_amdgpu_gem_wait_idle_out { /** BO status: 0 - BO is idle, 1 - BO is busy */ - uint32_t status; + __u32 status; /** Returned current memory domain */ - uint32_t domain; + __u32 domain; }; union drm_amdgpu_gem_wait_idle { @@ -278,18 +278,18 @@ union drm_amdgpu_gem_wait_idle { struct drm_amdgpu_wait_cs_in { /** Command submission handle */ - uint64_t handle; + __u64 handle; /** Absolute timeout to wait */ - uint64_t timeout; - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint32_t ctx_id; + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; }; struct drm_amdgpu_wait_cs_out { /** CS status: 0 - CS completed, 1 - CS still busy */ - uint64_t status; + __u64 status; }; union drm_amdgpu_wait_cs { @@ -303,11 +303,11 @@ union drm_amdgpu_wait_cs { /* Sets or returns a value associated with a buffer. 
*/ struct drm_amdgpu_gem_op { /** GEM object handle */ - uint32_t handle; + __u32 handle; /** AMDGPU_GEM_OP_* */ - uint32_t op; + __u32 op; /** Input or return value */ - uint64_t value; + __u64 value; }; #define AMDGPU_VA_OP_MAP 1 @@ -326,18 +326,18 @@ struct drm_amdgpu_gem_op { struct drm_amdgpu_gem_va { /** GEM object handle */ - uint32_t handle; - uint32_t _pad; + __u32 handle; + __u32 _pad; /** AMDGPU_VA_OP_* */ - uint32_t operation; + __u32 operation; /** AMDGPU_VM_PAGE_* */ - uint32_t flags; + __u32 flags; /** va address to assign . Must be correctly aligned.*/ - uint64_t va_address; + __u64 va_address; /** Specify offset inside of BO to assign. Must be correctly aligned.*/ - uint64_t offset_in_bo; + __u64 offset_in_bo; /** Specify mapping size. Must be correctly aligned. */ - uint64_t map_size; + __u64 map_size; }; #define AMDGPU_HW_IP_GFX 0 @@ -354,24 +354,24 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 struct drm_amdgpu_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - uint64_t chunk_data; + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; }; struct drm_amdgpu_cs_in { /** Rendering context id */ - uint32_t ctx_id; + __u32 ctx_id; /** Handle of resource list associated with CS */ - uint32_t bo_list_handle; - uint32_t num_chunks; - uint32_t _pad; - /** this points to uint64_t * which point to cs chunks */ - uint64_t chunks; + __u32 bo_list_handle; + __u32 num_chunks; + __u32 _pad; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; }; struct drm_amdgpu_cs_out { - uint64_t handle; + __u64 handle; }; union drm_amdgpu_cs { @@ -388,32 +388,32 @@ union drm_amdgpu_cs { #define AMDGPU_IB_FLAG_PREAMBLE (1<<1) struct drm_amdgpu_cs_chunk_ib { - uint32_t _pad; + __u32 _pad; /** AMDGPU_IB_FLAG_* */ - uint32_t flags; + __u32 flags; /** Virtual address to begin IB execution */ - uint64_t va_start; + __u64 va_start; /** Size of submission */ - uint32_t ib_bytes; + __u32 ib_bytes; /** HW IP to submit to */ - uint32_t ip_type; + __u32 ip_type; /** HW IP index of the same type to submit to */ - uint32_t ip_instance; + __u32 ip_instance; /** Ring index to submit to */ - uint32_t ring; + __u32 ring; }; struct drm_amdgpu_cs_chunk_dep { - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint32_t ctx_id; - uint64_t handle; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; }; struct drm_amdgpu_cs_chunk_fence { - uint32_t handle; - uint32_t offset; + __u32 handle; + __u32 offset; }; struct drm_amdgpu_cs_chunk_data { @@ -486,83 +486,83 @@ struct drm_amdgpu_cs_chunk_data { /* Input structure for the INFO ioctl */ struct drm_amdgpu_info { /* Where the return value will be stored */ - uint64_t return_pointer; + __u64 return_pointer; /* The size of the return value. Just like "size" in "snprintf", * it limits how many bytes the kernel can write. */ - uint32_t return_size; + __u32 return_size; /* The query request id. */ - uint32_t query; + __u32 query; union { struct { - uint32_t id; - uint32_t _pad; + __u32 id; + __u32 _pad; } mode_crtc; struct { /** AMDGPU_HW_IP_* */ - uint32_t type; + __u32 type; /** * Index of the IP if there are more IPs of the same * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. 
*/ - uint32_t ip_instance; + __u32 ip_instance; } query_hw_ip; struct { - uint32_t dword_offset; + __u32 dword_offset; /** number of registers to read */ - uint32_t count; - uint32_t instance; + __u32 count; + __u32 instance; /** For future use, no flags defined so far */ - uint32_t flags; + __u32 flags; } read_mmr_reg; struct { /** AMDGPU_INFO_FW_* */ - uint32_t fw_type; + __u32 fw_type; /** * Index of the IP if there are more IPs of * the same type. */ - uint32_t ip_instance; + __u32 ip_instance; /** * Index of the engine. Whether this is used depends * on the firmware type. (e.g. MEC, SDMA) */ - uint32_t index; - uint32_t _pad; + __u32 index; + __u32 _pad; } query_fw; }; }; struct drm_amdgpu_info_gds { /** GDS GFX partition size */ - uint32_t gds_gfx_partition_size; + __u32 gds_gfx_partition_size; /** GDS compute partition size */ - uint32_t compute_partition_size; + __u32 compute_partition_size; /** total GDS memory size */ - uint32_t gds_total_size; + __u32 gds_total_size; /** GWS size per GFX partition */ - uint32_t gws_per_gfx_partition; + __u32 gws_per_gfx_partition; /** GSW size per compute partition */ - uint32_t gws_per_compute_partition; + __u32 gws_per_compute_partition; /** OA size per GFX partition */ - uint32_t oa_per_gfx_partition; + __u32 oa_per_gfx_partition; /** OA size per compute partition */ - uint32_t oa_per_compute_partition; - uint32_t _pad; + __u32 oa_per_compute_partition; + __u32 _pad; }; struct drm_amdgpu_info_vram_gtt { - uint64_t vram_size; - uint64_t vram_cpu_accessible_size; - uint64_t gtt_size; + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; }; struct drm_amdgpu_info_firmware { - uint32_t ver; - uint32_t feature; + __u32 ver; + __u32 feature; }; #define AMDGPU_VRAM_TYPE_UNKNOWN 0 @@ -576,61 +576,61 @@ struct drm_amdgpu_info_firmware { struct drm_amdgpu_info_device { /** PCI Device ID */ - uint32_t device_id; + __u32 device_id; /** Internal chip revision: A0, A1, etc.) */ - uint32_t chip_rev; - uint32_t external_rev; + __u32 chip_rev; + __u32 external_rev; /** Revision id in PCI Config space */ - uint32_t pci_rev; - uint32_t family; - uint32_t num_shader_engines; - uint32_t num_shader_arrays_per_engine; + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; /* in KHz */ - uint32_t gpu_counter_freq; - uint64_t max_engine_clock; - uint64_t max_memory_clock; + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; /* cu information */ - uint32_t cu_active_number; - uint32_t cu_ao_mask; - uint32_t cu_bitmap[4][4]; + __u32 cu_active_number; + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; /** Render backend pipe mask. One render backend is CB+DB. */ - uint32_t enabled_rb_pipes_mask; - uint32_t num_rb_pipes; - uint32_t num_hw_gfx_contexts; - uint32_t _pad; - uint64_t ids_flags; + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + __u32 _pad; + __u64 ids_flags; /** Starting virtual address for UMDs. */ - uint64_t virtual_address_offset; + __u64 virtual_address_offset; /** The maximum virtual address */ - uint64_t virtual_address_max; + __u64 virtual_address_max; /** Required alignment of virtual addresses. 
*/ - uint32_t virtual_address_alignment; + __u32 virtual_address_alignment; /** Page table entry - fragment size */ - uint32_t pte_fragment_size; - uint32_t gart_page_size; + __u32 pte_fragment_size; + __u32 gart_page_size; /** constant engine ram size*/ - uint32_t ce_ram_size; + __u32 ce_ram_size; /** video memory type info*/ - uint32_t vram_type; + __u32 vram_type; /** video memory bit width*/ - uint32_t vram_bit_width; + __u32 vram_bit_width; /* vce harvesting instance */ - uint32_t vce_harvest_config; + __u32 vce_harvest_config; }; struct drm_amdgpu_info_hw_ip { /** Version of h/w IP */ - uint32_t hw_ip_version_major; - uint32_t hw_ip_version_minor; + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; /** Capabilities */ - uint64_t capabilities_flags; + __u64 capabilities_flags; /** command buffer address start alignment*/ - uint32_t ib_start_alignment; + __u32 ib_start_alignment; /** command buffer size alignment*/ - uint32_t ib_size_alignment; + __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. */ - uint32_t available_rings; - uint32_t _pad; + __u32 available_rings; + __u32 _pad; }; /* diff --git a/include/uapi/drm/armada_drm.h b/include/uapi/drm/armada_drm.h index 8dec3fdc99c7..6de7f0196ca0 100644 --- a/include/uapi/drm/armada_drm.h +++ b/include/uapi/drm/armada_drm.h @@ -9,6 +9,8 @@ #ifndef DRM_ARMADA_IOCTL_H #define DRM_ARMADA_IOCTL_H +#include "drm.h" + #define DRM_ARMADA_GEM_CREATE 0x00 #define DRM_ARMADA_GEM_MMAP 0x02 #define DRM_ARMADA_GEM_PWRITE 0x03 diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 3801584a0c53..b4e92eb12044 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -54,6 +54,7 @@ typedef int32_t __s32; typedef uint32_t __u32; typedef int64_t __s64; typedef uint64_t __u64; +typedef size_t __kernel_size_t; typedef unsigned long drm_handle_t; #endif @@ -129,11 +130,11 @@ struct drm_version { int version_major; /**< Major version */ int version_minor; /**< Minor version */ int version_patchlevel; /**< Patch level */ - size_t name_len; /**< Length of name buffer */ + __kernel_size_t name_len; /**< Length of name buffer */ char __user *name; /**< Name of driver */ - size_t date_len; /**< Length of date buffer */ + __kernel_size_t date_len; /**< Length of date buffer */ char __user *date; /**< User-space buffer to hold date */ - size_t desc_len; /**< Length of desc buffer */ + __kernel_size_t desc_len; /**< Length of desc buffer */ char __user *desc; /**< User-space buffer to hold desc */ }; @@ -143,7 +144,7 @@ struct drm_version { * \sa drmGetBusid() and drmSetBusId(). 
*/ struct drm_unique { - size_t unique_len; /**< Length of unique */ + __kernel_size_t unique_len; /**< Length of unique */ char __user *unique; /**< Unique name for driver instantiation */ }; diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 0b69a7753558..998bd253faad 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -24,7 +24,7 @@ #ifndef DRM_FOURCC_H #define DRM_FOURCC_H -#include <linux/types.h> +#include "drm.h" #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \ ((__u32)(c) << 16) | ((__u32)(d) << 24)) diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 6c11ca401de8..50adb46204c2 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -27,7 +27,7 @@ #ifndef _DRM_MODE_H #define _DRM_MODE_H -#include <linux/types.h> +#include "drm.h" #define DRM_DISPLAY_INFO_LEN 32 #define DRM_CONNECTOR_NAME_LEN 32 @@ -526,14 +526,14 @@ struct drm_mode_crtc_page_flip { /* create a dumb scanout buffer */ struct drm_mode_create_dumb { - uint32_t height; - uint32_t width; - uint32_t bpp; - uint32_t flags; + __u32 height; + __u32 width; + __u32 bpp; + __u32 flags; /* handle, pitch, size will be returned */ - uint32_t handle; - uint32_t pitch; - uint64_t size; + __u32 handle; + __u32 pitch; + __u64 size; }; /* set up for mmap of a dumb scanout buffer */ @@ -550,7 +550,7 @@ struct drm_mode_map_dumb { }; struct drm_mode_destroy_dumb { - uint32_t handle; + __u32 handle; }; /* page-flip flags are valid, plus: */ diff --git a/include/uapi/drm/drm_sarea.h b/include/uapi/drm/drm_sarea.h index 413a5642d49f..1d1a858a203d 100644 --- a/include/uapi/drm/drm_sarea.h +++ b/include/uapi/drm/drm_sarea.h @@ -32,7 +32,7 @@ #ifndef _DRM_SAREA_H_ #define _DRM_SAREA_H_ -#include <drm/drm.h> +#include "drm.h" /* SAREA area needs to be at least a page */ #if defined(__alpha__) diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h new file mode 100644 index 000000000000..4cc989ad6851 --- /dev/null +++ b/include/uapi/drm/etnaviv_drm.h @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_DRM_H__ +#define __ETNAVIV_DRM_H__ + +#include "drm.h" + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). 
The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. + */ +struct drm_etnaviv_timespec { + __s64 tv_sec; /* seconds */ + __s64 tv_nsec; /* nanoseconds */ +}; + +#define ETNAVIV_PARAM_GPU_MODEL 0x01 +#define ETNAVIV_PARAM_GPU_REVISION 0x02 +#define ETNAVIV_PARAM_GPU_FEATURES_0 0x03 +#define ETNAVIV_PARAM_GPU_FEATURES_1 0x04 +#define ETNAVIV_PARAM_GPU_FEATURES_2 0x05 +#define ETNAVIV_PARAM_GPU_FEATURES_3 0x06 +#define ETNAVIV_PARAM_GPU_FEATURES_4 0x07 + +#define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10 +#define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11 +#define ETNAVIV_PARAM_GPU_THREAD_COUNT 0x12 +#define ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE 0x13 +#define ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT 0x14 +#define ETNAVIV_PARAM_GPU_PIXEL_PIPES 0x15 +#define ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE 0x16 +#define ETNAVIV_PARAM_GPU_BUFFER_SIZE 0x17 +#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT 0x18 +#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19 + +#define ETNA_MAX_PIPES 4 + +struct drm_etnaviv_param { + __u32 pipe; /* in */ + __u32 param; /* in, ETNAVIV_PARAM_x */ + __u64 value; /* out (get_param) or in (set_param) */ +}; + +/* + * GEM buffers: + */ + +#define ETNA_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define ETNA_BO_CACHED 0x00010000 +#define ETNA_BO_WC 0x00020000 +#define ETNA_BO_UNCACHED 0x00040000 +/* map flags */ +#define ETNA_BO_FORCE_MMU 0x00100000 + +struct drm_etnaviv_gem_new { + __u64 size; /* in */ + __u32 flags; /* in, mask of ETNA_BO_x */ + __u32 handle; /* out */ +}; + +struct drm_etnaviv_gem_info { + __u32 handle; /* in */ + __u32 pad; + __u64 offset; /* out, offset to pass to mmap() */ +}; + +#define ETNA_PREP_READ 0x01 +#define ETNA_PREP_WRITE 0x02 +#define ETNA_PREP_NOSYNC 0x04 + +struct drm_etnaviv_gem_cpu_prep { + __u32 handle; /* in */ + __u32 op; /* in, mask of ETNA_PREP_x */ + struct drm_etnaviv_timespec timeout; /* in */ +}; + +struct drm_etnaviv_gem_cpu_fini { + __u32 handle; /* in */ + __u32 flags; /* in, placeholder for now, no defined values */ +}; + +/* + * Cmdstream Submission: + */ + +/* The value written into the cmdstream is logically: + * relocbuf->gpuaddr + reloc_offset + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. + */ +struct drm_etnaviv_gem_submit_reloc { + __u32 submit_offset; /* in, offset from submit_bo */ + __u32 reloc_idx; /* in, index of reloc_bo buffer */ + __u64 reloc_offset; /* in, offset from start of reloc_bo */ + __u32 flags; /* in, placeholder for now, no defined values */ +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As a optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. + */ +#define ETNA_SUBMIT_BO_READ 0x0001 +#define ETNA_SUBMIT_BO_WRITE 0x0002 +struct drm_etnaviv_gem_submit_bo { + __u32 flags; /* in, mask of ETNA_SUBMIT_BO_x */ + __u32 handle; /* in, GEM handle */ + __u64 presumed; /* in/out, presumed buffer address */ +}; + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. 
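+ * (Editorial sketch, not part of this patch: from userspace a minimal
+ * submit of one command stream referencing one BO, using struct
+ * drm_etnaviv_gem_submit and the defines below, could look like
+ *
+ *	struct drm_etnaviv_gem_submit_bo bo = {
+ *		.flags = ETNA_SUBMIT_BO_READ,
+ *		.handle = bo_handle,
+ *	};
+ *	struct drm_etnaviv_gem_submit req = {
+ *		.pipe = ETNA_PIPE_3D,
+ *		.exec_state = ETNA_PIPE_3D,
+ *		.nr_bos = 1,
+ *		.bos = (__u64)(uintptr_t)&bo,
+ *		.stream_size = stream_len,
+ *		.stream = (__u64)(uintptr_t)stream,
+ *	};
+ *	int ret = drmIoctl(fd, DRM_IOCTL_ETNAVIV_GEM_SUBMIT, &req);
+ *
+ * where bo_handle, stream and stream_len come from earlier GEM_NEW and
+ * command-stream construction steps, and drmIoctl() is libdrm's ioctl
+ * wrapper.)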
This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +#define ETNA_PIPE_3D 0x00 +#define ETNA_PIPE_2D 0x01 +#define ETNA_PIPE_VG 0x02 +struct drm_etnaviv_gem_submit { + __u32 fence; /* out */ + __u32 pipe; /* in */ + __u32 exec_state; /* in, initial execution state (ETNA_PIPE_x) */ + __u32 nr_bos; /* in, number of submit_bo's */ + __u32 nr_relocs; /* in, number of submit_reloc's */ + __u32 stream_size; /* in, cmdstream size */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ + __u64 stream; /* in, ptr to cmdstream */ +}; + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. + */ +#define ETNA_WAIT_NONBLOCK 0x01 +struct drm_etnaviv_wait_fence { + __u32 pipe; /* in */ + __u32 fence; /* in */ + __u32 flags; /* in, mask of ETNA_WAIT_x */ + __u32 pad; + struct drm_etnaviv_timespec timeout; /* in */ +}; + +#define ETNA_USERPTR_READ 0x01 +#define ETNA_USERPTR_WRITE 0x02 +struct drm_etnaviv_gem_userptr { + __u64 user_ptr; /* in, page aligned user pointer */ + __u64 user_size; /* in, page aligned user size */ + __u32 flags; /* in, flags */ + __u32 handle; /* out, non-zero handle */ +}; + +struct drm_etnaviv_gem_wait { + __u32 pipe; /* in */ + __u32 handle; /* in, bo to be waited for */ + __u32 flags; /* in, mask of ETNA_WAIT_x */ + __u32 pad; + struct drm_etnaviv_timespec timeout; /* in */ +}; + +#define DRM_ETNAVIV_GET_PARAM 0x00 +/* placeholder: +#define DRM_ETNAVIV_SET_PARAM 0x01 + */ +#define DRM_ETNAVIV_GEM_NEW 0x02 +#define DRM_ETNAVIV_GEM_INFO 0x03 +#define DRM_ETNAVIV_GEM_CPU_PREP 0x04 +#define DRM_ETNAVIV_GEM_CPU_FINI 0x05 +#define DRM_ETNAVIV_GEM_SUBMIT 0x06 +#define DRM_ETNAVIV_WAIT_FENCE 0x07 +#define DRM_ETNAVIV_GEM_USERPTR 0x08 +#define DRM_ETNAVIV_GEM_WAIT 0x09 +#define DRM_ETNAVIV_NUM_IOCTLS 0x0a + +#define DRM_IOCTL_ETNAVIV_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GET_PARAM, struct drm_etnaviv_param) +#define DRM_IOCTL_ETNAVIV_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_NEW, struct drm_etnaviv_gem_new) +#define DRM_IOCTL_ETNAVIV_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_INFO, struct drm_etnaviv_gem_info) +#define DRM_IOCTL_ETNAVIV_GEM_CPU_PREP DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_PREP, struct drm_etnaviv_gem_cpu_prep) +#define DRM_IOCTL_ETNAVIV_GEM_CPU_FINI DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_FINI, struct drm_etnaviv_gem_cpu_fini) +#define DRM_IOCTL_ETNAVIV_GEM_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_SUBMIT, struct drm_etnaviv_gem_submit) +#define DRM_IOCTL_ETNAVIV_WAIT_FENCE DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_WAIT_FENCE, struct drm_etnaviv_wait_fence) +#define DRM_IOCTL_ETNAVIV_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_USERPTR, struct drm_etnaviv_gem_userptr) +#define DRM_IOCTL_ETNAVIV_GEM_WAIT DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_WAIT, struct drm_etnaviv_gem_wait) + +#endif /* __ETNAVIV_DRM_H__ */ diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h index 5575ed1598bd..312c67d744ae 100644 --- a/include/uapi/drm/exynos_drm.h +++ b/include/uapi/drm/exynos_drm.h @@ -15,7 +15,7 @@ #ifndef _UAPI_EXYNOS_DRM_H_ #define 
_UAPI_EXYNOS_DRM_H_ -#include <drm/drm.h> +#include "drm.h" /** * User-desired buffer creation information structure. @@ -27,7 +27,7 @@ * - this handle will be set by gem module of kernel side. */ struct drm_exynos_gem_create { - uint64_t size; + __u64 size; unsigned int flags; unsigned int handle; }; @@ -44,7 +44,7 @@ struct drm_exynos_gem_create { struct drm_exynos_gem_info { unsigned int handle; unsigned int flags; - uint64_t size; + __u64 size; }; /** @@ -58,7 +58,7 @@ struct drm_exynos_gem_info { struct drm_exynos_vidi_connection { unsigned int connection; unsigned int extensions; - uint64_t edid; + __u64 edid; }; /* memory type definitions. */ diff --git a/include/uapi/drm/i810_drm.h b/include/uapi/drm/i810_drm.h index 34736efd5824..bdb028723ded 100644 --- a/include/uapi/drm/i810_drm.h +++ b/include/uapi/drm/i810_drm.h @@ -1,7 +1,7 @@ #ifndef _I810_DRM_H_ #define _I810_DRM_H_ -#include <drm/drm.h> +#include "drm.h" /* WARNING: These defines must be the same as what the Xserver uses. * if you change them, you must change the defines in the Xserver. diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 67ef73a5d6eb..c937a3628190 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -27,7 +27,7 @@ #ifndef _UAPI_I915_DRM_H_ #define _UAPI_I915_DRM_H_ -#include <drm/drm.h> +#include "drm.h" /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. diff --git a/include/uapi/drm/mga_drm.h b/include/uapi/drm/mga_drm.h index 2375bfd6e5e9..fca817009e13 100644 --- a/include/uapi/drm/mga_drm.h +++ b/include/uapi/drm/mga_drm.h @@ -35,7 +35,7 @@ #ifndef __MGA_DRM_H__ #define __MGA_DRM_H__ -#include <drm/drm.h> +#include "drm.h" /* WARNING: If you change any of these defines, make sure to change the * defines in the Xserver file (mga_sarea.h) diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 75a232b9a970..81e6e0d1d360 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -18,8 +18,7 @@ #ifndef __MSM_DRM_H__ #define __MSM_DRM_H__ -#include <stddef.h> -#include <drm/drm.h> +#include "drm.h" /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints: @@ -122,7 +121,7 @@ struct drm_msm_gem_cpu_fini { struct drm_msm_gem_submit_reloc { __u32 submit_offset; /* in, offset from submit_bo */ __u32 or; /* in, value OR'd with result */ - __s32 shift; /* in, amount of left shift (can be negative) */ + __s32 shift; /* in, amount of left shift (can be negative) */ __u32 reloc_idx; /* in, index of reloc_bo buffer */ __u64 reloc_offset; /* in, offset from start of reloc_bo */ }; diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index fd594cc73cc0..500d82aecbe4 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -27,6 +27,8 @@ #define DRM_NOUVEAU_EVENT_NVIF 0x80000000 +#include <drm/drm.h> + #define NOUVEAU_GEM_DOMAIN_CPU (1 << 0) #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) @@ -41,34 +43,34 @@ #define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 struct drm_nouveau_gem_info { - uint32_t handle; - uint32_t domain; - uint64_t size; - uint64_t offset; - uint64_t map_handle; - uint32_t tile_mode; - uint32_t tile_flags; + __u32 handle; + __u32 domain; + __u64 size; + __u64 offset; + __u64 map_handle; + __u32 tile_mode; + __u32 tile_flags; }; struct drm_nouveau_gem_new { struct drm_nouveau_gem_info info; - uint32_t channel_hint; - 
uint32_t align; + __u32 channel_hint; + __u32 align; }; #define NOUVEAU_GEM_MAX_BUFFERS 1024 struct drm_nouveau_gem_pushbuf_bo_presumed { - uint32_t valid; - uint32_t domain; - uint64_t offset; + __u32 valid; + __u32 domain; + __u64 offset; }; struct drm_nouveau_gem_pushbuf_bo { - uint64_t user_priv; - uint32_t handle; - uint32_t read_domains; - uint32_t write_domains; - uint32_t valid_domains; + __u64 user_priv; + __u32 handle; + __u32 read_domains; + __u32 write_domains; + __u32 valid_domains; struct drm_nouveau_gem_pushbuf_bo_presumed presumed; }; @@ -77,46 +79,46 @@ struct drm_nouveau_gem_pushbuf_bo { #define NOUVEAU_GEM_RELOC_OR (1 << 2) #define NOUVEAU_GEM_MAX_RELOCS 1024 struct drm_nouveau_gem_pushbuf_reloc { - uint32_t reloc_bo_index; - uint32_t reloc_bo_offset; - uint32_t bo_index; - uint32_t flags; - uint32_t data; - uint32_t vor; - uint32_t tor; + __u32 reloc_bo_index; + __u32 reloc_bo_offset; + __u32 bo_index; + __u32 flags; + __u32 data; + __u32 vor; + __u32 tor; }; #define NOUVEAU_GEM_MAX_PUSH 512 struct drm_nouveau_gem_pushbuf_push { - uint32_t bo_index; - uint32_t pad; - uint64_t offset; - uint64_t length; + __u32 bo_index; + __u32 pad; + __u64 offset; + __u64 length; }; struct drm_nouveau_gem_pushbuf { - uint32_t channel; - uint32_t nr_buffers; - uint64_t buffers; - uint32_t nr_relocs; - uint32_t nr_push; - uint64_t relocs; - uint64_t push; - uint32_t suffix0; - uint32_t suffix1; - uint64_t vram_available; - uint64_t gart_available; + __u32 channel; + __u32 nr_buffers; + __u64 buffers; + __u32 nr_relocs; + __u32 nr_push; + __u64 relocs; + __u64 push; + __u32 suffix0; + __u32 suffix1; + __u64 vram_available; + __u64 gart_available; }; #define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 #define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 struct drm_nouveau_gem_cpu_prep { - uint32_t handle; - uint32_t flags; + __u32 handle; + __u32 flags; }; struct drm_nouveau_gem_cpu_fini { - uint32_t handle; + __u32 handle; }; #define DRM_NOUVEAU_GETPARAM 0x00 /* deprecated */ diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 1d0b1172664e..0750c01bb480 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -20,7 +20,7 @@ #ifndef __OMAP_DRM_H__ #define __OMAP_DRM_H__ -#include <drm/drm.h> +#include "drm.h" /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. diff --git a/include/uapi/drm/qxl_drm.h b/include/uapi/drm/qxl_drm.h index ebebd36c4117..4d1e32640463 100644 --- a/include/uapi/drm/qxl_drm.h +++ b/include/uapi/drm/qxl_drm.h @@ -24,13 +24,12 @@ #ifndef QXL_DRM_H #define QXL_DRM_H -#include <stddef.h> -#include "drm/drm.h" +#include "drm.h" /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. 
* - * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel + * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel * compatibility Keep fields aligned to their size */ @@ -48,14 +47,14 @@ #define DRM_QXL_ALLOC_SURF 0x06 struct drm_qxl_alloc { - uint32_t size; - uint32_t handle; /* 0 is an invalid handle */ + __u32 size; + __u32 handle; /* 0 is an invalid handle */ }; struct drm_qxl_map { - uint64_t offset; /* use for mmap system call */ - uint32_t handle; - uint32_t pad; + __u64 offset; /* use for mmap system call */ + __u32 handle; + __u32 pad; }; /* @@ -68,59 +67,59 @@ struct drm_qxl_map { #define QXL_RELOC_TYPE_SURF 2 struct drm_qxl_reloc { - uint64_t src_offset; /* offset into src_handle or src buffer */ - uint64_t dst_offset; /* offset in dest handle */ - uint32_t src_handle; /* dest handle to compute address from */ - uint32_t dst_handle; /* 0 if to command buffer */ - uint32_t reloc_type; - uint32_t pad; + __u64 src_offset; /* offset into src_handle or src buffer */ + __u64 dst_offset; /* offset in dest handle */ + __u32 src_handle; /* dest handle to compute address from */ + __u32 dst_handle; /* 0 if to command buffer */ + __u32 reloc_type; + __u32 pad; }; struct drm_qxl_command { - uint64_t __user command; /* void* */ - uint64_t __user relocs; /* struct drm_qxl_reloc* */ - uint32_t type; - uint32_t command_size; - uint32_t relocs_num; - uint32_t pad; + __u64 __user command; /* void* */ + __u64 __user relocs; /* struct drm_qxl_reloc* */ + __u32 type; + __u32 command_size; + __u32 relocs_num; + __u32 pad; }; /* XXX: call it drm_qxl_commands? */ struct drm_qxl_execbuffer { - uint32_t flags; /* for future use */ - uint32_t commands_num; - uint64_t __user commands; /* struct drm_qxl_command* */ + __u32 flags; /* for future use */ + __u32 commands_num; + __u64 __user commands; /* struct drm_qxl_command* */ }; struct drm_qxl_update_area { - uint32_t handle; - uint32_t top; - uint32_t left; - uint32_t bottom; - uint32_t right; - uint32_t pad; + __u32 handle; + __u32 top; + __u32 left; + __u32 bottom; + __u32 right; + __u32 pad; }; #define QXL_PARAM_NUM_SURFACES 1 /* rom->n_surfaces */ #define QXL_PARAM_MAX_RELOCS 2 struct drm_qxl_getparam { - uint64_t param; - uint64_t value; + __u64 param; + __u64 value; }; /* these are one bit values */ struct drm_qxl_clientcap { - uint32_t index; - uint32_t pad; + __u32 index; + __u32 pad; }; struct drm_qxl_alloc_surf { - uint32_t format; - uint32_t width; - uint32_t height; - int32_t stride; - uint32_t handle; - uint32_t pad; + __u32 format; + __u32 width; + __u32 height; + __s32 stride; + __u32 handle; + __u32 pad; }; #define DRM_IOCTL_QXL_ALLOC \ diff --git a/include/uapi/drm/r128_drm.h b/include/uapi/drm/r128_drm.h index 76b0aa3e8210..7a44c6500a7e 100644 --- a/include/uapi/drm/r128_drm.h +++ b/include/uapi/drm/r128_drm.h @@ -33,7 +33,7 @@ #ifndef __R128_DRM_H__ #define __R128_DRM_H__ -#include <drm/drm.h> +#include "drm.h" /* WARNING: If you change any of these defines, make sure to change the * defines in the X server file (r128_sarea.h) diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 01aa2a8e3f8d..ccb9bcd82685 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -793,9 +793,9 @@ typedef struct drm_radeon_surface_free { #define RADEON_GEM_DOMAIN_VRAM 0x4 struct drm_radeon_gem_info { - uint64_t gart_size; - uint64_t vram_size; - uint64_t vram_visible; + __u64 gart_size; + __u64 vram_size; + __u64 vram_visible; }; #define RADEON_GEM_NO_BACKING_STORE 
(1 << 0) @@ -807,11 +807,11 @@ struct drm_radeon_gem_info { #define RADEON_GEM_NO_CPU_ACCESS (1 << 4) struct drm_radeon_gem_create { - uint64_t size; - uint64_t alignment; - uint32_t handle; - uint32_t initial_domain; - uint32_t flags; + __u64 size; + __u64 alignment; + __u32 handle; + __u32 initial_domain; + __u32 flags; }; /* @@ -825,10 +825,10 @@ struct drm_radeon_gem_create { #define RADEON_GEM_USERPTR_REGISTER (1 << 3) struct drm_radeon_gem_userptr { - uint64_t addr; - uint64_t size; - uint32_t flags; - uint32_t handle; + __u64 addr; + __u64 size; + __u32 flags; + __u32 handle; }; #define RADEON_TILING_MACRO 0x1 @@ -850,72 +850,72 @@ struct drm_radeon_gem_userptr { #define RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf struct drm_radeon_gem_set_tiling { - uint32_t handle; - uint32_t tiling_flags; - uint32_t pitch; + __u32 handle; + __u32 tiling_flags; + __u32 pitch; }; struct drm_radeon_gem_get_tiling { - uint32_t handle; - uint32_t tiling_flags; - uint32_t pitch; + __u32 handle; + __u32 tiling_flags; + __u32 pitch; }; struct drm_radeon_gem_mmap { - uint32_t handle; - uint32_t pad; - uint64_t offset; - uint64_t size; - uint64_t addr_ptr; + __u32 handle; + __u32 pad; + __u64 offset; + __u64 size; + __u64 addr_ptr; }; struct drm_radeon_gem_set_domain { - uint32_t handle; - uint32_t read_domains; - uint32_t write_domain; + __u32 handle; + __u32 read_domains; + __u32 write_domain; }; struct drm_radeon_gem_wait_idle { - uint32_t handle; - uint32_t pad; + __u32 handle; + __u32 pad; }; struct drm_radeon_gem_busy { - uint32_t handle; - uint32_t domain; + __u32 handle; + __u32 domain; }; struct drm_radeon_gem_pread { /** Handle for the object being read. */ - uint32_t handle; - uint32_t pad; + __u32 handle; + __u32 pad; /** Offset into the object to read from */ - uint64_t offset; + __u64 offset; /** Length of data to read */ - uint64_t size; + __u64 size; /** Pointer to write the data into. */ /* void *, but pointers are not 32/64 compatible */ - uint64_t data_ptr; + __u64 data_ptr; }; struct drm_radeon_gem_pwrite { /** Handle for the object being written to. */ - uint32_t handle; - uint32_t pad; + __u32 handle; + __u32 pad; /** Offset into the object to write to */ - uint64_t offset; + __u64 offset; /** Length of data to write */ - uint64_t size; + __u64 size; /** Pointer to read the data from. */ /* void *, but pointers are not 32/64 compatible */ - uint64_t data_ptr; + __u64 data_ptr; }; /* Sets or returns a value associated with a buffer. 
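 *
 * (Illustrative sketch, not part of this header: the op/value pair is
 * driven through the usual DRM ioctl path, e.g.
 *
 *	struct drm_radeon_gem_op op = {
 *		.handle = bo_handle,
 *		.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN,
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_RADEON_GEM_OP, &op);
 *
 * drmIoctl() is assumed from libdrm, and DRM_IOCTL_RADEON_GEM_OP is
 * assumed to be the wrapper this header defines; on success, op.value
 * holds the queried domain. Likewise, the data_ptr fields of the pread
 * and pwrite structs above are filled by casting the user pointer:
 * (__u64)(uintptr_t)buf.)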
*/ struct drm_radeon_gem_op { - uint32_t handle; /* buffer */ - uint32_t op; /* RADEON_GEM_OP_* */ - uint64_t value; /* input or return value */ + __u32 handle; /* buffer */ + __u32 op; /* RADEON_GEM_OP_* */ + __u64 value; /* input or return value */ }; #define RADEON_GEM_OP_GET_INITIAL_DOMAIN 0 @@ -935,11 +935,11 @@ struct drm_radeon_gem_op { #define RADEON_VM_PAGE_SNOOPED (1 << 4) struct drm_radeon_gem_va { - uint32_t handle; - uint32_t operation; - uint32_t vm_id; - uint32_t flags; - uint64_t offset; + __u32 handle; + __u32 operation; + __u32 vm_id; + __u32 flags; + __u64 offset; }; #define RADEON_CHUNK_ID_RELOCS 0x01 @@ -961,29 +961,29 @@ struct drm_radeon_gem_va { /* 0 = normal, + = higher priority, - = lower priority */ struct drm_radeon_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - uint64_t chunk_data; + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; }; /* drm_radeon_cs_reloc.flags */ #define RADEON_RELOC_PRIO_MASK (0xf << 0) struct drm_radeon_cs_reloc { - uint32_t handle; - uint32_t read_domains; - uint32_t write_domain; - uint32_t flags; + __u32 handle; + __u32 read_domains; + __u32 write_domain; + __u32 flags; }; struct drm_radeon_cs { - uint32_t num_chunks; - uint32_t cs_id; - /* this points to uint64_t * which point to cs chunks */ - uint64_t chunks; + __u32 num_chunks; + __u32 cs_id; + /* this points to __u64 * which point to cs chunks */ + __u64 chunks; /* updates to the limits after this CS ioctl */ - uint64_t gart_limit; - uint64_t vram_limit; + __u64 gart_limit; + __u64 vram_limit; }; #define RADEON_INFO_DEVICE_ID 0x00 @@ -1042,9 +1042,9 @@ struct drm_radeon_cs { #define RADEON_INFO_GPU_RESET_COUNTER 0x26 struct drm_radeon_info { - uint32_t request; - uint32_t pad; - uint64_t value; + __u32 request; + __u32 pad; + __u64 value; }; /* Those correspond to the tile index to use, this is to explicitly state diff --git a/include/uapi/drm/savage_drm.h b/include/uapi/drm/savage_drm.h index 9dc9dc1a7753..574147489c60 100644 --- a/include/uapi/drm/savage_drm.h +++ b/include/uapi/drm/savage_drm.h @@ -26,7 +26,7 @@ #ifndef __SAVAGE_DRM_H__ #define __SAVAGE_DRM_H__ -#include <drm/drm.h> +#include "drm.h" #ifndef __SAVAGE_SAREA_DEFINES__ #define __SAVAGE_SAREA_DEFINES__ diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h index 5391780c2b05..27d0b054aed0 100644 --- a/include/uapi/drm/tegra_drm.h +++ b/include/uapi/drm/tegra_drm.h @@ -23,7 +23,7 @@ #ifndef _UAPI_TEGRA_DRM_H_ #define _UAPI_TEGRA_DRM_H_ -#include <drm/drm.h> +#include "drm.h" #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h new file mode 100644 index 000000000000..eeb37e394f13 --- /dev/null +++ b/include/uapi/drm/vc4_drm.h @@ -0,0 +1,279 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _UAPI_VC4_DRM_H_ +#define _UAPI_VC4_DRM_H_ + +#include "drm.h" + +#define DRM_VC4_SUBMIT_CL 0x00 +#define DRM_VC4_WAIT_SEQNO 0x01 +#define DRM_VC4_WAIT_BO 0x02 +#define DRM_VC4_CREATE_BO 0x03 +#define DRM_VC4_MMAP_BO 0x04 +#define DRM_VC4_CREATE_SHADER_BO 0x05 +#define DRM_VC4_GET_HANG_STATE 0x06 + +#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) +#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) +#define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) +#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) +#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) +#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) +#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) + +struct drm_vc4_submit_rcl_surface { + __u32 hindex; /* Handle index, or ~0 if not present. */ + __u32 offset; /* Offset to start of buffer. */ + /* + * Bits for either render config (color_write) or load/store packet. + * Bits should all be 0 for MSAA load/stores. + */ + __u16 bits; + +#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) + __u16 flags; +}; + +/** + * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * Drivers typically use GPU BOs to store batchbuffers / command lists and + * their associated state. However, because the VC4 lacks an MMU, we have to + * do validation of memory accesses by the GPU commands. If we were to store + * our commands in BOs, we'd need to do uncached readback from them to do the + * validation process, which is too expensive. Instead, userspace accumulates + * commands and associated state in plain memory, then the kernel copies the + * data to its own address space, and then validates and stores it in a GPU + * BO. + */ +struct drm_vc4_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u64 bin_cl; + + /* Pointer to the shader records. + * + * Shader records are the structures read by the hardware that contain + * pointers to uniforms, shaders, and vertex attributes. The + * reference to the shader record has enough information to determine + * how many pointers are necessary (fixed number for shaders/uniforms, + * and an attribute count), so those BO indices into bo_handles are + * just stored as __u32s before each shader record passed in. + */ + __u64 shader_rec; + + /* Pointer to uniform data and texture handles for the textures + * referenced by the shader. 
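+ *
+ * (Illustrative sketch, not normative; the exact layout is described
+ * below. For one shader record whose fragment shader samples a single
+ * texture while the vertex and coordinate shaders sample none, the
+ * stream would be:
+ *
+ *	__u32 fs_tex_bo_index;	index into bo_handles for the FS texture
+ *	__u32 fs_uniforms[];	raw FS uniform words
+ *	__u32 vs_uniforms[];	raw VS uniform words
+ *	__u32 cs_uniforms[];	raw CS uniform words
+ *
+ * with the per-stage sizes recovered by the kernel during shader
+ * validation.)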
+ * + * For each shader state record, there is a set of uniform data in the + * order referenced by the record (FS, VS, then CS). Each set of + * uniform data has a __u32 index into bo_handles per texture + * sample operation, in the order the QPU_W_TMUn_S writes appear in + * the program. Following the texture BO handle indices is the actual + * uniform data. + * + * The individual uniform state blocks don't have sizes passed in, + * because the kernel has to determine the sizes anyway during shader + * code validation. + */ + __u64 uniforms; + __u64 bo_handles; + + /* Size in bytes of the binner command list. */ + __u32 bin_cl_size; + /* Size in bytes of the set of shader records. */ + __u32 shader_rec_size; + /* Number of shader records. + * + * This could just be computed from the contents of shader_records and + * the address bits of references to them from the bin CL, but it + * keeps the kernel from having to resize some allocations it makes. + */ + __u32 shader_rec_count; + /* Size in bytes of the uniform state. */ + __u32 uniforms_size; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* RCL setup: */ + __u16 width; + __u16 height; + __u8 min_x_tile; + __u8 min_y_tile; + __u8 max_x_tile; + __u8 max_y_tile; + struct drm_vc4_submit_rcl_surface color_read; + struct drm_vc4_submit_rcl_surface color_write; + struct drm_vc4_submit_rcl_surface zs_read; + struct drm_vc4_submit_rcl_surface zs_write; + struct drm_vc4_submit_rcl_surface msaa_color_write; + struct drm_vc4_submit_rcl_surface msaa_zs_write; + __u32 clear_color[2]; + __u32 clear_z; + __u8 clear_s; + + __u32 pad:24; + +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) + __u32 flags; + + /* Returned value of the seqno of this render job (for the + * wait ioctl). + */ + __u64 seqno; +}; + +/** + * struct drm_vc4_wait_seqno - ioctl argument for waiting for + * DRM_VC4_SUBMIT_CL completion using its returned seqno. + * + * timeout_ns is the timeout in nanoseconds, where "0" means "don't + * block, just return the status." + */ +struct drm_vc4_wait_seqno { + __u64 seqno; + __u64 timeout_ns; +}; + +/** + * struct drm_vc4_wait_bo - ioctl argument for waiting for + * completion of the last DRM_VC4_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_vc4_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc4_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + __u32 pad; +}; + +/** + * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc4_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +/** + * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4 + * shader BOs. 
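+ *
+ * (Minimal userspace sketch, illustrative only; "qpu_code" and
+ * "code_size" are hypothetical and error handling is omitted:
+ *
+ *	struct drm_vc4_create_shader_bo create = {
+ *		.size = code_size,
+ *		.data = (__u64)(uintptr_t)qpu_code,
+ *	};
+ *	ret = drmIoctl(fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create);
+ *
+ * On success, create.handle names the validated shader BO; the
+ * rationale for requiring this dedicated ioctl follows.)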
+ * + * Since allowing a shader to be overwritten while it's also being + * executed from would allow privilege escalation, shaders must be + * created using this ioctl, and they can't be mmapped later. + */ +struct drm_vc4_create_shader_bo { + /* Size of the data argument. */ + __u32 size; + /* Flags, currently must be 0. */ + __u32 flags; + + /* Pointer to the data. */ + __u64 data; + + /** Returned GEM handle for the BO. */ + __u32 handle; + /* Pad, must be 0. */ + __u32 pad; +}; + +struct drm_vc4_get_hang_state_bo { + __u32 handle; + __u32 paddr; + __u32 size; + __u32 pad; +}; + +/** + * struct drm_vc4_get_hang_state - ioctl argument for collecting state + * from a GPU hang for analysis. + */ +struct drm_vc4_get_hang_state { + /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ + __u64 bo; + /** + * On input, the size of the bo array. Output is the number + * of bos to be returned. + */ + __u32 bo_count; + + __u32 start_bin, start_render; + + __u32 ct0ca, ct0ea; + __u32 ct1ca, ct1ea; + __u32 ct0cs, ct1cs; + __u32 ct0ra0, ct1ra0; + + __u32 bpca, bpcs; + __u32 bpoa, bpos; + + __u32 vpmbase; + + __u32 dbge; + __u32 fdbgo; + __u32 fdbgb; + __u32 fdbgr; + __u32 fdbgs; + __u32 errstat; + + /* Pad that we may save more registers into in the future. */ + __u32 pad[16]; +}; + +#endif /* _UAPI_VC4_DRM_H_ */ diff --git a/include/uapi/drm/via_drm.h b/include/uapi/drm/via_drm.h index 45bc80c3714b..fa21ed185520 100644 --- a/include/uapi/drm/via_drm.h +++ b/include/uapi/drm/via_drm.h @@ -24,7 +24,7 @@ #ifndef _VIA_DRM_H_ #define _VIA_DRM_H_ -#include <drm/drm.h> +#include "drm.h" /* WARNING: These defines must be the same as what the Xserver uses. * if you change them, you must change the defines in the Xserver. @@ -33,9 +33,6 @@ #ifndef _VIA_DEFINES_ #define _VIA_DEFINES_ -#ifndef __KERNEL__ -#include "via_drmclient.h" -#endif #define VIA_NR_SAREA_CLIPRECTS 8 #define VIA_NR_XVMC_PORTS 10 diff --git a/include/uapi/drm/virtgpu_drm.h b/include/uapi/drm/virtgpu_drm.h index fc9e2d6e5e2f..c74f1f90cb37 100644 --- a/include/uapi/drm/virtgpu_drm.h +++ b/include/uapi/drm/virtgpu_drm.h @@ -24,13 +24,12 @@ #ifndef VIRTGPU_DRM_H #define VIRTGPU_DRM_H -#include <stddef.h> -#include "drm/drm.h" +#include "drm.h" /* Please note that modifications to all structs defined here are * subject to backwards-compatibility constraints. * - * Do not use pointers, use uint64_t instead for 32 bit / 64 bit user/kernel + * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel * compatibility Keep fields aligned to their size */ @@ -45,88 +44,88 @@ #define DRM_VIRTGPU_GET_CAPS 0x09 struct drm_virtgpu_map { - uint64_t offset; /* use for mmap system call */ - uint32_t handle; - uint32_t pad; + __u64 offset; /* use for mmap system call */ + __u32 handle; + __u32 pad; }; struct drm_virtgpu_execbuffer { - uint32_t flags; /* for future use */ - uint32_t size; - uint64_t command; /* void* */ - uint64_t bo_handles; - uint32_t num_bo_handles; - uint32_t pad; + __u32 flags; /* for future use */ + __u32 size; + __u64 command; /* void* */ + __u64 bo_handles; + __u32 num_bo_handles; + __u32 pad; }; #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ struct drm_virtgpu_getparam { - uint64_t param; - uint64_t value; + __u64 param; + __u64 value; }; /* NO_BO flags? NO resource flag?
*/ /* resource flag for y_0_top */ struct drm_virtgpu_resource_create { - uint32_t target; - uint32_t format; - uint32_t bind; - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t array_size; - uint32_t last_level; - uint32_t nr_samples; - uint32_t flags; - uint32_t bo_handle; /* if this is set - recreate a new resource attached to this bo ? */ - uint32_t res_handle; /* returned by kernel */ - uint32_t size; /* validate transfer in the host */ - uint32_t stride; /* validate transfer in the host */ + __u32 target; + __u32 format; + __u32 bind; + __u32 width; + __u32 height; + __u32 depth; + __u32 array_size; + __u32 last_level; + __u32 nr_samples; + __u32 flags; + __u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? */ + __u32 res_handle; /* returned by kernel */ + __u32 size; /* validate transfer in the host */ + __u32 stride; /* validate transfer in the host */ }; struct drm_virtgpu_resource_info { - uint32_t bo_handle; - uint32_t res_handle; - uint32_t size; - uint32_t stride; + __u32 bo_handle; + __u32 res_handle; + __u32 size; + __u32 stride; }; struct drm_virtgpu_3d_box { - uint32_t x; - uint32_t y; - uint32_t z; - uint32_t w; - uint32_t h; - uint32_t d; + __u32 x; + __u32 y; + __u32 z; + __u32 w; + __u32 h; + __u32 d; }; struct drm_virtgpu_3d_transfer_to_host { - uint32_t bo_handle; + __u32 bo_handle; struct drm_virtgpu_3d_box box; - uint32_t level; - uint32_t offset; + __u32 level; + __u32 offset; }; struct drm_virtgpu_3d_transfer_from_host { - uint32_t bo_handle; + __u32 bo_handle; struct drm_virtgpu_3d_box box; - uint32_t level; - uint32_t offset; + __u32 level; + __u32 offset; }; #define VIRTGPU_WAIT_NOWAIT 1 /* like it */ struct drm_virtgpu_3d_wait { - uint32_t handle; /* 0 is an invalid handle */ - uint32_t flags; + __u32 handle; /* 0 is an invalid handle */ + __u32 flags; }; struct drm_virtgpu_get_caps { - uint32_t cap_set_id; - uint32_t cap_set_ver; - uint64_t addr; - uint32_t size; - uint32_t pad; + __u32 cap_set_id; + __u32 cap_set_ver; + __u64 addr; + __u32 size; + __u32 pad; }; #define DRM_IOCTL_VIRTGPU_MAP \ diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h index 05b204954d16..5b68b4d10884 100644 --- a/include/uapi/drm/vmwgfx_drm.h +++ b/include/uapi/drm/vmwgfx_drm.h @@ -28,9 +28,7 @@ #ifndef __VMWGFX_DRM_H__ #define __VMWGFX_DRM_H__ -#ifndef __KERNEL__ -#include <drm/drm.h> -#endif +#include "drm.h" #define DRM_VMW_MAX_SURFACE_FACES 6 #define DRM_VMW_MAX_MIP_LEVELS 24 @@ -111,9 +109,9 @@ enum drm_vmw_handle_type { */ struct drm_vmw_getparam_arg { - uint64_t value; - uint32_t param; - uint32_t pad64; + __u64 value; + __u32 param; + __u32 pad64; }; /*************************************************************************/ @@ -134,8 +132,8 @@ struct drm_vmw_getparam_arg { */ struct drm_vmw_context_arg { - int32_t cid; - uint32_t pad64; + __s32 cid; + __u32 pad64; }; /*************************************************************************/ @@ -165,7 +163,7 @@ struct drm_vmw_context_arg { * @mip_levels: Number of mip levels for each face. * An unused face should have 0 encoded. * @size_addr: Address of a user-space array of sruct drm_vmw_size - * cast to an uint64_t for 32-64 bit compatibility. + * cast to an __u64 for 32-64 bit compatibility. * The size of the array should equal the total number of mipmap levels. * @shareable: Boolean whether other clients (as identified by file descriptors) * may reference this surface. 
@@ -177,12 +175,12 @@ struct drm_vmw_context_arg { */ struct drm_vmw_surface_create_req { - uint32_t flags; - uint32_t format; - uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES]; - uint64_t size_addr; - int32_t shareable; - int32_t scanout; + __u32 flags; + __u32 format; + __u32 mip_levels[DRM_VMW_MAX_SURFACE_FACES]; + __u64 size_addr; + __s32 shareable; + __s32 scanout; }; /** @@ -197,7 +195,7 @@ struct drm_vmw_surface_create_req { */ struct drm_vmw_surface_arg { - int32_t sid; + __s32 sid; enum drm_vmw_handle_type handle_type; }; @@ -213,10 +211,10 @@ struct drm_vmw_surface_arg { */ struct drm_vmw_size { - uint32_t width; - uint32_t height; - uint32_t depth; - uint32_t pad64; + __u32 width; + __u32 height; + __u32 depth; + __u32 pad64; }; /** @@ -284,13 +282,13 @@ union drm_vmw_surface_reference_arg { /** * struct drm_vmw_execbuf_arg * - * @commands: User-space address of a command buffer cast to an uint64_t. + * @commands: User-space address of a command buffer cast to an __u64. * @command-size: Size in bytes of the command buffer. * @throttle-us: Sleep until software is less than @throttle_us * microseconds ahead of hardware. The driver may round this value * to the nearest kernel tick. * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an - * uint64_t. + * __u64. * @version: Allows expanding the execbuf ioctl parameters without breaking * backwards compatibility, since user-space will always tell the kernel * which version it uses. @@ -302,14 +300,14 @@ union drm_vmw_surface_reference_arg { #define DRM_VMW_EXECBUF_VERSION 2 struct drm_vmw_execbuf_arg { - uint64_t commands; - uint32_t command_size; - uint32_t throttle_us; - uint64_t fence_rep; - uint32_t version; - uint32_t flags; - uint32_t context_handle; - uint32_t pad64; + __u64 commands; + __u32 command_size; + __u32 throttle_us; + __u64 fence_rep; + __u32 version; + __u32 flags; + __u32 context_handle; + __u32 pad64; }; /** @@ -338,12 +336,12 @@ struct drm_vmw_execbuf_arg { */ struct drm_vmw_fence_rep { - uint32_t handle; - uint32_t mask; - uint32_t seqno; - uint32_t passed_seqno; - uint32_t pad64; - int32_t error; + __u32 handle; + __u32 mask; + __u32 seqno; + __u32 passed_seqno; + __u32 pad64; + __s32 error; }; /*************************************************************************/ @@ -373,8 +371,8 @@ struct drm_vmw_fence_rep { */ struct drm_vmw_alloc_dmabuf_req { - uint32_t size; - uint32_t pad64; + __u32 size; + __u32 pad64; }; /** @@ -391,11 +389,11 @@ struct drm_vmw_alloc_dmabuf_req { */ struct drm_vmw_dmabuf_rep { - uint64_t map_handle; - uint32_t handle; - uint32_t cur_gmr_id; - uint32_t cur_gmr_offset; - uint32_t pad64; + __u64 map_handle; + __u32 handle; + __u32 cur_gmr_id; + __u32 cur_gmr_offset; + __u32 pad64; }; /** @@ -428,8 +426,8 @@ union drm_vmw_alloc_dmabuf_arg { */ struct drm_vmw_unref_dmabuf_arg { - uint32_t handle; - uint32_t pad64; + __u32 handle; + __u32 pad64; }; /*************************************************************************/ @@ -452,10 +450,10 @@ struct drm_vmw_unref_dmabuf_arg { */ struct drm_vmw_rect { - int32_t x; - int32_t y; - uint32_t w; - uint32_t h; + __s32 x; + __s32 y; + __u32 w; + __u32 h; }; /** @@ -477,21 +475,21 @@ struct drm_vmw_rect { */ struct drm_vmw_control_stream_arg { - uint32_t stream_id; - uint32_t enabled; + __u32 stream_id; + __u32 enabled; - uint32_t flags; - uint32_t color_key; + __u32 flags; + __u32 color_key; - uint32_t handle; - uint32_t offset; - int32_t format; - uint32_t size; - uint32_t width; - uint32_t height; - uint32_t pitch[3]; + 
__u32 handle; + __u32 offset; + __s32 format; + __u32 size; + __u32 width; + __u32 height; + __u32 pitch[3]; - uint32_t pad64; + __u32 pad64; struct drm_vmw_rect src; struct drm_vmw_rect dst; }; @@ -519,12 +517,12 @@ struct drm_vmw_control_stream_arg { */ struct drm_vmw_cursor_bypass_arg { - uint32_t flags; - uint32_t crtc_id; - int32_t xpos; - int32_t ypos; - int32_t xhot; - int32_t yhot; + __u32 flags; + __u32 crtc_id; + __s32 xpos; + __s32 ypos; + __s32 xhot; + __s32 yhot; }; /*************************************************************************/ @@ -542,8 +540,8 @@ struct drm_vmw_cursor_bypass_arg { */ struct drm_vmw_stream_arg { - uint32_t stream_id; - uint32_t pad64; + __u32 stream_id; + __u32 pad64; }; /*************************************************************************/ @@ -565,7 +563,7 @@ struct drm_vmw_stream_arg { /** * struct drm_vmw_get_3d_cap_arg * - * @buffer: Pointer to a buffer for capability data, cast to an uint64_t + * @buffer: Pointer to a buffer for capability data, cast to an __u64 * @size: Max size to copy * * Input argument to the DRM_VMW_GET_3D_CAP_IOCTL @@ -573,9 +571,9 @@ struct drm_vmw_stream_arg { */ struct drm_vmw_get_3d_cap_arg { - uint64_t buffer; - uint32_t max_size; - uint32_t pad64; + __u64 buffer; + __u32 max_size; + __u32 pad64; }; /*************************************************************************/ @@ -624,14 +622,14 @@ struct drm_vmw_get_3d_cap_arg { */ struct drm_vmw_fence_wait_arg { - uint32_t handle; - int32_t cookie_valid; - uint64_t kernel_cookie; - uint64_t timeout_us; - int32_t lazy; - int32_t flags; - int32_t wait_options; - int32_t pad64; + __u32 handle; + __s32 cookie_valid; + __u64 kernel_cookie; + __u64 timeout_us; + __s32 lazy; + __s32 flags; + __s32 wait_options; + __s32 pad64; }; /*************************************************************************/ @@ -655,12 +653,12 @@ struct drm_vmw_fence_wait_arg { */ struct drm_vmw_fence_signaled_arg { - uint32_t handle; - uint32_t flags; - int32_t signaled; - uint32_t passed_seqno; - uint32_t signaled_flags; - uint32_t pad64; + __u32 handle; + __u32 flags; + __s32 signaled; + __u32 passed_seqno; + __u32 signaled_flags; + __u32 pad64; }; /*************************************************************************/ @@ -681,8 +679,8 @@ struct drm_vmw_fence_signaled_arg { */ struct drm_vmw_fence_arg { - uint32_t handle; - uint32_t pad64; + __u32 handle; + __u32 pad64; }; @@ -703,9 +701,9 @@ struct drm_vmw_fence_arg { struct drm_vmw_event_fence { struct drm_event base; - uint64_t user_data; - uint32_t tv_sec; - uint32_t tv_usec; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; }; /* @@ -717,17 +715,17 @@ struct drm_vmw_event_fence { /** * struct drm_vmw_fence_event_arg * - * @fence_rep: Pointer to fence_rep structure cast to uint64_t or 0 if + * @fence_rep: Pointer to fence_rep structure cast to __u64 or 0 if * the fence is not supposed to be referenced by user-space. * @user_info: Info to be delivered with the event. * @handle: Attach the event to this fence only. * @flags: A set of flags as defined above. */ struct drm_vmw_fence_event_arg { - uint64_t fence_rep; - uint64_t user_data; - uint32_t handle; - uint32_t flags; + __u64 fence_rep; + __u64 user_data; + __u32 handle; + __u32 flags; }; @@ -747,7 +745,7 @@ struct drm_vmw_fence_event_arg { * @sid: Surface id to present from. * @dest_x: X placement coordinate for surface. * @dest_y: Y placement coordinate for surface. - * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t. 
+ * @clips_ptr: Pointer to an array of clip rects cast to an __u64. * @num_clips: Number of cliprects given relative to the framebuffer origin, * in the same coordinate space as the frame buffer. * @pad64: Unused 64-bit padding. @@ -756,13 +754,13 @@ struct drm_vmw_fence_event_arg { */ struct drm_vmw_present_arg { - uint32_t fb_id; - uint32_t sid; - int32_t dest_x; - int32_t dest_y; - uint64_t clips_ptr; - uint32_t num_clips; - uint32_t pad64; + __u32 fb_id; + __u32 sid; + __s32 dest_x; + __s32 dest_y; + __u64 clips_ptr; + __u32 num_clips; + __u32 pad64; }; @@ -780,16 +778,16 @@ struct drm_vmw_present_arg { * struct drm_vmw_present_arg * @fb_id: fb_id to present / read back from. * @num_clips: Number of cliprects. - * @clips_ptr: Pointer to an array of clip rects cast to an uint64_t. - * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an uint64_t. + * @clips_ptr: Pointer to an array of clip rects cast to an __u64. + * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an __u64. * If this member is NULL, then the ioctl should not return a fence. */ struct drm_vmw_present_readback_arg { - uint32_t fb_id; - uint32_t num_clips; - uint64_t clips_ptr; - uint64_t fence_rep; + __u32 fb_id; + __u32 num_clips; + __u64 clips_ptr; + __u64 fence_rep; }; /*************************************************************************/ @@ -805,14 +803,14 @@ struct drm_vmw_present_readback_arg { * struct drm_vmw_update_layout_arg * * @num_outputs: number of active connectors - * @rects: pointer to array of drm_vmw_rect cast to an uint64_t + * @rects: pointer to array of drm_vmw_rect cast to an __u64 * * Input argument to the DRM_VMW_UPDATE_LAYOUT Ioctl. */ struct drm_vmw_update_layout_arg { - uint32_t num_outputs; - uint32_t pad64; - uint64_t rects; + __u32 num_outputs; + __u32 pad64; + __u64 rects; }; @@ -849,10 +847,10 @@ enum drm_vmw_shader_type { */ struct drm_vmw_shader_create_arg { enum drm_vmw_shader_type shader_type; - uint32_t size; - uint32_t buffer_handle; - uint32_t shader_handle; - uint64_t offset; + __u32 size; + __u32 buffer_handle; + __u32 shader_handle; + __u64 offset; }; /*************************************************************************/ @@ -871,8 +869,8 @@ struct drm_vmw_shader_create_arg { * Input argument to the DRM_VMW_UNREF_SHADER ioctl. */ struct drm_vmw_shader_arg { - uint32_t handle; - uint32_t pad64; + __u32 handle; + __u32 pad64; }; /*************************************************************************/ @@ -918,14 +916,14 @@ enum drm_vmw_surface_flags { * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl. */ struct drm_vmw_gb_surface_create_req { - uint32_t svga3d_flags; - uint32_t format; - uint32_t mip_levels; + __u32 svga3d_flags; + __u32 format; + __u32 mip_levels; enum drm_vmw_surface_flags drm_surface_flags; - uint32_t multisample_count; - uint32_t autogen_filter; - uint32_t buffer_handle; - uint32_t array_size; + __u32 multisample_count; + __u32 autogen_filter; + __u32 buffer_handle; + __u32 array_size; struct drm_vmw_size base_size; }; @@ -944,11 +942,11 @@ struct drm_vmw_gb_surface_create_req { * Output argument for the DRM_VMW_GB_SURFACE_CREATE ioctl. 
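 *
 * (Illustrative note, not from the original header: buffer_map_handle
 * is presumably used like the other vmwgfx map handles, i.e. as the
 * offset argument when mmap()ing the backing buffer on the DRM fd:
 *
 *	ptr = mmap(NULL, rep.buffer_size, PROT_READ | PROT_WRITE,
 *		   MAP_SHARED, drm_fd, rep.buffer_map_handle);
 *
 * The field names here are read from the struct that follows.)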
*/ struct drm_vmw_gb_surface_create_rep { - uint32_t handle; - uint32_t backup_size; - uint32_t buffer_handle; - uint32_t buffer_size; - uint64_t buffer_map_handle; + __u32 handle; + __u32 backup_size; + __u32 buffer_handle; + __u32 buffer_size; + __u64 buffer_map_handle; }; /** @@ -1061,8 +1059,8 @@ enum drm_vmw_synccpu_op { struct drm_vmw_synccpu_arg { enum drm_vmw_synccpu_op op; enum drm_vmw_synccpu_flags flags; - uint32_t handle; - uint32_t pad64; + __u32 handle; + __u32 pad64; }; /*************************************************************************/ diff --git a/include/uapi/linux/agpgart.h b/include/uapi/linux/agpgart.h index 4e828cf487bc..f5251045181a 100644 --- a/include/uapi/linux/agpgart.h +++ b/include/uapi/linux/agpgart.h @@ -52,6 +52,7 @@ #ifndef __KERNEL__ #include <linux/types.h> +#include <stdlib.h> struct agp_version { __u16 major; diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 7a63faa9065c..4b04ead26cd9 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -287,7 +287,7 @@ struct virtio_gpu_get_capset { /* VIRTIO_GPU_RESP_OK_CAPSET */ struct virtio_gpu_resp_capset { struct virtio_gpu_ctrl_hdr hdr; - uint8_t capset_data[]; + __u8 capset_data[]; }; #define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 85dedca3dcfb..eeba75395f7d 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -343,7 +343,6 @@ struct ipu_client_platformdata { int di; int dc; int dp; - int dmfc; int dma[2]; }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3f4c99e06c6b..b0799bced518 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) attr->value_size == 0) return ERR_PTR(-EINVAL); + if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) + /* if value_size is bigger, the user space won't be able to + * access the elements. + */ + return ERR_PTR(-E2BIG); + elem_size = round_up(attr->value_size, 8); /* check round_up into zero and u32 overflow */ if (elem_size == 0 || - attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size) + attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) return ERR_PTR(-ENOMEM); array_size = sizeof(*array) + attr->max_entries * elem_size; @@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, array->elem_size); + memcpy(array->value + array->elem_size * index, value, map->value_size); return 0; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 19909b22b4f8..34777b3746fa 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) */ goto free_htab; - err = -ENOMEM; + if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - + MAX_BPF_STACK - sizeof(struct htab_elem)) + /* if value_size is bigger, the user space won't be able to + * access the elements via bpf syscall. 
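+ * (Equivalently, accepted sizes satisfy
+ *	value_size + MAX_BPF_STACK + sizeof(struct htab_elem)
+ *		< 1 << (KMALLOC_SHIFT_MAX - 1)
+ * so a whole element stays well below the kmalloc() limit.)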
This check also makes + * sure that the elem_size doesn't overflow and it's + * kmalloc-able later in htab_map_update_elem() + */ + goto free_htab; + + htab->elem_size = sizeof(struct htab_elem) + + round_up(htab->map.key_size, 8) + + htab->map.value_size; + /* prevent zero size kmalloc and check for u32 overflow */ if (htab->n_buckets == 0 || htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) goto free_htab; + if ((u64) htab->n_buckets * sizeof(struct hlist_head) + + (u64) htab->elem_size * htab->map.max_entries >= + U32_MAX - PAGE_SIZE) + /* make sure page count doesn't overflow */ + goto free_htab; + + htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + + htab->elem_size * htab->map.max_entries, + PAGE_SIZE) >> PAGE_SHIFT; + + err = -ENOMEM; htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), GFP_USER | __GFP_NOWARN); @@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->lock); htab->count = 0; - htab->elem_size = sizeof(struct htab_elem) + - round_up(htab->map.key_size, 8) + - htab->map.value_size; - - htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + - htab->elem_size * htab->map.max_entries, - PAGE_SIZE) >> PAGE_SHIFT; return &htab->map; free_htab: @@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, WARN_ON_ONCE(!rcu_read_lock_held()); /* allocate new element outside of lock */ - l_new = kmalloc(htab->elem_size, GFP_ATOMIC); + l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); if (!l_new) return -ENOMEM; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index be6d726e31c9..5a8a797d50b7 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type) atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); break; case BPF_TYPE_MAP: - atomic_inc(&((struct bpf_map *)raw)->refcnt); + bpf_map_inc(raw, true); break; default: WARN_ON_ONCE(1); @@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type) bpf_prog_put(raw); break; case BPF_TYPE_MAP: - bpf_map_put(raw); + bpf_map_put_with_uref(raw); break; default: WARN_ON_ONCE(1); @@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) void *raw; *type = BPF_TYPE_MAP; - raw = bpf_map_get(ufd); + raw = bpf_map_get_with_uref(ufd); if (IS_ERR(raw)) { *type = BPF_TYPE_PROG; raw = bpf_prog_get(ufd); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0d3313d02a7e..3b39550d8485 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work) map->ops->map_free(map); } +static void bpf_map_put_uref(struct bpf_map *map) +{ + if (atomic_dec_and_test(&map->usercnt)) { + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + bpf_fd_array_map_clear(map); + } +} + /* decrement map refcnt and schedule it for freeing via workqueue * (unrelying map implementation ops->map_free() might sleep) */ @@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map) } } -static int bpf_map_release(struct inode *inode, struct file *filp) +void bpf_map_put_with_uref(struct bpf_map *map) { - struct bpf_map *map = filp->private_data; - - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - /* prog_array stores refcnt-ed bpf_prog pointers - * release them all when user space closes prog_array_fd - */ - bpf_fd_array_map_clear(map); - + bpf_map_put_uref(map); bpf_map_put(map); +} + +static int bpf_map_release(struct inode *inode, struct 
file *filp) +{ + bpf_map_put_with_uref(filp->private_data); return 0; } @@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr) return PTR_ERR(map); atomic_set(&map->refcnt, 1); + atomic_set(&map->usercnt, 1); err = bpf_map_charge_memlock(map); if (err) @@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -struct bpf_map *bpf_map_get(u32 ufd) +void bpf_map_inc(struct bpf_map *map, bool uref) +{ + atomic_inc(&map->refcnt); + if (uref) + atomic_inc(&map->usercnt); +} + +struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); struct bpf_map *map; @@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd) if (IS_ERR(map)) return map; - atomic_inc(&map->refcnt); + bpf_map_inc(map, true); fdput(f); return map; @@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto free_key; err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER); + value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; @@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr) goto free_key; err = -ENOMEM; - value = kmalloc(map->value_size, GFP_USER); + value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); if (!value) goto free_key; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c6073056badf..a7945d10b378 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - atomic_inc(&map->refcnt); - + bpf_map_inc(map, false); fdput(f); next_insn: insn++; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d568ac9319e..7063c6a07440 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1947,13 +1947,38 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) #ifdef CONFIG_SMP /* + * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be + * possible to, falsely, observe p->on_cpu == 0. + * + * One must be running (->on_cpu == 1) in order to remove oneself + * from the runqueue. + * + * [S] ->on_cpu = 1; [L] ->on_rq + * UNLOCK rq->lock + * RMB + * LOCK rq->lock + * [S] ->on_rq = 0; [L] ->on_cpu + * + * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock + * from the consecutive calls to schedule(); the first switching to our + * task, the second putting it to sleep. + */ + smp_rmb(); + + /* * If the owning (remote) cpu is still in the middle of schedule() with * this task as prev, wait until its done referencing the task. */ while (p->on_cpu) cpu_relax(); /* - * Pairs with the smp_wmb() in finish_lock_switch(). + * Combined with the control dependency above, we have an effective + * smp_load_acquire() without the need for full barriers. + * + * Pairs with the smp_store_release() in finish_lock_switch(). + * + * This ensures that tasks getting woken will be fully ordered against + * their previous state and preserve Program Order. 
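+ *
+ * (Editorial sketch of the pairing, under the same assumptions as the
+ * text above; "p" is the task being woken:
+ *
+ *	CPU doing schedule()			CPU doing try_to_wake_up()
+ *	--------------------			--------------------------
+ *	[stores while p still ran]		while (p->on_cpu)
+ *	smp_store_release(&p->on_cpu, 0);		cpu_relax();
+ *						smp_rmb();
+ *						[later loads of p observe
+ *						 the stores on the left] )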
*/ smp_rmb(); @@ -2039,7 +2064,6 @@ out: */ int wake_up_process(struct task_struct *p) { - WARN_ON(task_is_stopped_or_traced(p)); return try_to_wake_up(p, TASK_NORMAL, 0); } EXPORT_SYMBOL(wake_up_process); @@ -5847,13 +5871,13 @@ static int init_rootdomain(struct root_domain *rd) { memset(rd, 0, sizeof(*rd)); - if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) + if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) goto out; - if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) + if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) goto free_span; - if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) goto free_online; - if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) + if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) goto free_dlo_mask; init_dl_bw(&rd->dl_bw); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 26a54461bf59..05de80b48586 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -788,6 +788,9 @@ cputime_t task_gtime(struct task_struct *t) unsigned int seq; cputime_t gtime; + if (!context_tracking_is_enabled()) + return t->gtime; + do { seq = read_seqbegin(&t->vtime_seqlock); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e3cc16312046..8ec86abe0ea1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -64,7 +64,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI) static void push_irq_work_func(struct irq_work *work); #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index efd3bfc7e347..b242775bf670 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1073,6 +1073,9 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * We must ensure this doesn't happen until the switch is completely * finished. * + * In particular, the load of prev->state in finish_task_switch() must + * happen before this. + * * Pairs with the control dependency and rmb in try_to_wake_up(). 
*/ smp_store_release(&prev->on_cpu, 0); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 052e02672d12..f10bd873e684 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -583,18 +583,18 @@ EXPORT_SYMBOL(wake_up_atomic_t); __sched int bit_wait(struct wait_bit_key *word) { - if (signal_pending_state(current->state, current)) - return 1; schedule(); + if (signal_pending(current)) + return -EINTR; return 0; } EXPORT_SYMBOL(bit_wait); __sched int bit_wait_io(struct wait_bit_key *word) { - if (signal_pending_state(current->state, current)) - return 1; io_schedule(); + if (signal_pending(current)) + return -EINTR; return 0; } EXPORT_SYMBOL(bit_wait_io); @@ -602,11 +602,11 @@ EXPORT_SYMBOL(bit_wait_io); __sched int bit_wait_timeout(struct wait_bit_key *word) { unsigned long now = READ_ONCE(jiffies); - if (signal_pending_state(current->state, current)) - return 1; if (time_after_eq(now, word->timeout)) return -EAGAIN; schedule_timeout(word->timeout - now); + if (signal_pending(current)) + return -EINTR; return 0; } EXPORT_SYMBOL_GPL(bit_wait_timeout); @@ -614,11 +614,11 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout); __sched int bit_wait_io_timeout(struct wait_bit_key *word) { unsigned long now = READ_ONCE(jiffies); - if (signal_pending_state(current->state, current)) - return 1; if (time_after_eq(now, word->timeout)) return -EAGAIN; io_schedule_timeout(word->timeout - now); + if (signal_pending(current)) + return -EINTR; return 0; } EXPORT_SYMBOL_GPL(bit_wait_io_timeout); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 75f1d05ea82d..9c6045a27ba3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1887,12 +1887,6 @@ rb_event_index(struct ring_buffer_event *event) return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; } -static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) -{ - cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; - cpu_buffer->reader_page->read = 0; -} - static void rb_inc_iter(struct ring_buffer_iter *iter) { struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; @@ -2803,8 +2797,11 @@ rb_reserve_next_event(struct ring_buffer *buffer, event = __rb_reserve_next(cpu_buffer, &info); - if (unlikely(PTR_ERR(event) == -EAGAIN)) + if (unlikely(PTR_ERR(event) == -EAGAIN)) { + if (info.add_timestamp) + info.length -= RB_LEN_TIME_EXTEND; goto again; + } if (!event) goto out_fail; @@ -3626,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* Finally update the reader page to the new head */ cpu_buffer->reader_page = reader; - rb_reset_reader_page(cpu_buffer); + cpu_buffer->reader_page->read = 0; if (overwrite != cpu_buffer->last_overrun) { cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; @@ -3636,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto again; out: + /* Update the read_stamp on the first event */ + if (reader && reader->read == 0) + cpu_buffer->read_stamp = reader->page->time_stamp; + arch_spin_unlock(&cpu_buffer->lock); local_irq_restore(flags); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6bbc5f652355..4f6ef6912e00 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -582,6 +582,12 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); + 
unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); + + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); + unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); + list_for_each_entry(file, &tr->events, list) { clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); } @@ -1729,6 +1735,16 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, tr, INT_MAX); register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr, 0); + + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, + tr, 0); + + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, + tr, INT_MAX); + register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, + tr, 0); } /* diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index a3bffd1ec2b4..70306cc9d814 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -271,11 +271,11 @@ static long bt_sock_data_wait(struct sock *sk, long timeo) if (signal_pending(current) || !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); @@ -441,7 +441,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index c91353841e40..ffed8a1d4f27 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3027,8 +3027,13 @@ static void smp_ready_cb(struct l2cap_chan *chan) BT_DBG("chan %p", chan); + /* No need to call l2cap_chan_hold() here since we already own + * the reference taken in smp_new_conn_cb(). This is just the + * first time that we tie it to a specific pointer. The code in + * l2cap_core.c ensures that there's no risk this function won't + * get called if smp_new_conn_cb was previously called.
+ */ conn->smp = chan; - l2cap_chan_hold(chan); if (hcon->type == ACL_LINK && test_bit(HCI_CONN_ENCRYPT, &hcon->flags)) bredr_pairing(chan); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index cc858919108e..aa209b1066c9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -323,7 +323,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); @@ -331,7 +331,7 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); diff --git a/net/core/datagram.c b/net/core/datagram.c index 617088aee21d..d62af69ad844 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -785,7 +785,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e6af42da28d9..f18ae91b652e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; - ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) @@ -2333,7 +2333,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (pneigh_net(n) != net) continue; if (idx < s_idx) goto next; diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 6441f47b1a8f..2e4df84c34a1 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -56,7 +56,7 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } -static int update_classid(const void *v, struct file *file, unsigned n) +static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; struct socket *sock = sock_from_file(file, &err); @@ -67,18 +67,25 @@ static int update_classid(const void *v, struct file *file, unsigned n) return 0; } -static void cgrp_attach(struct cgroup_subsys_state *css, - struct cgroup_taskset *tset) +static void update_classid(struct cgroup_subsys_state *css, void *v) { - struct cgroup_cls_state *cs = css_cls_state(css); - void *v = (void *)(unsigned long)cs->classid; + struct css_task_iter it; struct task_struct *p; - cgroup_taskset_for_each(p, tset) { + css_task_iter_start(css, &it); + while ((p = css_task_iter_next(&it))) { task_lock(p); - iterate_fd(p->files, 0, update_classid, v); + iterate_fd(p->files, 0, update_classid_sock, v); task_unlock(p); } + css_task_iter_end(&it); +} + +static void cgrp_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + update_classid(css, + (void *)(unsigned long)css_cls_state(css)->classid); } static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) @@ -89,8 +96,11 @@ static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft) static int 
write_classid(struct cgroup_subsys_state *css, struct cftype *cft, u64 value) { - css_cls_state(css)->classid = (u32) value; + struct cgroup_cls_state *cs = css_cls_state(css); + + cs->classid = (u32)value; + update_classid(css, (void *)(unsigned long)cs->classid); return 0; } diff --git a/net/core/scm.c b/net/core/scm.c index 3b6899b7d810..8a1741b14302 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } diff --git a/net/core/sock.c b/net/core/sock.c index 1e4dd54bfb5a..e31dfcee1729 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1530,7 +1530,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) skb_queue_head_init(&newsk->sk_receive_queue); skb_queue_head_init(&newsk->sk_write_queue); - spin_lock_init(&newsk->sk_dst_lock); rwlock_init(&newsk->sk_callback_lock); lockdep_set_class_and_name(&newsk->sk_callback_lock, af_callback_keys + newsk->sk_family, @@ -1607,7 +1606,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { u32 max_segs = 1; - __sk_dst_set(sk, dst); + sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; if (sk->sk_route_caps & NETIF_F_GSO) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; @@ -1815,7 +1814,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo) { DEFINE_WAIT(wait); - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); for (;;) { if (!timeo) break; @@ -1861,7 +1860,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf) break; - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; if (!timeo) @@ -2048,9 +2047,9 @@ int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); return rc; } @@ -2388,7 +2387,6 @@ void sock_init_data(struct socket *sock, struct sock *sk) } else sk->sk_wq = NULL; - spin_lock_init(&sk->sk_dst_lock); rwlock_init(&sk->sk_callback_lock); lockdep_set_class_and_name(&sk->sk_callback_lock, af_callback_keys + sk->sk_family, diff --git a/net/core/stream.c b/net/core/stream.c index d70f77a0c889..b96f7a79e544 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -39,7 +39,7 @@ void sk_stream_write_space(struct sock *sk) wake_up_interruptible_poll(&wq->wait, POLLOUT | POLLWRNORM | POLLWRBAND); if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); rcu_read_unlock(); } } @@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2; while (1) { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -139,7 +139,7 @@ int 
sk_stream_wait_memory(struct sock *sk, long *timeo_p) } if (signal_pending(current)) goto do_interrupted; - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk_stream_memory_free(sk) && !vm_wait) break; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index db5fc2440a23..9c6d0508e63a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -202,7 +202,9 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -219,7 +221,10 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -387,6 +392,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -453,7 +459,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, * comment in that function for the gory details. -acme */ - __ip6_dst_store(newsk, dst, NULL, NULL); + ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; @@ -488,13 +494,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. 
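 *
 * (Editorial sketch of the access pattern this patch introduces:
 * np->opt becomes RCU-managed, so lockless readers do
 *
 *	rcu_read_lock();
 *	opt = rcu_dereference(np->opt);
 *	... use opt ...
 *	rcu_read_unlock();
 *
 * while paths holding the socket lock, like dccp_v6_connect() below,
 * use rcu_dereference_protected(np->opt, sock_owned_by_user(sk)).)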
*/ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -757,6 +765,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -856,7 +865,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -873,12 +883,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index b5cf13a28009..41e65804ddf5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -339,8 +339,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. If space is freed after diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 675cf94e04f8..eebf5ac8ce18 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1747,9 +1747,9 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); } @@ -2004,10 +2004,10 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); sk_wait_event(sk, &timeo, !dn_queue_too_long(scp, queue, flags)); - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); finish_wait(sk_sleep(sk), &wait); continue; } diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 4677b6fa6dda..ecc28cff08ab 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -67,7 +67,7 @@ * Returns the size of the result on success, -ve error code otherwise. 
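The dns_query() change just below widens the returned expiry to time64_t, so key expiry no longer overflows in 2038 on 32-bit systems. A hypothetical caller, illustrative only (error handling elided):

	char *result = NULL;
	time64_t expiry;
	int len;

	len = dns_query(NULL, "example.org", 11, NULL, &result, &expiry);
	if (len >= 0) {
		pr_debug("resolved to %.*s, valid until %lld\n",
			 len, result, (long long)expiry);
		kfree(result);	/* caller owns the result buffer */
	}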
*/ int dns_query(const char *type, const char *name, size_t namelen, - const char *options, char **_result, time_t *_expiry) + const char *options, char **_result, time64_t *_expiry) { struct key *rkey; const struct user_key_payload *upayload; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 35a9788bb3ae..c7d1adca30d8 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -312,7 +312,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, u8 type) return; out: - WARN_ON_ONCE("HSR: Could not send supervision frame\n"); + WARN_ONCE(1, "HSR: Could not send supervision frame\n"); kfree_skb(skb); } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6baf36e11808..05e4cba14162 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2126,7 +2126,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) ASSERT_RTNL(); in_dev = ip_mc_find_dev(net, imr); - if (!in_dev) { + if (!imr->imr_ifindex && !imr->imr_address.s_addr && !in_dev) { ret = -ENODEV; goto out; } @@ -2147,7 +2147,8 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) *imlp = iml->next_rcu; - ip_mc_dec_group(in_dev, group); + if (in_dev) + ip_mc_dec_group(in_dev, group); /* decrease mem now to avoid the memleak warning */ atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92dd4b74d513..c3a38353f5dc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void mroute_clean_tables(struct mr_table *mrt); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -441,10 +441,6 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) return dev; failure: - /* allow the register to be completed before unregistering. */ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } @@ -540,10 +536,6 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) return dev; failure: - /* allow the register to be completed before unregistering. 
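The hsr_device.c fix above corrects a macro misuse: WARN_ON_ONCE() takes a condition, so passing a string literal merely tested an always-true pointer and the text was never printed. WARN_ONCE() is the variant that takes a format string:

	WARN_ON_ONCE(!skb);	/* warns (once) when the condition is true; no custom text */
	WARN_ONCE(1, "HSR: Could not send supervision frame\n");
					/* always fires, once, with the message */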
*/ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } @@ -1208,7 +1200,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1217,8 +1209,9 @@ static void mroute_clean_tables(struct mr_table *mrt) /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif_table[i].flags & VIFF_STATIC)) - vif_delete(mrt, i, 0, &list); + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) + continue; + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1226,7 +1219,7 @@ static void mroute_clean_tables(struct mr_table *mrt) for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); @@ -1261,7 +1254,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); } } rtnl_unlock(); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1728771cf89..c82cca18c90f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -517,8 +517,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ - set_bit(SOCK_ASYNC_NOSPACE, - &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); /* Race breaker. 
If space is freed after @@ -906,7 +905,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, goto out_err; } - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); copied = 0; @@ -1134,7 +1133,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) } /* This should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); mss_now = tcp_send_mss(sk, &size_goal, flags); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd88c3803a6..2d656eef7f8e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; + int err = -ENOMEM; + int data_len = 0; bool fragstolen; if (size == 0) return 0; - skb = alloc_skb(size, sk->sk_allocation); + if (size > PAGE_SIZE) { + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); + + data_len = npages << PAGE_SHIFT; + size = data_len + (size & ~PAGE_MASK); + } + skb = alloc_skb_with_frags(size - data_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, + &err, sk->sk_allocation); if (!skb) goto err; + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; @@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) err_free: kfree_skb(skb); err: - return -ENOMEM; + return err; + } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) @@ -5667,6 +5683,7 @@ discard: } tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->copied_seq = tp->rcv_nxt; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ba09016d1bfd..db003438aaf5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -921,7 +921,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c9c716a483e4..193ba1fa8a9a 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -168,7 +168,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) tcp_fastopen_cache_set(sk, 0, NULL, true, 0); - if (tp->syn_data) + if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); } @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + /* Some middle-boxes may black-hole Fast Open _after_ + * the handshake. Therefore we conservatively disable + * Fast Open on this path on recurring timeouts with + * few or zero bytes acked after Fast Open. 
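A few hunks above, tcp_send_rcvq() switches from one linear alloc_skb() to alloc_skb_with_frags(), so large TCP_REPAIR queue writes can always fall back to order-0 pages instead of demanding one big contiguous buffer. A worked example of the sizing arithmetic, assuming 4 KiB pages and a 20000-byte write:

	/*
	 * npages   = min(20000 >> PAGE_SHIFT, MAX_SKB_FRAGS) = 4
	 * data_len = npages << PAGE_SHIFT                    = 16384
	 * size     = data_len + (20000 & ~PAGE_MASK)         = 20000
	 * linear   = size - data_len                         = 3616
	 *
	 * The skb carries four full page fragments plus a 3616-byte
	 * linear head.
	 */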
+ */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 24ec14f9825c..0c7b0e61b917 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -100,7 +100,6 @@ #include <linux/slab.h> #include <net/tcp_states.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <net/net_namespace.h> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d84742f003a9..61f26851655c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3642,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); rtnl_unlock(); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 44bb66bde0e2..8ec0df75f1c4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -668,7 +673,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return PTR_ERR(dst); } - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); } return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d70b0238f468..517c55b01ba8 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ ipv4_connected: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? 
flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ce203b0402be..ea7c4d64a00a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36c5a98b0472..0a37ddc7af51 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -834,11 +834,6 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); } -/* - * Special lock-class for __icmpv6_sk: - */ -static struct lock_class_key icmpv6_socket_sk_dst_lock_key; - static int __net_init icmpv6_sk_init(struct net *net) { struct sock *sk; @@ -860,15 +855,6 @@ static int __net_init icmpv6_sk_init(struct net *net) net->ipv6.icmp_sk[i] = sk; - /* - * Split off their lock-class, because sk->sk_dst_lock - * gets used from softirqs, which is safe for - * __icmpv6_sk (because those never get directly used - * via userspace syscalls), but unsafe for normal sockets. - */ - lockdep_set_class(&sk->sk_dst_lock, - &icmpv6_socket_sk_dst_lock_key); - /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5d1c7cee2cb2..a7ca2cde2ecb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -78,7 +78,9 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; @@ -109,14 +111,6 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); static inline -void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, - const struct in6_addr *daddr, - const struct in6_addr *saddr) -{ - __ip6_dst_store(sk, dst, daddr, saddr); -} - -static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { return __sk_dst_check(sk, cookie); @@ -142,14 +136,16 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!IS_ERR(dst)) - __inet6_csk_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); } return dst; } @@ -175,7 +171,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); 
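The icmp.c hunk above can drop the dedicated lockdep class because sk_dst_lock itself is gone (see the spin_lock_init() removals in net/core/sock.c earlier in this diff): dst cache updates are now lockless. A sketch of the xchg-based setter this depends on (details assumed):

static inline void sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	sk_tx_queue_clear(sk);
	old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
	dst_release(old_dst);	/* dst_release(NULL) is a no-op */
}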
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index eabffbb89795..137fca42aaa6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t) int i; for_each_possible_cpu(i) - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); + ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index ad19136086dd..a10e77103c88 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt); +static void mroute_clean_tables(struct mr6_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -765,10 +765,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) return dev; failure: - /* allow the register to be completed before unregistering. */ - rtnl_unlock(); - rtnl_lock(); - unregister_netdevice(dev); return NULL; } @@ -1542,7 +1538,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt) +static void mroute_clean_tables(struct mr6_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1552,8 +1548,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) * Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(mrt, i, &list); + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1562,7 +1559,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) */ for (i = 0; i < MFC6_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; write_lock_bh(&mrt_lock); list_del(&c->list); @@ -1625,7 +1622,7 @@ int ip6mr_sk_done(struct sock *sk) net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); err = 0; break; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63e6956917c9..4449ad1f8114 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); sk_dst_reset(sk); return opt; @@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); 
+ txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -486,6 +493,7 @@ sticky_done: break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -502,8 +510,10 @@ update: retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3e0f855e1bea..d6161e1c48c8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -556,8 +556,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) } void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb) + const struct in6_addr *daddr, const struct in6_addr *saddr) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -593,9 +592,6 @@ void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr); - if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb) - skb_dst_copy(skb, oskb); - ndisc_send_skb(skb, daddr, saddr); } @@ -682,12 +678,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, target, target, saddr, skb); + ndisc_send_ns(dev, target, target, saddr); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, target, &mcaddr, saddr, skb); + ndisc_send_ns(dev, target, &mcaddr, saddr); } } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index d5efeb87350e..bab4441ed4e4 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. 
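Taken together, the ipv6_sockglue.c hunks above pin down the ownership rules for np->opt: transmit paths read it under rcu_read_lock() or pin it with txopt_get(), while writers hold the socket lock, detach the old pointer atomically, and release it through txopt_put() after returning the accounted memory. A condensed writer-side sketch (the helper name is hypothetical):

static void ipv6_replace_txoptions(struct sock *sk,
				   struct ipv6_txoptions *new)
{
	struct ipv6_txoptions *old;

	/* caller holds the socket lock, as in do_ipv6_setsockopt() */
	old = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
		   new);
	if (old) {
		atomic_sub(old->tot_len, &sk->sk_omem_alloc);
		txopt_put(old);
	}
}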
*/ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst, u8 ecn) + struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; local_bh_disable(); @@ -601,7 +602,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use fhdr = (struct frag_hdr *)skb_transport_header(clone); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dc65ec198f7c..99140986e887 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -733,6 +733,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; @@ -839,8 +840,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -906,6 +909,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 44e21a03cfc3..45f5ae51de65 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) return fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst); + ipv6_addr_equal(&fq->daddr, arg->dst) && + (arg->iif == fq->iif || + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))); } EXPORT_SYMBOL(ip6_frag_match); @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, u8 ecn) + const struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; hash = inet6_hash_frag(id, src, dst); @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? 
skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq) { int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6f01fe122abd..826e6aa44f8d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -523,7 +523,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL); dev_put(work->dev); kfree(work); } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb8f2fa1c7fb..eaf7ac496d50 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -222,7 +222,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = ireq->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a636f1a..e7aab561b7b4 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -255,7 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + ip6_dst_store(sk, dst, NULL, NULL); if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && @@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -461,7 +463,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -972,6 +975,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; @@ -1056,7 +1060,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL, NULL); + ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock 
*)newsk; @@ -1098,13 +1102,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * but we make one more one thing there: reattach optmem to newsk. */ - if (np->opt) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 01bcb49619ee..9da3287a3923 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1110,6 +1110,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1263,8 +1264,10 @@ do_udp_sendmsg: opt = NULL; connected = 0; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1373,6 +1376,7 @@ release_dst: out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index fcb2752419c6..435608c4306d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1483,7 +1483,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && iucv_below_msglim(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index aca38d8aed8e..a2c8747d2936 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -631,6 +634,7 @@ done: dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? 
err : len; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index a758eb84e8f0..ff757181b0a8 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -500,7 +500,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - local->hw.max_tx_aggregation_subframes, + IEEE80211_MAX_AMPDU_BUF, tid_tx->timeout); } @@ -926,6 +926,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); mutex_lock(&sta->ampdu_mlme.mtx); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c2bd1b6a6922..da471eef07bb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3454,8 +3454,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, goto out_unlock; } } else { - /* for cookie below */ - ack_skb = skb; + /* Assign a dummy non-zero cookie, it's not sent to + * userspace in this case but we rely on its value + * internally in the need_offchan case to distinguish + * mgmt-tx from remain-on-channel. + */ + *cookie = 0xffffffff; } if (!need_offchan) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index d0dc1bfaeec2..c9e325d2e120 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -76,7 +76,8 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, bool update_bss) { - if (__ieee80211_recalc_txpower(sdata) || update_bss) + if (__ieee80211_recalc_txpower(sdata) || + (update_bss && ieee80211_sdata_running(sdata))) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } @@ -1861,6 +1862,7 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) unregister_netdevice(sdata->dev); } else { cfg80211_unregister_wdev(&sdata->wdev); + ieee80211_teardown_sdata(sdata); kfree(sdata); } } @@ -1870,7 +1872,6 @@ void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata) if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state))) return; ieee80211_do_stop(sdata, true); - ieee80211_teardown_sdata(sdata); } void ieee80211_remove_interfaces(struct ieee80211_local *local) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 858f6b1cb149..175ffcf7fb06 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -541,8 +541,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | - NL80211_FEATURE_USERSPACE_MPM | - NL80211_FEATURE_FULL_AP_CLIENT_STATE; + NL80211_FEATURE_USERSPACE_MPM; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index b890e225a8f1..b3b44a5dd375 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -779,10 +779,8 @@ void mesh_plink_broken(struct sta_info *sta) static void mesh_path_node_reclaim(struct rcu_head *rp) { struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = node->mpath->sdata; del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.mesh.mpaths); kfree(node->mpath); kfree(node); } @@ -790,8 +788,9 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) /* needs 
to be called with the corresponding hashwlock taken */ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) { - struct mesh_path *mpath; - mpath = node->mpath; + struct mesh_path *mpath = node->mpath; + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; + spin_lock(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING; if (mpath->is_gate) @@ -799,6 +798,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mpath_node *node) hlist_del_rcu(&node->list); call_rcu(&node->rcu, mesh_path_node_reclaim); spin_unlock(&mpath->state_lock); + atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4aeca4b0c3cb..a413e52f7691 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -597,8 +597,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, /* We need to ensure power level is at max for scanning. */ ieee80211_hw_config(local, 0); - if ((req->channels[0]->flags & - IEEE80211_CHAN_NO_IR) || + if ((req->channels[0]->flags & (IEEE80211_CHAN_NO_IR | + IEEE80211_CHAN_RADAR)) || !req->n_ssids) { next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; } else { @@ -645,7 +645,7 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan) * TODO: channel switching also consumes quite some time, * add that delay as well to get a better estimation */ - if (chan->flags & IEEE80211_CHAN_NO_IR) + if (chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) return IEEE80211_PASSIVE_CHANNEL_TIME; return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; } @@ -777,7 +777,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, * * In any case, it is not necessary for a passive scan. */ - if (chan->flags & IEEE80211_CHAN_NO_IR || !scan_req->n_ssids) { + if ((chan->flags & (IEEE80211_CHAN_NO_IR | IEEE80211_CHAN_RADAR)) || + !scan_req->n_ssids) { *next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->next_scan_state = SCAN_DECISION; return; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b7de0da46acd..ecf0a0196f18 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -572,7 +572,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); pr_debug("mask 0x%x\n", mask); diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index a7a80a6b77b0..653d073bae45 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work) struct hlist_node *n; hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { - if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) + if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL) continue; if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH)) diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index efb736bb6855..e41cd12d9b2d 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -117,7 +117,6 @@ static struct vport_ops ovs_geneve_vport_ops = { .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, .send = dev_queue_xmit, - .owner = THIS_MODULE, }; static int __init ovs_geneve_tnl_init(void) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index c3257d78d3d2..7f8897f33a67 100644 --- a/net/openvswitch/vport-gre.c +++ 
b/net/openvswitch/vport-gre.c @@ -89,7 +89,6 @@ static struct vport_ops ovs_gre_vport_ops = { .create = gre_create, .send = dev_queue_xmit, .destroy = ovs_netdev_tunnel_destroy, - .owner = THIS_MODULE, }; static int __init ovs_gre_tnl_init(void) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index b327368a3848..6b0190b987ec 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) if (vport->dev->priv_flags & IFF_OVS_DATAPATH) ovs_netdev_detach_dev(vport); - /* Early release so we can unregister the device */ + /* We can be invoked by both explicit vport deletion and + * underlying netdev deregistration; delete the link only + * if it's not already shutting down. + */ + if (vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev); dev_put(vport->dev); - rtnl_delete_link(vport->dev); vport->dev = NULL; rtnl_unlock(); diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 0ac0fd004d7e..31cbc8c5c7db 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -71,7 +71,7 @@ static struct hlist_head *hash_bucket(const struct net *net, const char *name) return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } -int ovs_vport_ops_register(struct vport_ops *ops) +int __ovs_vport_ops_register(struct vport_ops *ops) { int err = -EEXIST; struct vport_ops *o; @@ -87,7 +87,7 @@ errout: ovs_unlock(); return err; } -EXPORT_SYMBOL_GPL(ovs_vport_ops_register); +EXPORT_SYMBOL_GPL(__ovs_vport_ops_register); void ovs_vport_ops_unregister(struct vport_ops *ops) { @@ -256,8 +256,8 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options) * * @vport: vport to delete. * - * Detaches @vport from its datapath and destroys it. It is possible to fail - * for reasons such as lack of memory. ovs_mutex must be held. + * Detaches @vport from its datapath and destroys it. ovs_mutex must + * be held. 
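The __ovs_vport_ops_register() rename above pairs with a vport.h wrapper (next hunk) that stamps the ops structure with the registering module, since THIS_MODULE expands in the caller's translation unit; that is why the geneve and gre hunks can delete their explicit .owner initializers. Usage in a hypothetical tunnel vport (names illustrative only):

static struct vport_ops ovs_foo_vport_ops = {
	.type	 = OVS_VPORT_TYPE_GRE,
	.create	 = foo_create,			/* hypothetical */
	.send	 = dev_queue_xmit,
	.destroy = ovs_netdev_tunnel_destroy,
	/* no .owner: ovs_vport_ops_register() fills it in */
};

static int __init ovs_foo_tnl_init(void)
{
	return ovs_vport_ops_register(&ovs_foo_vport_ops);
}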
*/ void ovs_vport_del(struct vport *vport) { diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index bdfd82a7c064..8ea3a96980ac 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -196,7 +196,13 @@ static inline const char *ovs_vport_name(struct vport *vport) return vport->dev->name; } -int ovs_vport_ops_register(struct vport_ops *ops); +int __ovs_vport_ops_register(struct vport_ops *ops); +#define ovs_vport_ops_register(ops) \ + ({ \ + (ops)->owner = THIS_MODULE; \ + __ovs_vport_ops_register(ops); \ + }) + void ovs_vport_ops_unregister(struct vport_ops *ops); static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1cf928fb573e..992396aa635c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2329,8 +2329,8 @@ static void tpacket_destruct_skb(struct sk_buff *skb) static bool ll_header_truncated(const struct net_device *dev, int len) { /* net device doesn't like empty head */ - if (unlikely(len <= dev->hard_header_len)) { - net_warn_ratelimited("%s: packet size is too short (%d <= %d)\n", + if (unlikely(len < dev->hard_header_len)) { + net_warn_ratelimited("%s: packet size is too short (%d < %d)\n", current->comm, len, dev->hard_header_len); return true; } diff --git a/net/rds/connection.c b/net/rds/connection.c index d4564036a339..e3b118cae81d 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -186,12 +186,6 @@ static struct rds_connection *__rds_conn_create(struct net *net, } } - if (trans == NULL) { - kmem_cache_free(rds_conn_slab, conn); - conn = ERR_PTR(-ENODEV); - goto out; - } - conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); diff --git a/net/rds/send.c b/net/rds/send.c index 827155c2ead1..c9cdb358ea88 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1013,11 +1013,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) release_sock(sk); } - /* racing with another thread binding seems ok here */ + lock_sock(sk); if (daddr == 0 || rs->rs_bound_addr == 0) { + release_sock(sk); ret = -ENOTCONN; /* XXX not a great errno */ goto out; } + release_sock(sk); if (payload_len > rds_sk_sndbuf(rs)) { ret = -EMSGSIZE; diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index e0547f521f20..adc555e0323d 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -723,8 +723,10 @@ process_further: if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) + hard > tx) { + call->acks_hard = tx; goto all_acked; + } smp_rmb(); rxrpc_rotate_tx_window(call, hard - 1); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index a40d3afe93b7..14c4e12c47b0 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -531,7 +531,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); /* this should be in poll */ - clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f43c8f33f09e..7ec667dd4ce1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) } /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) 
+ * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry(q, &root->list, list) { + list_for_each_entry_rcu(q, &root->list, list) { if (q->handle == handle) return q; } @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q) struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); - list_add_tail(&q->list, &root->list); + ASSERT_RTNL(); + list_add_tail_rcu(&q->list, &root->list); } } EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_del(&q->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + ASSERT_RTNL(); + list_del_rcu(&q->list); + } } EXPORT_SYMBOL(qdisc_list_del); @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (n == 0) return; drops = max_t(int, n, 0); + rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) - return; + break; + if (sch->flags & TCQ_F_NOPARENT) + break; + /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { - WARN_ON(parentid != TC_H_ROOT); - return; + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; } cops = sch->ops->cl_ops; if (cops->qlen_notify) { @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) sch->q.qlen -= n; __qdisc_qstats_drop(sch, drops); } + rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE; + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->handle = handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cb5d4ad32946..e82a1ad80aa5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev, return; } if (!netif_is_multiqueue(dev)) - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3cbaecd283a..3e82f047caaf 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) if (qdisc == NULL) goto err; priv->qdiscs[ntx] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3811a745452c..ad70ecf57ce7 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) goto err; } priv->qdiscs[i] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } /* If the mqprio options indicate that hardware should own @@ -209,7 +209,7 @@ static int 
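The TCQ_F_NOPARENT additions across sch_api.c, sch_generic.c, sch_mq.c and sch_mqprio.c (above and in the next hunk) mark per-tx-queue child qdiscs that are intentionally absent from the device root's qdisc list; qdisc_tree_decrease_qlen() now stops climbing at such a qdisc instead of tripping its WARN when the parent handle lookup fails. A sketch of the flag (the exact bit value is an assumption):

#define TCQ_F_NOPARENT	0x40	/* root of its own hierarchy; qlen and
				 * backlog updates must not search for
				 * a parent class */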
mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e917d27328ea..acb45b8c2a9d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -209,6 +209,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 *fl6 = &transport->fl.u.ip6; + int res; pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl6->saddr, &fl6->daddr); @@ -220,7 +221,10 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); - return ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + rcu_read_lock(); + res = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), np->tclass); + rcu_read_unlock(); + return res; } /* Returns the dst cache entry for the given source and destination ip @@ -262,7 +266,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, pr_debug("src=%pI6 - ", &fl6->saddr); } - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!asoc || saddr) goto out; @@ -321,7 +328,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (baddr) { fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; - final_p = fl6_update_dst(fl6, np->opt, &final); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); dst = ip6_dst_lookup_flow(sk, fl6, final_p); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 897c01c029ca..03c8256063ec 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,7 +972,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, return -EFAULT; /* Alloc space for the address array in kernel memory. 
*/ - kaddrs = kmalloc(addrs_size, GFP_KERNEL); + kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN); if (unlikely(!kaddrs)) return -ENOMEM; @@ -4928,7 +4928,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, to = optval + offsetof(struct sctp_getaddrs, addrs); space_left = len - offsetof(struct sctp_getaddrs, addrs); - addrs = kmalloc(space_left, GFP_KERNEL); + addrs = kmalloc(space_left, GFP_USER | __GFP_NOWARN); if (!addrs) return -ENOMEM; @@ -6458,7 +6458,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sctp_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); /* * Since the socket is not locked, the buffer * might be made available after the writeable check and @@ -6801,26 +6801,30 @@ no_packet: static void __sctp_write_space(struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; - struct socket *sock = sk->sk_socket; - if ((sctp_wspace(asoc) > 0) && sock) { - if (waitqueue_active(&asoc->wait)) - wake_up_interruptible(&asoc->wait); + if (sctp_wspace(asoc) <= 0) + return; + + if (waitqueue_active(&asoc->wait)) + wake_up_interruptible(&asoc->wait); - if (sctp_writeable(sk)) { - wait_queue_head_t *wq = sk_sleep(sk); + if (sctp_writeable(sk)) { + struct socket_wq *wq; - if (wq && waitqueue_active(wq)) - wake_up_interruptible(wq); + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq) { + if (waitqueue_active(&wq->wait)) + wake_up_interruptible(&wq->wait); /* Note that we try to include the Async I/O support * here by modeling from the current TCP/UDP code. * We have not tested with it yet. */ if (!(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, - SOCK_WAKE_SPACE, POLL_OUT); + sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT); } + rcu_read_unlock(); } } @@ -7375,6 +7379,13 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) +#include <net/transp_v6.h> +static void sctp_v6_destroy_sock(struct sock *sk) +{ + sctp_destroy_sock(sk); + inet6_destroy_sock(sk); +} + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -7384,7 +7395,7 @@ struct proto sctpv6_prot = { .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, - .destroy = sctp_destroy_sock, + .destroy = sctp_v6_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, diff --git a/net/socket.c b/net/socket.c index dd2c247c99e3..456fadb3d819 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1056,27 +1056,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock or rcu_lock */ +/* This function may be called only under rcu_lock */ -int sock_wake_async(struct socket *sock, int how, int band) +int sock_wake_async(struct socket_wq *wq, int how, int band) { - struct socket_wq *wq; - - if (!sock) - return -1; - rcu_read_lock(); - wq = rcu_dereference(sock->wq); - if (!wq || !wq->fasync_list) { - rcu_read_unlock(); + if (!wq || !wq->fasync_list) return -1; - } + switch (how) { case SOCK_WAKE_WAITD: - if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) + if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags)) break; goto call_kill; case SOCK_WAKE_SPACE: - if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) + if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) break; /* fall through */ case SOCK_WAKE_IO: @@ -1086,7 +1079,7 @@ call_kill: case SOCK_WAKE_URG: kill_fasync(&wq->fasync_list, 
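With sock_wake_async() now taking the struct socket_wq directly (hunk above), the RCU read side moves to the caller, which resolves sk->sk_wq once and uses it for both the waitqueue wakeup and the async notification. A caller sketch following the pattern of sk_stream_write_space() and __sctp_write_space() (the helper name is hypothetical; sock_wake_async() tolerates a NULL wq):

static void wake_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq && waitqueue_active(&wq->wait))
		wake_up_interruptible_poll(&wq->wait,
					   POLLOUT | POLLWRNORM | POLLWRBAND);
	if (!(sk->sk_shutdown & SEND_SHUTDOWN))
		sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
	rcu_read_unlock();
}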
SIGURG, band); } - rcu_read_unlock(); + return 0; } EXPORT_SYMBOL(sock_wake_async); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1d1a70498910..2ffaf6a79499 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -398,7 +398,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (unlikely(!sock)) return -ENOTSOCK; - clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags); if (base != 0) { addr = NULL; addrlen = 0; @@ -442,7 +442,7 @@ static void xs_nospace_callback(struct rpc_task *task) struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); transport->inet->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } /** @@ -467,7 +467,7 @@ static int xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + if (test_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags)) { /* * Notify TCP that we're limited by the application * window size @@ -478,7 +478,7 @@ static int xs_nospace(struct rpc_task *task) xprt_wait_for_buffer_space(task, xs_nospace_callback); } } else { - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); ret = -ENOTCONN; } @@ -626,7 +626,7 @@ process_status: case -EPERM: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -715,7 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EADDRINUSE: case -ENOBUFS: case -EPIPE: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &transport->sock->flags); } return status; @@ -1618,7 +1618,7 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; - if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) + if (test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sock->flags) == 0) return; xprt_write_space(xprt); diff --git a/net/tipc/link.c b/net/tipc/link.c index 9efbdbde2b08..91aea071ab27 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -191,6 +191,7 @@ void tipc_link_add_bc_peer(struct tipc_link *snd_l, snd_l->ackers++; rcv_l->acked = snd_l->snd_nxt - 1; + snd_l->state = LINK_ESTABLISHED; tipc_link_build_bc_init_msg(uc_l, xmitq); } @@ -206,6 +207,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, rcv_l->state = LINK_RESET; if (!snd_l->ackers) { tipc_link_reset(snd_l); + snd_l->state = LINK_RESET; __skb_queue_purge(xmitq); } } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 552dbaba9cf3..b53246fb0412 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -105,6 +105,7 @@ struct tipc_sock { static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); +static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p); @@ -381,6 +382,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = 
tipc_write_space; + sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); @@ -470,9 +472,6 @@ static int tipc_release(struct socket *sock) tipc_node_remove_conn(net, dnode, tsk->portid); } - /* Discard any remaining (connection-based) messages in receive queue */ - __skb_queue_purge(&sk->sk_receive_queue); - /* Reject any messages that accumulated in backlog queue */ sock->state = SS_DISCONNECTING; release_sock(sk); @@ -1515,6 +1514,11 @@ static void tipc_data_ready(struct sock *sk) rcu_read_unlock(); } +static void tipc_sock_destruct(struct sock *sk) +{ + __skb_queue_purge(&sk->sk_receive_queue); +} + /** * filter_connect - Handle all incoming messages for a connection-based socket * @tsk: TIPC socket diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ad2719ad4c1b..70c03271b798 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -158,8 +158,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) - pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto tx_error; + } skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 955ec152cb71..45aebd966978 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,6 +326,118 @@ found: return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (e.g., /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram; this wake up needs to be propagated to sleeping would-be + * writers since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue. + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hits the flow control condition and is broken when + * the association to the server socket is dissolved or after a wake + * up was relayed.
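+ *
+ * As an illustrative sketch of the control flow implemented by the
+ * helpers below (a reading aid, not additional semantics):
+ *
+ *   sender:  poll/sendmsg sees unix_recvq_full(other)
+ *     -> unix_dgram_peer_wake_me(sk, other)
+ *       -> unix_dgram_peer_wake_connect(sk, other) enqueues sk's
+ *          peer_wake entry on other's peer_wait queue
+ *
+ *   receiver: a datagram is dequeued from the server socket
+ *     -> wake_up on peer_wait runs unix_dgram_peer_wake_relay(),
+ *        which detaches the entry and wakes the sender's own
+ *        sk_sleep(sk) wait queue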
+ */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static int unix_writable(const struct sock *sk) { return sk->sk_state != TCP_LISTEN && @@ -431,6 +543,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -666,6 +780,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1033,6 +1148,8 @@ restart: if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1434,6 +1551,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } +static bool unix_passcred_enabled(const struct socket *sock, + const struct sock *other) +{ + return test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags); +} + /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1444,14 +1569,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + 
if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } +static int maybe_init_creds(struct scm_cookie *scm, + struct socket *socket, + const struct sock *other) +{ + int err; + struct msghdr msg = { .msg_controllen = 0 }; + + err = scm_send(socket, &msg, scm, false); + if (err) + return err; + + if (unix_passcred_enabled(socket, other)) { + scm->pid = get_pid(task_tgid(current)); + current_uid_gid(&scm->creds.uid, &scm->creds.gid); + } + return err; +} + +static bool unix_skb_scm_eq(struct sk_buff *skb, + struct scm_cookie *scm) +{ + const struct unix_skb_parms *u = &UNIXCB(skb); + + return u->pid == scm->pid && + uid_eq(u->uid, scm->creds.uid) && + gid_eq(u->gid, scm->creds.gid) && + unix_secdata_eq(scm, skb); +} + /* * Send AF_UNIX data. */ @@ -1472,6 +1624,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1550,12 +1703,14 @@ restart: goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1563,10 +1718,14 @@ restart: unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1592,21 +1751,38 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1620,6 +1796,8 @@ restart: return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -1741,8 +1919,10 @@ out_err: static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, int offset, size_t size, int flags) { - int err = 0; - bool send_sigpipe = true; + int err; + bool send_sigpipe = false; + bool init_scm = true; + struct scm_cookie scm; struct sock *other, *sk = socket->sk; struct sk_buff *skb, *newskb = NULL, *tail = NULL; @@ -1760,7 +1940,7 @@ alloc_skb: newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) - return err; + goto err; } /* we must acquire readlock as we modify already present @@ -1769,12 +1949,12 @@ alloc_skb: err = mutex_lock_interruptible(&unix_sk(other)->readlock); if (err) { err = flags & MSG_DONTWAIT ? 
-EAGAIN : -ERESTARTSYS; - send_sigpipe = false; goto err; } if (sk->sk_shutdown & SEND_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_unlock; } @@ -1783,17 +1963,27 @@ alloc_skb: if (sock_flag(other, SOCK_DEAD) || other->sk_shutdown & RCV_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_state_unlock; } + if (init_scm) { + err = maybe_init_creds(&scm, socket, other); + if (err) + goto err_state_unlock; + init_scm = false; + } + skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; - } else if (!skb) { - if (newskb) + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { + if (newskb) { skb = newskb; - else + } else { + tail = skb; goto alloc_skb; + } } else if (newskb) { /* this is fast path, we don't necessarily need to * call to kfree_skb even though with newskb == NULL @@ -1814,6 +2004,9 @@ alloc_skb: atomic_add(size, &sk->sk_wmem_alloc); if (newskb) { + err = unix_scm_to_skb(&scm, skb, false); + if (err) + goto err_state_unlock; spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); spin_unlock(&other->sk_receive_queue.lock); @@ -1823,7 +2016,7 @@ alloc_skb: mutex_unlock(&unix_sk(other)->readlock); other->sk_data_ready(other); - + scm_destroy(&scm); return size; err_state_unlock: @@ -1834,6 +2027,8 @@ err: kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); + if (!init_scm) + scm_destroy(&scm); return err; } @@ -1996,7 +2191,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, !timeo) break; - set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); timeo = freezable_schedule_timeout(timeo); unix_state_lock(sk); @@ -2004,7 +2199,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, if (sock_flag(sk, SOCK_DEAD)) break; - clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); @@ -2137,10 +2332,7 @@ unlock: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != scm.pid) || - !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || - !unix_secdata_eq(&scm, skb)) + if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ @@ -2476,20 +2668,22 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else - set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index c8b8ef5246a6..ef198903c0c3 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -955,6 +955,7 @@ static int patch_conexant_auto(struct hda_codec *codec) */ static const struct hda_device_id snd_hda_id_conexant[] = { + HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15045, "CX20549 
(Venice)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto), @@ -972,9 +973,9 @@ static const struct hda_device_id snd_hda_id_conexant[] = { HDA_CODEC_ENTRY(0x14f150ac, "CX20652", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f150b8, "CX20664", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f150b9, "CX20665", patch_conexant_auto), - HDA_CODEC_ENTRY(0x14f150f1, "CX20721", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f150f1, "CX21722", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f150f2, "CX20722", patch_conexant_auto), - HDA_CODEC_ENTRY(0x14f150f3, "CX20723", patch_conexant_auto), + HDA_CODEC_ENTRY(0x14f150f3, "CX21724", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f150f4, "CX20724", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f1510f, "CX20751/2", patch_conexant_auto), HDA_CODEC_ENTRY(0x14f15110, "CX20751/2", patch_conexant_auto), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index bdb6f226d006..4b6fb668c91c 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2352,6 +2352,12 @@ static void intel_pin_eld_notify(void *audio_ptr, int port) struct hda_codec *codec = audio_ptr; int pin_nid = port + 0x04; + /* skip notification during system suspend (but not in runtime PM); + * the state will be updated at resume + */ + if (snd_power_get_state(codec->card) != SNDRV_CTL_POWER_D0) + return; + check_presence_and_report(codec, pin_nid); } diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 9929efc6b9aa..b3ea24d64c50 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1023,24 +1023,18 @@ void arizona_init_dvfs(struct arizona_priv *priv) } EXPORT_SYMBOL_GPL(arizona_init_dvfs); -static unsigned int arizona_sysclk_48k_rates[] = { +static unsigned int arizona_opclk_ref_48k_rates[] = { 6144000, 12288000, 24576000, 49152000, - 73728000, - 98304000, - 147456000, }; -static unsigned int arizona_sysclk_44k1_rates[] = { +static unsigned int arizona_opclk_ref_44k1_rates[] = { 5644800, 11289600, 22579200, 45158400, - 67737600, - 90316800, - 135475200, }; static int arizona_set_opclk(struct snd_soc_codec *codec, unsigned int clk, @@ -1065,11 +1059,11 @@ static int arizona_set_opclk(struct snd_soc_codec *codec, unsigned int clk, } if (refclk % 8000) - rates = arizona_sysclk_44k1_rates; + rates = arizona_opclk_ref_44k1_rates; else - rates = arizona_sysclk_48k_rates; + rates = arizona_opclk_ref_48k_rates; - for (ref = 0; ref < ARRAY_SIZE(arizona_sysclk_48k_rates) && + for (ref = 0; ref < ARRAY_SIZE(arizona_opclk_ref_48k_rates) && rates[ref] <= refclk; ref++) { div = 1; while (rates[ref] / div >= freq && div < 32) { diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index 969e337dc17c..84f5eb07a91b 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -205,18 +205,18 @@ static const struct snd_kcontrol_new es8328_right_line_controls = /* Left Mixer */ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 8, 1, 0), - SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 7, 1, 0), - SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 8, 1, 0), - SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 7, 1, 0), + SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0), + SOC_DAPM_SINGLE("Right Playback Switch", 
ES8328_DACCONTROL18, 7, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0), }; /* Right Mixer */ static const struct snd_kcontrol_new es8328_right_mixer_controls[] = { - SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 8, 1, 0), - SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 7, 1, 0), - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 8, 1, 0), - SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 7, 1, 0), + SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0), + SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0), }; static const char * const es8328_pga_sel[] = { diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 7fc7b4e3f444..c1b87c5800b1 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -1271,6 +1271,36 @@ static int nau8825_i2c_remove(struct i2c_client *client) return 0; } +#ifdef CONFIG_PM_SLEEP +static int nau8825_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct nau8825 *nau8825 = dev_get_drvdata(dev); + + disable_irq(client->irq); + regcache_cache_only(nau8825->regmap, true); + regcache_mark_dirty(nau8825->regmap); + + return 0; +} + +static int nau8825_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct nau8825 *nau8825 = dev_get_drvdata(dev); + + regcache_cache_only(nau8825->regmap, false); + regcache_sync(nau8825->regmap); + enable_irq(client->irq); + + return 0; +} +#endif + +static const struct dev_pm_ops nau8825_pm = { + SET_SYSTEM_SLEEP_PM_OPS(nau8825_suspend, nau8825_resume) +}; + static const struct i2c_device_id nau8825_i2c_ids[] = { { "nau8825", 0 }, { } @@ -1297,6 +1327,7 @@ static struct i2c_driver nau8825_driver = { .name = "nau8825", .of_match_table = of_match_ptr(nau8825_of_ids), .acpi_match_table = ACPI_PTR(nau8825_acpi_match), + .pm = &nau8825_pm, }, .probe = nau8825_i2c_probe, .remove = nau8825_i2c_remove, diff --git a/sound/soc/codecs/rl6231.c b/sound/soc/codecs/rl6231.c index aca479fa7670..1dc68ab08a17 100644 --- a/sound/soc/codecs/rl6231.c +++ b/sound/soc/codecs/rl6231.c @@ -80,8 +80,10 @@ int rl6231_calc_dmic_clk(int rate) } for (i = 0; i < ARRAY_SIZE(div); i++) { - /* find divider that gives DMIC frequency below 3MHz */ - if (3000000 * div[i] >= rate) + if ((div[i] % 3) == 0) + continue; + /* find divider that gives DMIC frequency below 3.072MHz */ + if (3072000 * div[i] >= rate) return i; } diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 28132375e427..ef76940f9dcb 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -245,7 +245,7 @@ struct rt5645_priv { struct snd_soc_jack *hp_jack; struct snd_soc_jack *mic_jack; struct snd_soc_jack *btn_jack; - struct delayed_work jack_detect_work; + struct delayed_work jack_detect_work, rcclock_work; struct regulator_bulk_data supplies[ARRAY_SIZE(rt5645_supply_names)]; struct rt5645_eq_param_s *eq_param; @@ -565,12 +565,33 @@ static int rt5645_hweq_put(struct snd_kcontrol *kcontrol, .put = rt5645_hweq_put \ } +static int rt5645_spk_put_volsw(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component); + int ret; + + cancel_delayed_work_sync(&rt5645->rcclock_work); 
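+ /* Keep the RC clock (CLK25M) powered across the volume update:
+  * the cancel_delayed_work_sync() above drops any pending power-down,
+  * the register write below forces the clock on, and the delayed work
+  * re-armed afterwards powers it back down 200 ms later, so a burst
+  * of volume changes shares a single power cycle.
+  */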
+ + regmap_update_bits(rt5645->regmap, RT5645_MICBIAS, + RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PU); + + ret = snd_soc_put_volsw(kcontrol, ucontrol); + + queue_delayed_work(system_power_efficient_wq, &rt5645->rcclock_work, + msecs_to_jiffies(200)); + + return ret; +} + static const struct snd_kcontrol_new rt5645_snd_controls[] = { /* Speaker Output Volume */ SOC_DOUBLE("Speaker Channel Switch", RT5645_SPK_VOL, RT5645_VOL_L_SFT, RT5645_VOL_R_SFT, 1, 1), - SOC_DOUBLE_TLV("Speaker Playback Volume", RT5645_SPK_VOL, - RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, out_vol_tlv), + SOC_DOUBLE_EXT_TLV("Speaker Playback Volume", RT5645_SPK_VOL, + RT5645_L_VOL_SFT, RT5645_R_VOL_SFT, 39, 1, snd_soc_get_volsw, + rt5645_spk_put_volsw, out_vol_tlv), /* ClassD modulator Speaker Gain Ratio */ SOC_SINGLE_TLV("Speaker ClassD Playback Volume", RT5645_SPO_CLSD_RATIO, @@ -1498,7 +1519,7 @@ static void hp_amp_power(struct snd_soc_codec *codec, int on) regmap_write(rt5645->regmap, RT5645_PR_BASE + RT5645_MAMP_INT_REG2, 0xfc00); snd_soc_write(codec, RT5645_DEPOP_M2, 0x1140); - msleep(40); + msleep(70); rt5645->hp_on = true; } else { /* depop parameters */ @@ -3122,6 +3143,15 @@ static void rt5645_jack_detect_work(struct work_struct *work) SND_JACK_BTN_2 | SND_JACK_BTN_3); } +static void rt5645_rcclock_work(struct work_struct *work) +{ + struct rt5645_priv *rt5645 = + container_of(work, struct rt5645_priv, rcclock_work.work); + + regmap_update_bits(rt5645->regmap, RT5645_MICBIAS, + RT5645_PWR_CLK25M_MASK, RT5645_PWR_CLK25M_PD); +} + static irqreturn_t rt5645_irq(int irq, void *data) { struct rt5645_priv *rt5645 = data; @@ -3348,6 +3378,27 @@ static const struct dmi_system_id dmi_platform_intel_braswell[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Reks"), }, }, + { + .ident = "Google Edgar", + .callback = strago_quirk_cb, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), + }, + }, + { + .ident = "Google Wizpig", + .callback = strago_quirk_cb, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Wizpig"), + }, + }, + { + .ident = "Google Terra", + .callback = strago_quirk_cb, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Terra"), + }, + }, { } }; @@ -3587,6 +3638,7 @@ static int rt5645_i2c_probe(struct i2c_client *i2c, } INIT_DELAYED_WORK(&rt5645->jack_detect_work, rt5645_jack_detect_work); + INIT_DELAYED_WORK(&rt5645->rcclock_work, rt5645_rcclock_work); if (rt5645->i2c->irq) { ret = request_threaded_irq(rt5645->i2c->irq, NULL, rt5645_irq, @@ -3621,6 +3673,7 @@ static int rt5645_i2c_remove(struct i2c_client *i2c) free_irq(i2c->irq, rt5645); cancel_delayed_work_sync(&rt5645->jack_detect_work); + cancel_delayed_work_sync(&rt5645->rcclock_work); snd_soc_unregister_codec(&i2c->dev); regulator_bulk_disable(ARRAY_SIZE(rt5645->supplies), rt5645->supplies); diff --git a/sound/soc/codecs/rt5670.h b/sound/soc/codecs/rt5670.h index dc2b46236c5c..3f1b0f1df809 100644 --- a/sound/soc/codecs/rt5670.h +++ b/sound/soc/codecs/rt5670.h @@ -973,12 +973,12 @@ #define RT5670_SCLK_SRC_MCLK (0x0 << 14) #define RT5670_SCLK_SRC_PLL1 (0x1 << 14) #define RT5670_SCLK_SRC_RCCLK (0x2 << 14) /* 15MHz */ -#define RT5670_PLL1_SRC_MASK (0x3 << 12) -#define RT5670_PLL1_SRC_SFT 12 -#define RT5670_PLL1_SRC_MCLK (0x0 << 12) -#define RT5670_PLL1_SRC_BCLK1 (0x1 << 12) -#define RT5670_PLL1_SRC_BCLK2 (0x2 << 12) -#define RT5670_PLL1_SRC_BCLK3 (0x3 << 12) +#define RT5670_PLL1_SRC_MASK (0x7 << 11) +#define RT5670_PLL1_SRC_SFT 11 +#define RT5670_PLL1_SRC_MCLK (0x0 << 11) +#define RT5670_PLL1_SRC_BCLK1 (0x1 << 11) +#define RT5670_PLL1_SRC_BCLK2 (0x2 << 11) +#define 
RT5670_PLL1_SRC_BCLK3 (0x3 << 11) #define RT5670_PLL1_PD_MASK (0x1 << 3) #define RT5670_PLL1_PD_SFT 3 #define RT5670_PLL1_PD_1 (0x0 << 3) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index b4cd7e3bf5f8..69d987a9935c 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -1386,90 +1386,90 @@ static const struct snd_kcontrol_new rt5677_dac_r_mix[] = { }; static const struct snd_kcontrol_new rt5677_sto1_dac_l_mix[] = { - SOC_DAPM_SINGLE("ST L Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_STO1_DAC_MIXER, RT5677_M_ST_DAC1_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC1_L_STO_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC2_L_STO_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC1_R_STO_L_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_sto1_dac_r_mix[] = { - SOC_DAPM_SINGLE("ST R Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_STO1_DAC_MIXER, RT5677_M_ST_DAC1_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC1_R_STO_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC2_R_STO_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_STO1_DAC_MIXER, RT5677_M_DAC1_L_STO_R_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_mono_dac_l_mix[] = { - SOC_DAPM_SINGLE("ST L Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("ST L Switch", RT5677_MONO_DAC_MIXER, RT5677_M_ST_DAC2_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 L Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC1_L_MONO_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC2_L_MONO_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC2_R_MONO_L_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_mono_dac_r_mix[] = { - SOC_DAPM_SINGLE("ST R Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("ST R Switch", RT5677_MONO_DAC_MIXER, RT5677_M_ST_DAC2_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC1 R Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC1_R_MONO_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 R Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC2_R_MONO_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC2 L Switch", RT5677_MONO_DAC_MIXER, RT5677_M_DAC2_L_MONO_R_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_dd1_l_mix[] = { - SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD1_MIXER, RT5677_M_STO_L_DD1_L_SFT, 1, 1), - SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER, + 
SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD1_MIXER, RT5677_M_MONO_L_DD1_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER, RT5677_M_DAC3_L_DD1_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER, RT5677_M_DAC3_R_DD1_L_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_dd1_r_mix[] = { - SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD1_MIXER, RT5677_M_STO_R_DD1_R_SFT, 1, 1), - SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD1_MIXER, RT5677_M_MONO_R_DD1_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC3 R Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC3 R Switch", RT5677_DD1_MIXER, RT5677_M_DAC3_R_DD1_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC3 L Switch", RT5677_DD1_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC3 L Switch", RT5677_DD1_MIXER, RT5677_M_DAC3_L_DD1_R_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_dd2_l_mix[] = { - SOC_DAPM_SINGLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix L Switch", RT5677_DD2_MIXER, RT5677_M_STO_L_DD2_L_SFT, 1, 1), - SOC_DAPM_SINGLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix L Switch", RT5677_DD2_MIXER, RT5677_M_MONO_L_DD2_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER, RT5677_M_DAC4_L_DD2_L_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER, RT5677_M_DAC4_R_DD2_L_SFT, 1, 1), }; static const struct snd_kcontrol_new rt5677_dd2_r_mix[] = { - SOC_DAPM_SINGLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Sto DAC Mix R Switch", RT5677_DD2_MIXER, RT5677_M_STO_R_DD2_R_SFT, 1, 1), - SOC_DAPM_SINGLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("Mono DAC Mix R Switch", RT5677_DD2_MIXER, RT5677_M_MONO_R_DD2_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC4 R Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC4 R Switch", RT5677_DD2_MIXER, RT5677_M_DAC4_R_DD2_R_SFT, 1, 1), - SOC_DAPM_SINGLE("DAC4 L Switch", RT5677_DD2_MIXER, + SOC_DAPM_SINGLE_AUTODISABLE("DAC4 L Switch", RT5677_DD2_MIXER, RT5677_M_DAC4_L_DD2_R_SFT, 1, 1), }; @@ -2596,6 +2596,21 @@ static int rt5677_vref_event(struct snd_soc_dapm_widget *w, return 0; } +static int rt5677_filter_power_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + switch (event) { + case SND_SOC_DAPM_POST_PMU: + msleep(50); + break; + + default: + return 0; + } + + return 0; +} + static const struct snd_soc_dapm_widget rt5677_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("PLL1", RT5677_PWR_ANLG2, RT5677_PWR_PLL1_BIT, 0, rt5677_set_pll1_event, SND_SOC_DAPM_PRE_PMU | @@ -3072,19 +3087,26 @@ static const struct snd_soc_dapm_widget rt5677_dapm_widgets[] = { /* DAC Mixer */ SND_SOC_DAPM_SUPPLY("dac stereo1 filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_S1F_BIT, 0, NULL, 0), + RT5677_PWR_DAC_S1F_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono2 left filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_M2F_L_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M2F_L_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono2 right filter", 
RT5677_PWR_DIG2, - RT5677_PWR_DAC_M2F_R_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M2F_R_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono3 left filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_M3F_L_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M3F_L_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono3 right filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_M3F_R_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M3F_R_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono4 left filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_M4F_L_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M4F_L_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("dac mono4 right filter", RT5677_PWR_DIG2, - RT5677_PWR_DAC_M4F_R_BIT, 0, NULL, 0), + RT5677_PWR_DAC_M4F_R_BIT, 0, rt5677_filter_power_event, + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_MIXER("Stereo DAC MIXL", SND_SOC_NOPM, 0, 0, rt5677_sto1_dac_l_mix, ARRAY_SIZE(rt5677_sto1_dac_l_mix)), diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 056375339ea3..5380798883b5 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -229,7 +229,7 @@ SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL, SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL, 6, 1, 0), SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL, - 7, 1, 0), + 7, 1, 1), SOC_SINGLE_TLV("Right Input Boost Mixer RINPUT3 Volume", WM8960_INBMIX1, 4, 7, 0, lineinboost_tlv), diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 39ebd7bf4f53..a7e79784fc16 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -365,8 +365,8 @@ static const struct reg_default wm8962_reg[] = { { 16924, 0x0059 }, /* R16924 - HDBASS_PG_1 */ { 16925, 0x999A }, /* R16925 - HDBASS_PG_0 */ - { 17048, 0x0083 }, /* R17408 - HPF_C_1 */ - { 17049, 0x98AD }, /* R17409 - HPF_C_0 */ + { 17408, 0x0083 }, /* R17408 - HPF_C_1 */ + { 17409, 0x98AD }, /* R17409 - HPF_C_0 */ { 17920, 0x007F }, /* R17920 - ADCL_RETUNE_C1_1 */ { 17921, 0xFFFF }, /* R17921 - ADCL_RETUNE_C1_0 */ diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 4495a40a9468..c1c9c2e3525b 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -681,8 +681,8 @@ static int davinci_mcasp_set_tdm_slot(struct snd_soc_dai *dai, } mcasp->tdm_slots = slots; - mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = rx_mask; - mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = tx_mask; + mcasp->tdm_mask[SNDRV_PCM_STREAM_PLAYBACK] = tx_mask; + mcasp->tdm_mask[SNDRV_PCM_STREAM_CAPTURE] = rx_mask; mcasp->slot_width = slot_width; return davinci_mcasp_set_ch_constraints(mcasp); @@ -908,6 +908,14 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream, mcasp_set_bits(mcasp, DAVINCI_MCASP_RXFMT_REG, busel | RXORD); mcasp_mod_bits(mcasp, DAVINCI_MCASP_RXFMCTL_REG, FSRMOD(total_slots), FSRMOD(0x1FF)); + /* + * If McASP is set to be TX/RX synchronous and the playback is + * not running already we need to configure the TX slots in + * order to have correct FSX on the bus + */ + if (mcasp_is_synchronous(mcasp) && !mcasp->channels) + mcasp_mod_bits(mcasp, DAVINCI_MCASP_TXFMCTL_REG, + FSXMOD(total_slots), FSXMOD(0x1FF)); } return 0; diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 19c302b0d763..14dfdee05fd5 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -283,6 +283,8 @@ config SND_SOC_IMX_MC13783 config 
SND_SOC_FSL_ASOC_CARD tristate "Generic ASoC Sound Card with ASRC support" depends on OF && I2C + # enforce SND_SOC_FSL_ASOC_CARD=m if SND_AC97_CODEC=m: + depends on SND_AC97_CODEC || SND_AC97_CODEC=n select SND_SOC_IMX_AUDMUX select SND_SOC_IMX_PCM_DMA select SND_SOC_FSL_ESAI diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index a4435f5e3be9..ffd5f9acc849 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -454,7 +454,8 @@ static int fsl_sai_trigger(struct snd_pcm_substream *substream, int cmd, * Rx sync with Tx clocks: Clear SYNC for Tx, set it for Rx. * Tx sync with Rx clocks: Clear SYNC for Rx, set it for Tx. */ - regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC, 0); + regmap_update_bits(sai->regmap, FSL_SAI_TCR2, FSL_SAI_CR2_SYNC, + sai->synchronous[TX] ? FSL_SAI_CR2_SYNC : 0); regmap_update_bits(sai->regmap, FSL_SAI_RCR2, FSL_SAI_CR2_SYNC, sai->synchronous[RX] ? FSL_SAI_CR2_SYNC : 0); diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig index 7b778ab85f8b..d430ef5a4f38 100644 --- a/sound/soc/intel/Kconfig +++ b/sound/soc/intel/Kconfig @@ -144,7 +144,7 @@ config SND_SOC_INTEL_SKYLAKE config SND_SOC_INTEL_SKL_RT286_MACH tristate "ASoC Audio driver for SKL with RT286 I2S mode" - depends on X86 && ACPI + depends on X86 && ACPI && I2C select SND_SOC_INTEL_SST select SND_SOC_INTEL_SKYLAKE select SND_SOC_RT286 diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index a7854c8fc523..ffea427aeca8 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -1240,6 +1240,7 @@ int skl_tplg_init(struct snd_soc_platform *platform, struct hdac_ext_bus *ebus) */ ret = snd_soc_tplg_component_load(&platform->component, &skl_tplg_ops, fw, 0); + release_firmware(fw); if (ret < 0) { dev_err(bus->dev, "tplg component load failed%d\n", ret); return -EINVAL; diff --git a/sound/soc/rockchip/rockchip_spdif.c b/sound/soc/rockchip/rockchip_spdif.c index a38a3029062c..ac72ff5055bb 100644 --- a/sound/soc/rockchip/rockchip_spdif.c +++ b/sound/soc/rockchip/rockchip_spdif.c @@ -280,7 +280,7 @@ static int rk_spdif_probe(struct platform_device *pdev) int ret; match = of_match_node(rk_spdif_match, np); - if ((int) match->data == RK_SPDIF_RK3288) { + if (match->data == (void *)RK_SPDIF_RK3288) { struct regmap *grf; grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf"); diff --git a/sound/soc/rockchip/rockchip_spdif.h b/sound/soc/rockchip/rockchip_spdif.h index 07f86a21046a..921b4095fb92 100644 --- a/sound/soc/rockchip/rockchip_spdif.h +++ b/sound/soc/rockchip/rockchip_spdif.h @@ -28,9 +28,9 @@ #define SPDIF_CFGR_VDW(x) (x << SPDIF_CFGR_VDW_SHIFT) #define SDPIF_CFGR_VDW_MASK (0xf << SPDIF_CFGR_VDW_SHIFT) -#define SPDIF_CFGR_VDW_16 SPDIF_CFGR_VDW(0x00) -#define SPDIF_CFGR_VDW_20 SPDIF_CFGR_VDW(0x01) -#define SPDIF_CFGR_VDW_24 SPDIF_CFGR_VDW(0x10) +#define SPDIF_CFGR_VDW_16 SPDIF_CFGR_VDW(0x0) +#define SPDIF_CFGR_VDW_20 SPDIF_CFGR_VDW(0x1) +#define SPDIF_CFGR_VDW_24 SPDIF_CFGR_VDW(0x2) /* * DMACR diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c index 76da7620904c..edcf4cc2e84f 100644 --- a/sound/soc/sh/rcar/gen.c +++ b/sound/soc/sh/rcar/gen.c @@ -235,7 +235,7 @@ static int rsnd_gen2_probe(struct platform_device *pdev, RSND_GEN_S_REG(SCU_SYS_STATUS0, 0x1c8), RSND_GEN_S_REG(SCU_SYS_INT_EN0, 0x1cc), RSND_GEN_S_REG(SCU_SYS_STATUS1, 0x1d0), - RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1c4), + RSND_GEN_S_REG(SCU_SYS_INT_EN1, 0x1d4), RSND_GEN_M_REG(SRC_SWRSR, 0x200, 0x40), 
RSND_GEN_M_REG(SRC_SRCIR, 0x204, 0x40), RSND_GEN_M_REG(SRC_ADINR, 0x214, 0x40), diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index 261b50217c48..68b439ed22d7 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -923,6 +923,7 @@ static int rsnd_src_pcm_new_gen2(struct rsnd_mod *mod, struct snd_soc_pcm_runtime *rtd) { struct rsnd_dai *rdai = rsnd_io_to_rdai(io); + struct rsnd_mod *dvc = rsnd_io_to_mod_dvc(io); struct rsnd_src *src = rsnd_mod_to_src(mod); int ret; @@ -937,6 +938,12 @@ static int rsnd_src_pcm_new_gen2(struct rsnd_mod *mod, return 0; /* + * SRC In doesn't work if DVC was enabled + */ + if (dvc && !rsnd_io_is_play(io)) + return 0; + + /* * enable sync convert */ ret = rsnd_kctrl_new_s(mod, io, rtd, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 24b096066a07..a1305f827a98 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -795,12 +795,12 @@ static void soc_resume_deferred(struct work_struct *work) dev_dbg(card->dev, "ASoC: resume work completed\n"); - /* userspace can access us now we are back as we were before */ - snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0); - /* Recheck all endpoints too, their state is affected by suspend */ dapm_mark_endpoints_dirty(card); snd_soc_dapm_sync(&card->dapm); + + /* userspace can access us now we are back as we were before */ + snd_power_change_state(card->snd_card, SNDRV_CTL_POWER_D0); } /* powers up audio subsystem after a suspend */ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 016eba10b1ec..7d009428934a 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2293,6 +2293,12 @@ void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w) kfree(w); } +void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm) +{ + dapm->path_sink_cache.widget = NULL; + dapm->path_source_cache.widget = NULL; +} + /* free all dapm widgets and resources */ static void dapm_free_widgets(struct snd_soc_dapm_context *dapm) { @@ -2303,6 +2309,7 @@ static void dapm_free_widgets(struct snd_soc_dapm_context *dapm) continue; snd_soc_dapm_free_widget(w); } + snd_soc_dapm_reset_cache(dapm); } static struct snd_soc_dapm_widget *dapm_find_widget( diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index ecd38e52285a..2f67ba6d7a8f 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(snd_soc_get_volsw_sx); /** * snd_soc_put_volsw_sx - double mixer set callback * @kcontrol: mixer control - * @uinfo: control element information + * @ucontrol: control element information * * Callback to set the value of a double mixer control that spans 2 registers. * diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 8d7ec80af51b..6963ba20991c 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -531,7 +531,7 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, /* TLV bytes controls need standard kcontrol info handler, * TLV callback and extended put/get handlers. 
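 *
 * (The _ext info handler is needed here: snd_soc_bytes_info parses
 * private_value as a struct soc_bytes, while TLV bytes_ext controls
 * carry a struct soc_bytes_ext, so only snd_soc_bytes_info_ext
 * reports the control's byte count correctly.)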
*/ - k->info = snd_soc_bytes_info; + k->info = snd_soc_bytes_info_ext; k->tlv.c = snd_soc_bytes_tlv_callback; ext_ops = tplg->bytes_ext_ops; @@ -1805,6 +1805,7 @@ void snd_soc_tplg_widget_remove_all(struct snd_soc_dapm_context *dapm, snd_soc_tplg_widget_remove(w); snd_soc_dapm_free_widget(w); } + snd_soc_dapm_reset_cache(dapm); } EXPORT_SYMBOL_GPL(snd_soc_tplg_widget_remove_all); diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index 843f037a317d..5c2bc53f0a9b 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -669,6 +669,7 @@ static int uni_player_startup(struct snd_pcm_substream *substream, { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct uniperif *player = priv->dai_data.uni; + player->substream = substream; player->clk_adj = 0; @@ -950,6 +951,8 @@ static void uni_player_shutdown(struct snd_pcm_substream *substream, if (player->state != UNIPERIF_STATE_STOPPED) /* Stop the player */ uni_player_stop(player); + + player->substream = NULL; } static int uni_player_parse_dt_clk_glue(struct platform_device *pdev, @@ -989,7 +992,7 @@ static int uni_player_parse_dt(struct platform_device *pdev, if (!info) return -ENOMEM; - if (of_property_read_u32(pnode, "version", &player->ver) || + if (of_property_read_u32(pnode, "st,version", &player->ver) || player->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) { dev_err(dev, "Unknown uniperipheral version "); return -EINVAL; @@ -998,13 +1001,13 @@ static int uni_player_parse_dt(struct platform_device *pdev, if (player->ver >= SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0) info->underflow_enabled = 1; - if (of_property_read_u32(pnode, "uniperiph-id", &info->id)) { + if (of_property_read_u32(pnode, "st,uniperiph-id", &info->id)) { dev_err(dev, "uniperipheral id not defined"); return -EINVAL; } /* Read the device mode property */ - if (of_property_read_string(pnode, "mode", &mode)) { + if (of_property_read_string(pnode, "st,mode", &mode)) { dev_err(dev, "uniperipheral mode not defined"); return -EINVAL; } diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index f791239a3087..8a0eb2050169 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -316,7 +316,7 @@ static int uni_reader_parse_dt(struct platform_device *pdev, if (!info) return -ENOMEM; - if (of_property_read_u32(node, "version", &reader->ver) || + if (of_property_read_u32(node, "st,version", &reader->ver) || reader->ver == SND_ST_UNIPERIF_VERSION_UNKNOWN) { dev_err(&pdev->dev, "Unknown uniperipheral version "); return -EINVAL; @@ -346,7 +346,6 @@ int uni_reader_init(struct platform_device *pdev, reader->hw = &uni_reader_pcm_hw; reader->dai_ops = &uni_reader_dai_ops; - dev_err(reader->dev, "%s: enter\n", __func__); ret = uni_reader_parse_dt(pdev, reader); if (ret < 0) { dev_err(reader->dev, "Failed to parse DeviceTree"); diff --git a/sound/soc/sunxi/sun4i-codec.c b/sound/soc/sunxi/sun4i-codec.c index bcbf4da168b6..1bb896d78d09 100644 --- a/sound/soc/sunxi/sun4i-codec.c +++ b/sound/soc/sunxi/sun4i-codec.c @@ -2,6 +2,7 @@ * Copyright 2014 Emilio López <emilio@elopez.com.ar> * Copyright 2014 Jon Smirl <jonsmirl@gmail.com> * Copyright 2015 Maxime Ripard <maxime.ripard@free-electrons.com> + * Copyright 2015 Adam Sampson <ats@offog.org> * * Based on the Allwinner SDK driver, released under the GPL. 
* @@ -404,7 +405,7 @@ static const struct snd_kcontrol_new sun4i_codec_pa_mute = static DECLARE_TLV_DB_SCALE(sun4i_codec_pa_volume_scale, -6300, 100, 1); static const struct snd_kcontrol_new sun4i_codec_widgets[] = { - SOC_SINGLE_TLV("PA Volume", SUN4I_CODEC_DAC_ACTL, + SOC_SINGLE_TLV("Power Amplifier Volume", SUN4I_CODEC_DAC_ACTL, SUN4I_CODEC_DAC_ACTL_PA_VOL, 0x3F, 0, sun4i_codec_pa_volume_scale), }; @@ -452,12 +453,12 @@ static const struct snd_soc_dapm_widget sun4i_codec_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("Mixer Enable", SUN4I_CODEC_DAC_ACTL, SUN4I_CODEC_DAC_ACTL_MIXEN, 0, NULL, 0), - /* Pre-Amplifier */ - SND_SOC_DAPM_MIXER("Pre-Amplifier", SUN4I_CODEC_ADC_ACTL, + /* Power Amplifier */ + SND_SOC_DAPM_MIXER("Power Amplifier", SUN4I_CODEC_ADC_ACTL, SUN4I_CODEC_ADC_ACTL_PA_EN, 0, sun4i_codec_pa_mixer_controls, ARRAY_SIZE(sun4i_codec_pa_mixer_controls)), - SND_SOC_DAPM_SWITCH("Pre-Amplifier Mute", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SWITCH("Power Amplifier Mute", SND_SOC_NOPM, 0, 0, &sun4i_codec_pa_mute), SND_SOC_DAPM_OUTPUT("HP Right"), @@ -480,16 +481,16 @@ static const struct snd_soc_dapm_route sun4i_codec_dapm_routes[] = { { "Left Mixer", NULL, "Mixer Enable" }, { "Left Mixer", "Left DAC Playback Switch", "Left DAC" }, - /* Pre-Amplifier Mixer Routes */ - { "Pre-Amplifier", "Mixer Playback Switch", "Left Mixer" }, - { "Pre-Amplifier", "Mixer Playback Switch", "Right Mixer" }, - { "Pre-Amplifier", "DAC Playback Switch", "Left DAC" }, - { "Pre-Amplifier", "DAC Playback Switch", "Right DAC" }, + /* Power Amplifier Routes */ + { "Power Amplifier", "Mixer Playback Switch", "Left Mixer" }, + { "Power Amplifier", "Mixer Playback Switch", "Right Mixer" }, + { "Power Amplifier", "DAC Playback Switch", "Left DAC" }, + { "Power Amplifier", "DAC Playback Switch", "Right DAC" }, - /* PA -> HP path */ - { "Pre-Amplifier Mute", "Switch", "Pre-Amplifier" }, - { "HP Right", NULL, "Pre-Amplifier Mute" }, - { "HP Left", NULL, "Pre-Amplifier Mute" }, + /* Headphone Output Routes */ + { "Power Amplifier Mute", "Switch", "Power Amplifier" }, + { "HP Right", NULL, "Power Amplifier Mute" }, + { "HP Left", NULL, "Power Amplifier Mute" }, }; static struct snd_soc_codec_driver sun4i_codec_codec = { diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 40ab4476c80a..51cf8256c6cd 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -420,8 +420,7 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) static int nfit_test0_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_table_nfit) - + sizeof(struct acpi_nfit_system_address) * NUM_SPA + size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR + sizeof(struct acpi_nfit_data_region) * NUM_BDW @@ -471,8 +470,7 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_table_nfit) - + sizeof(struct acpi_nfit_system_address) + size_t nfit_size = sizeof(struct acpi_nfit_system_address) + sizeof(struct acpi_nfit_memory_map) + sizeof(struct acpi_nfit_control_region); @@ -488,39 +486,24 @@ static int nfit_test1_alloc(struct nfit_test *t) return 0; } -static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size) -{ - memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4); - nfit->header.length = size; - nfit->header.revision = 1; - memcpy(nfit->header.oem_id, 
"LIBND", 6); - memcpy(nfit->header.oem_table_id, "TEST", 5); - nfit->header.oem_revision = 1; - memcpy(nfit->header.asl_compiler_id, "TST", 4); - nfit->header.asl_compiler_revision = 1; -} - static void nfit_test0_setup(struct nfit_test *t) { struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; struct acpi_nfit_memory_map *memdev; void *nfit_buf = t->nfit_buf; - size_t size = t->nfit_size; struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; unsigned int offset; - nfit_test_init_header(nfit_buf, size); - /* * spa0 (interleave first half of dimm0 and dimm1, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(struct acpi_table_nfit); + spa = nfit_buf; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); @@ -533,7 +516,7 @@ static void nfit_test0_setup(struct nfit_test *t) * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa); + spa = nfit_buf + sizeof(*spa); spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); @@ -542,7 +525,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = SPA1_SIZE; /* spa2 (dcr0) dimm0 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2; + spa = nfit_buf + sizeof(*spa) * 2; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); @@ -551,7 +534,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DCR_SIZE; /* spa3 (dcr1) dimm1 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3; + spa = nfit_buf + sizeof(*spa) * 3; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); @@ -560,7 +543,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DCR_SIZE; /* spa4 (dcr2) dimm2 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4; + spa = nfit_buf + sizeof(*spa) * 4; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); @@ -569,7 +552,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DCR_SIZE; /* spa5 (dcr3) dimm3 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5; + spa = nfit_buf + sizeof(*spa) * 5; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); @@ -578,7 +561,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DCR_SIZE; /* spa6 (bdw for dcr0) dimm0 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6; + spa = nfit_buf + sizeof(*spa) * 6; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); @@ -587,7 +570,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DIMM_SIZE; /* spa7 (bdw for dcr1) dimm1 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7; + spa = nfit_buf + sizeof(*spa) * 7; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, 
to_nfit_uuid(NFIT_SPA_BDW), 16); @@ -596,7 +579,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DIMM_SIZE; /* spa8 (bdw for dcr2) dimm2 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8; + spa = nfit_buf + sizeof(*spa) * 8; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); @@ -605,7 +588,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->length = DIMM_SIZE; /* spa9 (bdw for dcr3) dimm3 */ - spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9; + spa = nfit_buf + sizeof(*spa) * 9; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); @@ -613,7 +596,7 @@ static void nfit_test0_setup(struct nfit_test *t) spa->address = t->dimm_dma[3]; spa->length = DIMM_SIZE; - offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10; + offset = sizeof(*spa) * 10; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1100,15 +1083,13 @@ static void nfit_test0_setup(struct nfit_test *t) static void nfit_test1_setup(struct nfit_test *t) { - size_t size = t->nfit_size, offset; + size_t offset; void *nfit_buf = t->nfit_buf; struct acpi_nfit_memory_map *memdev; struct acpi_nfit_control_region *dcr; struct acpi_nfit_system_address *spa; - nfit_test_init_header(nfit_buf, size); - - offset = sizeof(struct acpi_table_nfit); + offset = 0; /* spa0 (flat range with no bdw aliasing) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; |